@nxtedition/rocksdb 8.0.0 → 8.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144) hide show
  1. package/BUILDING.md +2 -2
  2. package/binding.cc +2 -7
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -9
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/TARGETS +4 -2
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +8 -29
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +146 -0
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +13 -1
  10. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +57 -146
  11. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +32 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +11 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -9
  14. package/deps/rocksdb/rocksdb/db/column_family.h +20 -0
  15. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -33
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +27 -8
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +2 -1
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -6
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +65 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +5 -0
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -32
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +28 -47
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +28 -22
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -14
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +170 -140
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -4
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  35. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  36. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  37. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  38. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +72 -5
  39. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +119 -10
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +585 -264
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +46 -18
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +6 -15
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -8
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +10 -0
  49. package/deps/rocksdb/rocksdb/db/db_iter.cc +57 -36
  50. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -1
  51. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +250 -2
  52. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  53. package/deps/rocksdb/rocksdb/db/db_test2.cc +307 -8
  54. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  55. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  56. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  57. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +5 -2
  60. package/deps/rocksdb/rocksdb/db/flush_job.cc +5 -2
  61. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
  62. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  63. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/memtable.cc +55 -9
  65. package/deps/rocksdb/rocksdb/db/merge_helper.cc +76 -102
  66. package/deps/rocksdb/rocksdb/db/merge_helper.h +2 -11
  67. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  68. package/deps/rocksdb/rocksdb/db/repair.cc +64 -22
  69. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  70. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  71. package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
  72. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  73. package/deps/rocksdb/rocksdb/db/version_builder.cc +90 -43
  74. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  75. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +190 -67
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +15 -1
  77. package/deps/rocksdb/rocksdb/db/version_edit.h +16 -4
  78. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +41 -11
  79. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +27 -12
  80. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +18 -16
  81. package/deps/rocksdb/rocksdb/db/version_set.cc +219 -38
  82. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  83. package/deps/rocksdb/rocksdb/db/version_set_test.cc +45 -25
  84. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +122 -61
  85. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +0 -1
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +0 -4
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +12 -17
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +6 -4
  90. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  91. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +1 -0
  92. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +0 -48
  93. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
  94. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +196 -171
  95. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  97. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -18
  98. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +27 -5
  99. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  100. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  101. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +3 -0
  102. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  103. package/deps/rocksdb/rocksdb/logging/logging.h +13 -19
  104. package/deps/rocksdb/rocksdb/memory/arena.cc +4 -3
  105. package/deps/rocksdb/rocksdb/memory/arena_test.cc +30 -0
  106. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
  107. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  108. package/deps/rocksdb/rocksdb/src.mk +2 -1
  109. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  110. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -10
  111. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -29
  112. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  113. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -39
  114. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +0 -1
  115. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  116. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +142 -0
  117. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +241 -0
  118. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  119. package/deps/rocksdb/rocksdb/table/format.h +5 -2
  120. package/deps/rocksdb/rocksdb/table/get_context.cc +52 -11
  121. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +97 -115
  122. package/deps/rocksdb/rocksdb/table/merging_iterator.h +82 -1
  123. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  124. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  125. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  126. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  127. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +0 -6
  128. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  129. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  130. package/deps/rocksdb/rocksdb/util/crc32c.cc +1 -1
  131. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  132. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +5 -0
  133. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -0
  134. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -67
  135. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -3
  136. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  137. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +59 -0
  138. package/deps/rocksdb/rocksdb.gyp +2 -1
  139. package/package.json +1 -1
  140. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  141. package/prebuilds/linux-x64/node.napi.node +0 -0
  142. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +0 -580
  143. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +0 -476
  144. package/max_rev_operator.h +0 -100
@@ -1661,6 +1661,213 @@ TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
1661
1661
  ASSERT_EQ(1, num_range_deletions);
1662
1662
  }
1663
1663
 
1664
+ // Test SST partitioner cut after every single key
1665
+ class SingleKeySstPartitioner : public SstPartitioner {
1666
+ public:
1667
+ const char* Name() const override { return "SingleKeySstPartitioner"; }
1668
+
1669
+ PartitionerResult ShouldPartition(
1670
+ const PartitionerRequest& /*request*/) override {
1671
+ return kRequired;
1672
+ }
1673
+
1674
+ bool CanDoTrivialMove(const Slice& /*smallest_user_key*/,
1675
+ const Slice& /*largest_user_key*/) override {
1676
+ return false;
1677
+ }
1678
+ };
1679
+
1680
+ class SingleKeySstPartitionerFactory : public SstPartitionerFactory {
1681
+ public:
1682
+ static const char* kClassName() { return "SingleKeySstPartitionerFactory"; }
1683
+ const char* Name() const override { return kClassName(); }
1684
+
1685
+ std::unique_ptr<SstPartitioner> CreatePartitioner(
1686
+ const SstPartitioner::Context& /* context */) const override {
1687
+ return std::unique_ptr<SstPartitioner>(new SingleKeySstPartitioner());
1688
+ }
1689
+ };
1690
+
1691
+ TEST_F(DBRangeDelTest, LevelCompactOutputCutAtRangeTombstoneForTtlFiles) {
1692
+ Options options = CurrentOptions();
1693
+ options.compression = kNoCompression;
1694
+ options.compaction_pri = kMinOverlappingRatio;
1695
+ options.disable_auto_compactions = true;
1696
+ options.ttl = 24 * 60 * 60; // 24 hours
1697
+ options.target_file_size_base = 8 << 10;
1698
+ env_->SetMockSleep();
1699
+ options.env = env_;
1700
+ DestroyAndReopen(options);
1701
+
1702
+ Random rnd(301);
1703
+ // Fill some data so that future compactions are not bottommost level
1704
+ // compaction, and hence they would try to cut around files for TTL
1705
+ for (int i = 5; i < 10; ++i) {
1706
+ ASSERT_OK(Put(Key(i), rnd.RandomString(1 << 10)));
1707
+ }
1708
+ ASSERT_OK(Flush());
1709
+ MoveFilesToLevel(3);
1710
+ ASSERT_EQ("0,0,0,1", FilesPerLevel());
1711
+
1712
+ for (int i = 5; i < 10; ++i) {
1713
+ ASSERT_OK(Put(Key(i), rnd.RandomString(1 << 10)));
1714
+ }
1715
+ ASSERT_OK(Flush());
1716
+ MoveFilesToLevel(1);
1717
+ ASSERT_EQ("0,1,0,1", FilesPerLevel());
1718
+
1719
+ env_->MockSleepForSeconds(20 * 60 * 60);
1720
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1721
+ Key(11), Key(12)));
1722
+ ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10)));
1723
+ ASSERT_OK(Flush());
1724
+ ASSERT_EQ("1,1,0,1", FilesPerLevel());
1725
+ // L0 file is new, L1 and L3 files are old and qualify for TTL
1726
+ env_->MockSleepForSeconds(10 * 60 * 60);
1727
+ MoveFilesToLevel(1);
1728
+ // L1 output should be cut into 3 files:
1729
+ // File 0: Key(0)
1730
+ // File 1: (qualified for TTL): Key(5) - Key(10)
1731
+ // File 2: DeleteRange [11, 12)
1732
+ ASSERT_EQ("0,3,0,1", FilesPerLevel());
1733
+ }
1734
+
1735
+ TEST_F(DBRangeDelTest, CompactionEmitRangeTombstoneToSSTPartitioner) {
1736
+ Options options = CurrentOptions();
1737
+ auto factory = std::make_shared<SingleKeySstPartitionerFactory>();
1738
+ options.sst_partitioner_factory = factory;
1739
+ options.disable_auto_compactions = true;
1740
+ DestroyAndReopen(options);
1741
+
1742
+ Random rnd(301);
1743
+ // range deletion keys are not processed when compacting to bottommost level,
1744
+ // so creating a file at older level to make the next compaction not
1745
+ // bottommost level
1746
+ ASSERT_OK(db_->Put(WriteOptions(), Key(4), rnd.RandomString(10)));
1747
+ ASSERT_OK(Flush());
1748
+ MoveFilesToLevel(5);
1749
+
1750
+ ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(10)));
1751
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
1752
+ Key(5)));
1753
+ ASSERT_OK(Flush());
1754
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1755
+ MoveFilesToLevel(1);
1756
+ // SSTPartitioner decides to cut when range tombstone start key is passed to
1757
+ // it. Note that the range tombstone [2, 5) itself spans multiple keys but we
1758
+ // are not able to partition in between yet.
1759
+ ASSERT_EQ(2, NumTableFilesAtLevel(1));
1760
+ }
1761
+
1762
+ TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenPointKeyAndTombstone) {
1763
+ // L2 has two files
1764
+ // L2_0: 0, 1, 2, 3, 4. L2_1: 5, 6, 7
1765
+ // L0 has 0, [5, 6), 8
1766
+ // max_compaction_bytes is less than the size of L2_0 and L2_1.
1767
+ // When compacting L0 into L1, it should split into 3 files.
1768
+ const int kNumPerFile = 4, kNumFiles = 2;
1769
+ Options options = CurrentOptions();
1770
+ options.disable_auto_compactions = true;
1771
+ options.target_file_size_base = 9 * 1024;
1772
+ options.max_compaction_bytes = 9 * 1024;
1773
+ DestroyAndReopen(options);
1774
+ Random rnd(301);
1775
+ for (int i = 0; i < kNumFiles; ++i) {
1776
+ std::vector<std::string> values;
1777
+ for (int j = 0; j < kNumPerFile; j++) {
1778
+ values.push_back(rnd.RandomString(3 << 10));
1779
+ ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1780
+ }
1781
+ }
1782
+ ASSERT_OK(db_->Flush(FlushOptions()));
1783
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1784
+ MoveFilesToLevel(2);
1785
+ ASSERT_EQ(2, NumTableFilesAtLevel(2));
1786
+ ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10)));
1787
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(5),
1788
+ Key(6)));
1789
+ ASSERT_OK(Put(Key(8), rnd.RandomString(1 << 10)));
1790
+ ASSERT_OK(db_->Flush(FlushOptions()));
1791
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1792
+
1793
+ ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1794
+ true /* disallow_trivial_move */));
1795
+ ASSERT_EQ(3, NumTableFilesAtLevel(1));
1796
+ }
1797
+
1798
+ TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenTombstone) {
1799
+ // L2 has two files
1800
+ // L2_0: 0, 1, 2, 3, 4. L2_1: 5, 6, 7
1801
+ // L0 has two range tombstones [0, 1), [7, 8).
1802
+ // max_compaction_bytes is less than the size of L2_0.
1803
+ // When compacting L0 into L1, the two range tombstones should be
1804
+ // split into two files.
1805
+ const int kNumPerFile = 4, kNumFiles = 2;
1806
+ Options options = CurrentOptions();
1807
+ options.disable_auto_compactions = true;
1808
+ options.target_file_size_base = 9 * 1024;
1809
+ options.max_compaction_bytes = 9 * 1024;
1810
+ DestroyAndReopen(options);
1811
+ Random rnd(301);
1812
+ for (int i = 0; i < kNumFiles; ++i) {
1813
+ std::vector<std::string> values;
1814
+ // Write 12K (4 values, each 3K)
1815
+ for (int j = 0; j < kNumPerFile; j++) {
1816
+ values.push_back(rnd.RandomString(3 << 10));
1817
+ ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1818
+ }
1819
+ }
1820
+ ASSERT_OK(db_->Flush(FlushOptions()));
1821
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1822
+ MoveFilesToLevel(2);
1823
+ ASSERT_EQ(2, NumTableFilesAtLevel(2));
1824
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1825
+ Key(1)));
1826
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(7),
1827
+ Key(8)));
1828
+ ASSERT_OK(db_->Flush(FlushOptions()));
1829
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1830
+
1831
+ ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1832
+ true /* disallow_trivial_move */));
1833
+ // This is L0 -> L1 compaction
1834
+ // The two range tombstones are broken up into two output files
1835
+ // to limit compaction size.
1836
+ ASSERT_EQ(2, NumTableFilesAtLevel(1));
1837
+ }
1838
+
1839
+ TEST_F(DBRangeDelTest, OversizeCompactionPointKeyWithinRangetombstone) {
1840
+ // L2 has two files
1841
+ // L2_0: 0, 1, 2, 3, 4. L2_1: 6, 7, 8
1842
+ // L0 has [0, 9) and point key 5
1843
+ // max_compaction_bytes is less than the size of L2_0.
1844
+ // When compacting L0 into L1, the compaction should cut at point key 5.
1845
+ Options options = CurrentOptions();
1846
+ options.disable_auto_compactions = true;
1847
+ options.target_file_size_base = 9 * 1024;
1848
+ options.max_compaction_bytes = 9 * 1024;
1849
+ DestroyAndReopen(options);
1850
+ Random rnd(301);
1851
+ for (int i = 0; i < 9; ++i) {
1852
+ if (i == 5) {
1853
+ ++i;
1854
+ }
1855
+ ASSERT_OK(Put(Key(i), rnd.RandomString(3 << 10)));
1856
+ }
1857
+ ASSERT_OK(db_->Flush(FlushOptions()));
1858
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1859
+ MoveFilesToLevel(2);
1860
+ ASSERT_EQ(2, NumTableFilesAtLevel(2));
1861
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1862
+ Key(9)));
1863
+ ASSERT_OK(Put(Key(5), rnd.RandomString(1 << 10)));
1864
+ ASSERT_OK(db_->Flush(FlushOptions()));
1865
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1866
+ ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1867
+ true /* disallow_trivial_move */));
1868
+ ASSERT_EQ(2, NumTableFilesAtLevel(1));
1869
+ }
1870
+
1664
1871
  TEST_F(DBRangeDelTest, OverlappedTombstones) {
1665
1872
  const int kNumPerFile = 4, kNumFiles = 2;
1666
1873
  Options options = CurrentOptions();
@@ -2093,6 +2300,7 @@ TEST_F(DBRangeDelTest, NonOverlappingTombstonAtBoundary) {
2093
2300
  options.compression = kNoCompression;
2094
2301
  options.disable_auto_compactions = true;
2095
2302
  options.target_file_size_base = 2 * 1024;
2303
+ options.level_compaction_dynamic_file_size = false;
2096
2304
  DestroyAndReopen(options);
2097
2305
 
2098
2306
  Random rnd(301);
@@ -2508,7 +2716,7 @@ TEST_F(DBRangeDelTest, LeftSentinelKeyTest) {
2508
2716
  options.compression = kNoCompression;
2509
2717
  options.disable_auto_compactions = true;
2510
2718
  options.target_file_size_base = 3 * 1024;
2511
- options.max_compaction_bytes = 1024;
2719
+ options.max_compaction_bytes = 2048;
2512
2720
 
2513
2721
  DestroyAndReopen(options);
2514
2722
  // L2
@@ -2554,7 +2762,7 @@ TEST_F(DBRangeDelTest, LeftSentinelKeyTestWithNewerKey) {
2554
2762
  options.compression = kNoCompression;
2555
2763
  options.disable_auto_compactions = true;
2556
2764
  options.target_file_size_base = 3 * 1024;
2557
- options.max_compaction_bytes = 1024;
2765
+ options.max_compaction_bytes = 3 * 1024;
2558
2766
 
2559
2767
  DestroyAndReopen(options);
2560
2768
  // L2
@@ -2756,6 +2964,46 @@ TEST_F(DBRangeDelTest, RefreshMemtableIter) {
2756
2964
  ASSERT_OK(iter->Refresh());
2757
2965
  }
2758
2966
 
2967
+ TEST_F(DBRangeDelTest, RangeTombstoneRespectIterateUpperBound) {
2968
+ // Memtable: a, [b, bz)
2969
+ // Do a Seek on `a` with iterate_upper_bound being az
2970
+ // range tombstone [b, bz) should not be processed (added to and
2971
+ // popped from the min_heap in MergingIterator).
2972
+ Options options = CurrentOptions();
2973
+ options.disable_auto_compactions = true;
2974
+ DestroyAndReopen(options);
2975
+
2976
+ ASSERT_OK(Put("a", "bar"));
2977
+ ASSERT_OK(
2978
+ db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "b", "bz"));
2979
+
2980
+ // I could not find a cleaner way to test this without relying on
2981
+ // implementation detail. Tried to test the value of
2982
+ // `internal_range_del_reseek_count` but that did not work
2983
+ // since BlockBasedTable iterator becomes !Valid() when point key
2984
+ // is out of bound and that reseek only happens when a point key
2985
+ // is covered by some range tombstone.
2986
+ SyncPoint::GetInstance()->SetCallBack("MergeIterator::PopDeleteRangeStart",
2987
+ [](void*) {
2988
+ // there should not be any range
2989
+ // tombstone in the heap.
2990
+ FAIL();
2991
+ });
2992
+ SyncPoint::GetInstance()->EnableProcessing();
2993
+
2994
+ ReadOptions read_opts;
2995
+ std::string upper_bound = "az";
2996
+ Slice upper_bound_slice = upper_bound;
2997
+ read_opts.iterate_upper_bound = &upper_bound_slice;
2998
+ std::unique_ptr<Iterator> iter{db_->NewIterator(read_opts)};
2999
+ iter->Seek("a");
3000
+ ASSERT_TRUE(iter->Valid());
3001
+ ASSERT_EQ(iter->key(), "a");
3002
+ iter->Next();
3003
+ ASSERT_FALSE(iter->Valid());
3004
+ ASSERT_OK(iter->status());
3005
+ }
3006
+
2759
3007
  #endif // ROCKSDB_LITE
2760
3008
 
2761
3009
  } // namespace ROCKSDB_NAMESPACE
@@ -1203,6 +1203,8 @@ void CheckColumnFamilyMeta(
1203
1203
  file_meta_from_files.file_creation_time);
1204
1204
  ASSERT_GE(file_meta_from_cf.file_creation_time, start_time);
1205
1205
  ASSERT_LE(file_meta_from_cf.file_creation_time, end_time);
1206
+ ASSERT_EQ(file_meta_from_cf.epoch_number,
1207
+ file_meta_from_files.epoch_number);
1206
1208
  ASSERT_GE(file_meta_from_cf.oldest_ancester_time, start_time);
1207
1209
  ASSERT_LE(file_meta_from_cf.oldest_ancester_time, end_time);
1208
1210
  // More from FileStorageInfo
@@ -1253,6 +1255,7 @@ void CheckLiveFilesMeta(
1253
1255
  ASSERT_EQ(meta.largestkey, expected_meta.largest.user_key().ToString());
1254
1256
  ASSERT_EQ(meta.oldest_blob_file_number,
1255
1257
  expected_meta.oldest_blob_file_number);
1258
+ ASSERT_EQ(meta.epoch_number, expected_meta.epoch_number);
1256
1259
 
1257
1260
  // More from FileStorageInfo
1258
1261
  ASSERT_EQ(meta.file_type, kTableFile);
@@ -14,6 +14,7 @@
14
14
 
15
15
  #include "db/db_test_util.h"
16
16
  #include "db/read_callback.h"
17
+ #include "db/version_edit.h"
17
18
  #include "options/options_helper.h"
18
19
  #include "port/port.h"
19
20
  #include "port/stack_trace.h"
@@ -33,6 +34,18 @@ namespace ROCKSDB_NAMESPACE {
33
34
  class DBTest2 : public DBTestBase {
34
35
  public:
35
36
  DBTest2() : DBTestBase("db_test2", /*env_do_fsync=*/true) {}
37
+ std::vector<FileMetaData*> GetLevelFileMetadatas(int level, int cf = 0) {
38
+ VersionSet* const versions = dbfull()->GetVersionSet();
39
+ assert(versions);
40
+ ColumnFamilyData* const cfd =
41
+ versions->GetColumnFamilySet()->GetColumnFamily(cf);
42
+ assert(cfd);
43
+ Version* const current = cfd->current();
44
+ assert(current);
45
+ VersionStorageInfo* const storage_info = current->storage_info();
46
+ assert(storage_info);
47
+ return storage_info->LevelFiles(level);
48
+ }
36
49
  };
37
50
 
38
51
  #ifndef ROCKSDB_LITE
@@ -4523,7 +4536,7 @@ TEST_F(DBTest2, TraceAndReplay) {
4523
4536
  ASSERT_OK(replayer->Prepare());
4524
4537
  // Replay using 1 thread, 1x speed.
4525
4538
  ASSERT_OK(replayer->Replay(ReplayOptions(1, 1.0), res_cb));
4526
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4539
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4527
4540
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
4528
4541
  ASSERT_EQ(res_handler.GetNumGets(), 3);
4529
4542
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
@@ -4549,7 +4562,7 @@ TEST_F(DBTest2, TraceAndReplay) {
4549
4562
  // Re-replay using 2 threads, 2x speed.
4550
4563
  ASSERT_OK(replayer->Prepare());
4551
4564
  ASSERT_OK(replayer->Replay(ReplayOptions(2, 2.0), res_cb));
4552
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4565
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4553
4566
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
4554
4567
  ASSERT_EQ(res_handler.GetNumGets(), 3);
4555
4568
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
@@ -4559,7 +4572,7 @@ TEST_F(DBTest2, TraceAndReplay) {
4559
4572
  // Re-replay using 2 threads, 1/2 speed.
4560
4573
  ASSERT_OK(replayer->Prepare());
4561
4574
  ASSERT_OK(replayer->Replay(ReplayOptions(2, 0.5), res_cb));
4562
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4575
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4563
4576
  ASSERT_EQ(res_handler.GetNumWrites(), 8);
4564
4577
  ASSERT_EQ(res_handler.GetNumGets(), 3);
4565
4578
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 2);
@@ -4757,7 +4770,7 @@ TEST_F(DBTest2, TraceAndManualReplay) {
4757
4770
  // end, or Prepare() was not called.
4758
4771
  ASSERT_TRUE(s.IsIncomplete());
4759
4772
  ASSERT_TRUE(replayer->Next(nullptr).IsIncomplete());
4760
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4773
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4761
4774
  ASSERT_EQ(res_handler.GetNumWrites(), 9);
4762
4775
  ASSERT_EQ(res_handler.GetNumGets(), 3);
4763
4776
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 8);
@@ -4791,7 +4804,7 @@ TEST_F(DBTest2, TraceAndManualReplay) {
4791
4804
  ASSERT_EQ("write1", value);
4792
4805
  ASSERT_OK(db2->Get(ro, handles[0], "trace-record-write2", &value));
4793
4806
  ASSERT_EQ("write2", value);
4794
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4807
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4795
4808
  ASSERT_EQ(res_handler.GetNumWrites(), 1);
4796
4809
  ASSERT_EQ(res_handler.GetNumGets(), 0);
4797
4810
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
@@ -4816,7 +4829,7 @@ TEST_F(DBTest2, TraceAndManualReplay) {
4816
4829
  record.reset(new GetQueryTraceRecord(invalid_cf_id, "whatever", fake_ts++));
4817
4830
  ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
4818
4831
  ASSERT_TRUE(result == nullptr);
4819
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4832
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4820
4833
  ASSERT_EQ(res_handler.GetNumWrites(), 0);
4821
4834
  ASSERT_EQ(res_handler.GetNumGets(), 2);
4822
4835
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
@@ -4845,7 +4858,7 @@ TEST_F(DBTest2, TraceAndManualReplay) {
4845
4858
  ASSERT_TRUE(replayer->Execute(record, &result).IsCorruption());
4846
4859
  ASSERT_TRUE(result == nullptr);
4847
4860
  }
4848
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4861
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4849
4862
  ASSERT_EQ(res_handler.GetNumWrites(), 0);
4850
4863
  ASSERT_EQ(res_handler.GetNumGets(), 0);
4851
4864
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 4); // Seek x 2 in two iterations
@@ -4900,7 +4913,7 @@ TEST_F(DBTest2, TraceAndManualReplay) {
4900
4913
  std::vector<std::string>({"a"}), fake_ts++));
4901
4914
  ASSERT_TRUE(replayer->Execute(record, &result).IsInvalidArgument());
4902
4915
  ASSERT_TRUE(result == nullptr);
4903
- ASSERT_GT(res_handler.GetAvgLatency(), 0.0);
4916
+ ASSERT_GE(res_handler.GetAvgLatency(), 0.0);
4904
4917
  ASSERT_EQ(res_handler.GetNumWrites(), 0);
4905
4918
  ASSERT_EQ(res_handler.GetNumGets(), 0);
4906
4919
  ASSERT_EQ(res_handler.GetNumIterSeeks(), 0);
@@ -7325,6 +7338,218 @@ TEST_F(DBTest2, PointInTimeRecoveryWithSyncFailureInCFCreation) {
7325
7338
  ReopenWithColumnFamilies({"default", "test1", "test2"}, options);
7326
7339
  }
7327
7340
 
7341
+ #ifndef ROCKSDB_LITE
7342
+ TEST_F(DBTest2, SortL0FilesByEpochNumber) {
7343
+ Options options = CurrentOptions();
7344
+ options.num_levels = 1;
7345
+ options.compaction_style = kCompactionStyleUniversal;
7346
+ DestroyAndReopen(options);
7347
+
7348
+ // Set up L0 files to be sorted by their epoch_number
7349
+ ASSERT_OK(Put("key1", "seq1"));
7350
+
7351
+ SstFileWriter sst_file_writer{EnvOptions(), options};
7352
+ std::string external_file1 = dbname_ + "/test_files1.sst";
7353
+ std::string external_file2 = dbname_ + "/test_files2.sst";
7354
+ ASSERT_OK(sst_file_writer.Open(external_file1));
7355
+ ASSERT_OK(sst_file_writer.Put("key2", "seq0"));
7356
+ ASSERT_OK(sst_file_writer.Finish());
7357
+ ASSERT_OK(sst_file_writer.Open(external_file2));
7358
+ ASSERT_OK(sst_file_writer.Put("key3", "seq0"));
7359
+ ASSERT_OK(sst_file_writer.Finish());
7360
+
7361
+ ASSERT_OK(Put("key4", "seq2"));
7362
+ ASSERT_OK(Flush());
7363
+
7364
+ auto* handle = db_->DefaultColumnFamily();
7365
+ ASSERT_OK(db_->IngestExternalFile(handle, {external_file1, external_file2},
7366
+ IngestExternalFileOptions()));
7367
+
7368
+ // To verify L0 files are sorted by epoch_number in descending order
7369
+ // instead of largest_seqno
7370
+ std::vector<FileMetaData*> level0_files = GetLevelFileMetadatas(0 /* level*/);
7371
+ ASSERT_EQ(level0_files.size(), 3);
7372
+
7373
+ EXPECT_EQ(level0_files[0]->epoch_number, 3);
7374
+ EXPECT_EQ(level0_files[0]->fd.largest_seqno, 0);
7375
+ ASSERT_EQ(level0_files[0]->num_entries, 1);
7376
+ ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key3"));
7377
+
7378
+ EXPECT_EQ(level0_files[1]->epoch_number, 2);
7379
+ EXPECT_EQ(level0_files[1]->fd.largest_seqno, 0);
7380
+ ASSERT_EQ(level0_files[1]->num_entries, 1);
7381
+ ASSERT_TRUE(level0_files[1]->largest.user_key() == Slice("key2"));
7382
+
7383
+ EXPECT_EQ(level0_files[2]->epoch_number, 1);
7384
+ EXPECT_EQ(level0_files[2]->fd.largest_seqno, 2);
7385
+ ASSERT_EQ(level0_files[2]->num_entries, 2);
7386
+ ASSERT_TRUE(level0_files[2]->largest.user_key() == Slice("key4"));
7387
+ ASSERT_TRUE(level0_files[2]->smallest.user_key() == Slice("key1"));
7388
+
7389
+ // To verify compacted file is assigned with the minimum epoch_number
7390
+ // among input files'
7391
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
7392
+
7393
+ level0_files = GetLevelFileMetadatas(0 /* level*/);
7394
+ ASSERT_EQ(level0_files.size(), 1);
7395
+ EXPECT_EQ(level0_files[0]->epoch_number, 1);
7396
+ ASSERT_EQ(level0_files[0]->num_entries, 4);
7397
+ ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key4"));
7398
+ ASSERT_TRUE(level0_files[0]->smallest.user_key() == Slice("key1"));
7399
+ }
7400
+
7401
+ TEST_F(DBTest2, SameEpochNumberAfterCompactRangeChangeLevel) {
7402
+ Options options = CurrentOptions();
7403
+ options.num_levels = 7;
7404
+ options.compaction_style = CompactionStyle::kCompactionStyleLevel;
7405
+ options.disable_auto_compactions = true;
7406
+ DestroyAndReopen(options);
7407
+
7408
+ // Set up the file in L1 to be moved to L0 in later step of CompactRange()
7409
+ ASSERT_OK(Put("key1", "seq1"));
7410
+ ASSERT_OK(Flush());
7411
+ MoveFilesToLevel(1, 0);
7412
+ std::vector<FileMetaData*> level0_files = GetLevelFileMetadatas(0 /* level*/);
7413
+ ASSERT_EQ(level0_files.size(), 0);
7414
+ std::vector<FileMetaData*> level1_files = GetLevelFileMetadatas(1 /* level*/);
7415
+ ASSERT_EQ(level1_files.size(), 1);
7416
+ std::vector<FileMetaData*> level2_files = GetLevelFileMetadatas(2 /* level*/);
7417
+ ASSERT_EQ(level2_files.size(), 0);
7418
+
7419
+ ASSERT_EQ(level1_files[0]->epoch_number, 1);
7420
+
7421
+ // To verify CompactRange() moving file to L0 still keeps the file's
7422
+ // epoch_number
7423
+ CompactRangeOptions croptions;
7424
+ croptions.change_level = true;
7425
+ croptions.target_level = 0;
7426
+ ASSERT_OK(db_->CompactRange(croptions, nullptr, nullptr));
7427
+ level0_files = GetLevelFileMetadatas(0 /* level*/);
7428
+ level1_files = GetLevelFileMetadatas(1 /* level*/);
7429
+ ASSERT_EQ(level0_files.size(), 1);
7430
+ ASSERT_EQ(level1_files.size(), 0);
7431
+
7432
+ EXPECT_EQ(level0_files[0]->epoch_number, 1);
7433
+
7434
+ ASSERT_EQ(level0_files[0]->num_entries, 1);
7435
+ ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key1"));
7436
+ }
7437
+
7438
+ TEST_F(DBTest2, RecoverEpochNumber) {
7439
+ for (bool allow_ingest_behind : {true, false}) {
7440
+ Options options = CurrentOptions();
7441
+ options.allow_ingest_behind = allow_ingest_behind;
7442
+ options.num_levels = 7;
7443
+ options.compaction_style = kCompactionStyleLevel;
7444
+ options.disable_auto_compactions = true;
7445
+ DestroyAndReopen(options);
7446
+ CreateAndReopenWithCF({"cf1"}, options);
7447
+ VersionSet* versions = dbfull()->GetVersionSet();
7448
+ assert(versions);
7449
+ const ColumnFamilyData* default_cf =
7450
+ versions->GetColumnFamilySet()->GetDefault();
7451
+ const ColumnFamilyData* cf1 =
7452
+ versions->GetColumnFamilySet()->GetColumnFamily("cf1");
7453
+
7454
+ // Set up files in default CF to recover in later step
7455
+ ASSERT_OK(Put("key1", "epoch1"));
7456
+ ASSERT_OK(Flush());
7457
+ MoveFilesToLevel(1 /* level*/, 0 /* cf*/);
7458
+ ASSERT_OK(Put("key2", "epoch2"));
7459
+ ASSERT_OK(Flush());
7460
+
7461
+ std::vector<FileMetaData*> level0_files =
7462
+ GetLevelFileMetadatas(0 /* level*/);
7463
+ ASSERT_EQ(level0_files.size(), 1);
7464
+ ASSERT_EQ(level0_files[0]->epoch_number,
7465
+ allow_ingest_behind
7466
+ ? 2 + kReservedEpochNumberForFileIngestedBehind
7467
+ : 2);
7468
+ ASSERT_EQ(level0_files[0]->num_entries, 1);
7469
+ ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key2"));
7470
+
7471
+ std::vector<FileMetaData*> level1_files =
7472
+ GetLevelFileMetadatas(1 /* level*/);
7473
+ ASSERT_EQ(level1_files.size(), 1);
7474
+ ASSERT_EQ(level1_files[0]->epoch_number,
7475
+ allow_ingest_behind
7476
+ ? 1 + kReservedEpochNumberForFileIngestedBehind
7477
+ : 1);
7478
+ ASSERT_EQ(level1_files[0]->num_entries, 1);
7479
+ ASSERT_TRUE(level1_files[0]->largest.user_key() == Slice("key1"));
7480
+
7481
+ // Set up files in cf1 to recover in later step
7482
+ ASSERT_OK(Put(1 /* cf */, "cf1_key1", "epoch1"));
7483
+ ASSERT_OK(Flush(1 /* cf */));
7484
+
7485
+ std::vector<FileMetaData*> level0_files_cf1 =
7486
+ GetLevelFileMetadatas(0 /* level*/, 1 /* cf*/);
7487
+ ASSERT_EQ(level0_files_cf1.size(), 1);
7488
+ ASSERT_EQ(level0_files_cf1[0]->epoch_number,
7489
+ allow_ingest_behind
7490
+ ? 1 + kReservedEpochNumberForFileIngestedBehind
7491
+ : 1);
7492
+ ASSERT_EQ(level0_files_cf1[0]->num_entries, 1);
7493
+ ASSERT_TRUE(level0_files_cf1[0]->largest.user_key() == Slice("cf1_key1"));
7494
+
7495
+ ASSERT_EQ(default_cf->GetNextEpochNumber(),
7496
+ allow_ingest_behind
7497
+ ? 3 + kReservedEpochNumberForFileIngestedBehind
7498
+ : 3);
7499
+ ASSERT_EQ(cf1->GetNextEpochNumber(),
7500
+ allow_ingest_behind
7501
+ ? 2 + kReservedEpochNumberForFileIngestedBehind
7502
+ : 2);
7503
+
7504
+ // To verify epoch_number of files of different levels/CFs are
7505
+ // persisted and recovered correctly
7506
+ ReopenWithColumnFamilies({"default", "cf1"}, options);
7507
+ versions = dbfull()->GetVersionSet();
7508
+ assert(versions);
7509
+ default_cf = versions->GetColumnFamilySet()->GetDefault();
7510
+ cf1 = versions->GetColumnFamilySet()->GetColumnFamily("cf1");
7511
+
7512
+ level0_files = GetLevelFileMetadatas(0 /* level*/);
7513
+ ASSERT_EQ(level0_files.size(), 1);
7514
+ EXPECT_EQ(level0_files[0]->epoch_number,
7515
+ allow_ingest_behind
7516
+ ? 2 + kReservedEpochNumberForFileIngestedBehind
7517
+ : 2);
7518
+ ASSERT_EQ(level0_files[0]->num_entries, 1);
7519
+ ASSERT_TRUE(level0_files[0]->largest.user_key() == Slice("key2"));
7520
+
7521
+ level1_files = GetLevelFileMetadatas(1 /* level*/);
7522
+ ASSERT_EQ(level1_files.size(), 1);
7523
+ EXPECT_EQ(level1_files[0]->epoch_number,
7524
+ allow_ingest_behind
7525
+ ? 1 + kReservedEpochNumberForFileIngestedBehind
7526
+ : 1);
7527
+ ASSERT_EQ(level1_files[0]->num_entries, 1);
7528
+ ASSERT_TRUE(level1_files[0]->largest.user_key() == Slice("key1"));
7529
+
7530
+ level0_files_cf1 = GetLevelFileMetadatas(0 /* level*/, 1 /* cf*/);
7531
+ ASSERT_EQ(level0_files_cf1.size(), 1);
7532
+ EXPECT_EQ(level0_files_cf1[0]->epoch_number,
7533
+ allow_ingest_behind
7534
+ ? 1 + kReservedEpochNumberForFileIngestedBehind
7535
+ : 1);
7536
+ ASSERT_EQ(level0_files_cf1[0]->num_entries, 1);
7537
+ ASSERT_TRUE(level0_files_cf1[0]->largest.user_key() == Slice("cf1_key1"));
7538
+
7539
+ // To verify next epoch number is recovered correctly
7540
+ EXPECT_EQ(default_cf->GetNextEpochNumber(),
7541
+ allow_ingest_behind
7542
+ ? 3 + kReservedEpochNumberForFileIngestedBehind
7543
+ : 3);
7544
+ EXPECT_EQ(cf1->GetNextEpochNumber(),
7545
+ allow_ingest_behind
7546
+ ? 2 + kReservedEpochNumberForFileIngestedBehind
7547
+ : 2);
7548
+ }
7549
+ }
7550
+
7551
+ #endif // ROCKSDB_LITE
7552
+
7328
7553
  TEST_F(DBTest2, RenameDirectory) {
7329
7554
  Options options = CurrentOptions();
7330
7555
  DestroyAndReopen(options);
@@ -7509,6 +7734,80 @@ TEST_F(DBTest2, SstUniqueIdVerifyMultiCFs) {
7509
7734
  ASSERT_TRUE(s.IsCorruption());
7510
7735
  }
7511
7736
 
7737
+ TEST_F(DBTest2, BestEffortsRecoveryWithSstUniqueIdVerification) {
7738
+ const auto tamper_with_uniq_id = [&](void* arg) {
7739
+ auto props = static_cast<TableProperties*>(arg);
7740
+ assert(props);
7741
+ // update table property session_id to a different one
7742
+ props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
7743
+ };
7744
+
7745
+ const auto assert_db = [&](size_t expected_count,
7746
+ const std::string& expected_v) {
7747
+ std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
7748
+ size_t cnt = 0;
7749
+ for (it->SeekToFirst(); it->Valid(); it->Next(), ++cnt) {
7750
+ ASSERT_EQ(std::to_string(cnt), it->key());
7751
+ ASSERT_EQ(expected_v, it->value());
7752
+ }
7753
+ ASSERT_EQ(expected_count, cnt);
7754
+ };
7755
+
7756
+ const int num_l0_compaction_trigger = 8;
7757
+ const int num_l0 = num_l0_compaction_trigger - 1;
7758
+ Options options = CurrentOptions();
7759
+ options.level0_file_num_compaction_trigger = num_l0_compaction_trigger;
7760
+
7761
+ for (int k = 0; k < num_l0; ++k) {
7762
+ // Allow mismatch for now
7763
+ options.verify_sst_unique_id_in_manifest = false;
7764
+
7765
+ DestroyAndReopen(options);
7766
+
7767
+ constexpr size_t num_keys_per_file = 10;
7768
+ for (int i = 0; i < num_l0; ++i) {
7769
+ for (size_t j = 0; j < num_keys_per_file; ++j) {
7770
+ ASSERT_OK(Put(std::to_string(j), "v" + std::to_string(i)));
7771
+ }
7772
+ if (i == k) {
7773
+ SyncPoint::GetInstance()->DisableProcessing();
7774
+ SyncPoint::GetInstance()->SetCallBack(
7775
+ "PropertyBlockBuilder::AddTableProperty:Start",
7776
+ tamper_with_uniq_id);
7777
+ SyncPoint::GetInstance()->EnableProcessing();
7778
+ }
7779
+ ASSERT_OK(Flush());
7780
+ }
7781
+
7782
+ options.verify_sst_unique_id_in_manifest = true;
7783
+ Status s = TryReopen(options);
7784
+ ASSERT_TRUE(s.IsCorruption());
7785
+
7786
+ options.best_efforts_recovery = true;
7787
+ Reopen(options);
7788
+ assert_db(k == 0 ? 0 : num_keys_per_file, "v" + std::to_string(k - 1));
7789
+
7790
+ // Reopen with regular recovery
7791
+ options.best_efforts_recovery = false;
7792
+ Reopen(options);
7793
+ assert_db(k == 0 ? 0 : num_keys_per_file, "v" + std::to_string(k - 1));
7794
+
7795
+ SyncPoint::GetInstance()->DisableProcessing();
7796
+ SyncPoint::GetInstance()->ClearAllCallBacks();
7797
+
7798
+ for (size_t i = 0; i < num_keys_per_file; ++i) {
7799
+ ASSERT_OK(Put(std::to_string(i), "v"));
7800
+ }
7801
+ ASSERT_OK(Flush());
7802
+ Reopen(options);
7803
+ {
7804
+ for (size_t i = 0; i < num_keys_per_file; ++i) {
7805
+ ASSERT_EQ("v", Get(std::to_string(i)));
7806
+ }
7807
+ }
7808
+ }
7809
+ }
7810
+
7512
7811
  #ifndef ROCKSDB_LITE
7513
7812
  TEST_F(DBTest2, GetLatestSeqAndTsForKey) {
7514
7813
  Destroy(last_options_);