@nxtedition/rocksdb 8.1.17 → 8.2.0-alpha.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/binding.cc +32 -2
  2. package/binding.gyp +8 -0
  3. package/deps/liburing/liburing.gyp +20 -0
  4. package/deps/rocksdb/rocksdb/CMakeLists.txt +4 -0
  5. package/deps/rocksdb/rocksdb/TARGETS +7 -0
  6. package/deps/rocksdb/rocksdb/cache/cache.cc +43 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +8 -5
  8. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +1 -1
  9. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -1
  10. package/deps/rocksdb/rocksdb/cache/cache_test.cc +12 -48
  11. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +26 -18
  12. package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -62
  13. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +119 -44
  14. package/deps/rocksdb/rocksdb/cache/clock_cache.h +34 -29
  15. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +3 -3
  16. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -2
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +148 -209
  18. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +118 -284
  19. package/deps/rocksdb/rocksdb/cache/lru_cache.h +23 -71
  20. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +351 -392
  21. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +5 -2
  22. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +296 -0
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +52 -0
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +22 -19
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +56 -20
  26. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -0
  27. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +4 -0
  28. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +3 -3
  29. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +19 -25
  30. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +216 -0
  31. package/deps/rocksdb/rocksdb/db/c.cc +90 -1
  32. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -7
  33. package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
  34. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +24 -7
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +18 -12
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -1
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +245 -302
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -2
  41. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +5 -0
  42. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +75 -15
  43. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -3
  44. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -5
  45. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +91 -1
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5 -12
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -4
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +47 -24
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +4 -2
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -3
  52. package/deps/rocksdb/rocksdb/db/db_iter.cc +28 -29
  53. package/deps/rocksdb/rocksdb/db/db_iter.h +0 -3
  54. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +176 -0
  55. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +391 -2
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +26 -0
  57. package/deps/rocksdb/rocksdb/db/db_write_test.cc +13 -5
  58. package/deps/rocksdb/rocksdb/db/dbformat.h +3 -1
  59. package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +0 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +0 -6
  61. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +3 -0
  62. package/deps/rocksdb/rocksdb/db/forward_iterator.h +1 -1
  63. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
  64. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +68 -40
  65. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +3 -3
  66. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +115 -0
  67. package/deps/rocksdb/rocksdb/db/internal_stats.cc +169 -72
  68. package/deps/rocksdb/rocksdb/db/internal_stats.h +36 -7
  69. package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
  70. package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
  71. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +151 -0
  72. package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +47 -16
  73. package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +10 -8
  74. package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +91 -93
  75. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +1 -2
  76. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +1 -1
  77. package/deps/rocksdb/rocksdb/db/version_set.cc +30 -14
  78. package/deps/rocksdb/rocksdb/db/version_set.h +1 -0
  79. package/deps/rocksdb/rocksdb/db/write_stall_stats.cc +179 -0
  80. package/deps/rocksdb/rocksdb/db/write_stall_stats.h +47 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +109 -7
  82. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +147 -12
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +31 -0
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -1
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +42 -59
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +7 -4
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +7 -0
  89. package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +6 -10
  90. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +6 -0
  91. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
  92. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +127 -36
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +8 -0
  94. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +35 -0
  95. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -8
  96. package/deps/rocksdb/rocksdb/file/file_util.cc +14 -10
  97. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +183 -63
  98. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +159 -66
  99. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +3 -1
  100. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +52 -5
  101. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -3
  102. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +134 -73
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +46 -3
  104. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -0
  105. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +0 -6
  106. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +7 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -2
  108. package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +6 -1
  109. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +3 -3
  110. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +28 -0
  112. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  113. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +39 -0
  114. package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
  115. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +9 -1
  116. package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -2
  117. package/deps/rocksdb/rocksdb/port/stack_trace.cc +17 -7
  118. package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -0
  119. package/deps/rocksdb/rocksdb/src.mk +4 -0
  120. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -34
  121. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +11 -12
  122. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +5 -5
  123. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +126 -132
  124. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +16 -16
  125. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +0 -16
  126. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -1
  127. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
  128. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -4
  129. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  130. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
  131. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +370 -0
  132. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +44 -0
  133. package/deps/rocksdb/rocksdb/table/get_context.cc +4 -2
  134. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +555 -267
  135. package/deps/rocksdb/rocksdb/table/merging_iterator.h +10 -5
  136. package/deps/rocksdb/rocksdb/table/table_test.cc +113 -70
  137. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +96 -0
  138. package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +117 -0
  139. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +5 -3
  140. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +3 -3
  141. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +1 -1
  142. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +9 -2
  143. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -1
  144. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +11 -0
  145. package/deps/rocksdb/rocksdb.gyp +7 -1
  146. package/package.json +1 -1
  147. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -1661,6 +1661,217 @@ TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
1661
1661
  ASSERT_EQ(1, num_range_deletions);
1662
1662
  }
1663
1663
 
1664
+ TEST_F(DBRangeDelTest, LevelCompactOutputCutAtRangeTombstoneForTtlFiles) {
1665
+ Options options = CurrentOptions();
1666
+ options.compression = kNoCompression;
1667
+ options.compaction_pri = kMinOverlappingRatio;
1668
+ options.disable_auto_compactions = true;
1669
+ options.ttl = 24 * 60 * 60; // 24 hours
1670
+ options.target_file_size_base = 8 << 10;
1671
+ env_->SetMockSleep();
1672
+ options.env = env_;
1673
+ DestroyAndReopen(options);
1674
+
1675
+ Random rnd(301);
1676
+ // Fill some data so that future compactions are not bottommost level
1677
+ // compaction, and hence they would try to cut around files for ttl
1678
+ for (int i = 5; i < 10; ++i) {
1679
+ ASSERT_OK(Put(Key(i), rnd.RandomString(1 << 10)));
1680
+ }
1681
+ ASSERT_OK(Flush());
1682
+ MoveFilesToLevel(3);
1683
+ ASSERT_EQ("0,0,0,1", FilesPerLevel());
1684
+
1685
+ for (int i = 5; i < 10; ++i) {
1686
+ ASSERT_OK(Put(Key(i), rnd.RandomString(1 << 10)));
1687
+ }
1688
+ ASSERT_OK(Flush());
1689
+ MoveFilesToLevel(1);
1690
+ ASSERT_EQ("0,1,0,1", FilesPerLevel());
1691
+
1692
+ env_->MockSleepForSeconds(20 * 60 * 60);
1693
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
1694
+ Key(11), Key(12)));
1695
+ ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10)));
1696
+ ASSERT_OK(Flush());
1697
+ ASSERT_EQ("1,1,0,1", FilesPerLevel());
1698
+ // L0 file is new, L1 and L3 files are old and qualified for TTL
1699
+ env_->MockSleepForSeconds(10 * 60 * 60);
1700
+ MoveFilesToLevel(1);
1701
+ // L1 output should be cut into 3 files:
1702
+ // File 0: Key(0)
1703
+ // File 1: (qualified for TTL): Key(5) - Key(10)
1704
+ // File 2: DeleteRange [11, 12)
1705
+ ASSERT_EQ("0,3,0,1", FilesPerLevel());
1706
+ }
1707
+
1708
+ // Test SST partitioner that cuts after every single key
1709
+ class SingleKeySstPartitioner : public SstPartitioner {
1710
+ public:
1711
+ const char* Name() const override { return "SingleKeySstPartitioner"; }
1712
+
1713
+ PartitionerResult ShouldPartition(
1714
+ const PartitionerRequest& /*request*/) override {
1715
+ return kRequired;
1716
+ }
1717
+
1718
+ bool CanDoTrivialMove(const Slice& /*smallest_user_key*/,
1719
+ const Slice& /*largest_user_key*/) override {
1720
+ return false;
1721
+ }
1722
+ };
1723
+
1724
+ class SingleKeySstPartitionerFactory : public SstPartitionerFactory {
1725
+ public:
1726
+ static const char* kClassName() { return "SingleKeySstPartitionerFactory"; }
1727
+ const char* Name() const override { return kClassName(); }
1728
+
1729
+ std::unique_ptr<SstPartitioner> CreatePartitioner(
1730
+ const SstPartitioner::Context& /* context */) const override {
1731
+ return std::unique_ptr<SstPartitioner>(new SingleKeySstPartitioner());
1732
+ }
1733
+ };
1734
+
1735
+ TEST_F(DBRangeDelTest, CompactionEmitRangeTombstoneToSSTPartitioner) {
1736
+ Options options = CurrentOptions();
1737
+ auto factory = std::make_shared<SingleKeySstPartitionerFactory>();
1738
+ options.sst_partitioner_factory = factory;
1739
+ options.disable_auto_compactions = true;
1740
+ DestroyAndReopen(options);
1741
+
1742
+ Random rnd(301);
1743
+ // range deletion keys are not processed when compacting to bottommost level,
1744
+ // so creating a file at older level to make the next compaction not
1745
+ // bottommost level
1746
+ ASSERT_OK(db_->Put(WriteOptions(), Key(4), rnd.RandomString(10)));
1747
+ ASSERT_OK(Flush());
1748
+ MoveFilesToLevel(5);
1749
+
1750
+ ASSERT_OK(db_->Put(WriteOptions(), Key(1), rnd.RandomString(10)));
1751
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
1752
+ Key(5)));
1753
+ ASSERT_OK(Flush());
1754
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1755
+ MoveFilesToLevel(1);
1756
+ // SSTPartitioner decides to cut when range tombstone start key is passed to
1757
+ // it. Note that the range tombstone [2, 5) itself spans multiple keys, but we
1758
+ // are not able to partition within its range yet.
1759
+ ASSERT_EQ(2, NumTableFilesAtLevel(1));
1760
+ }
1761
+
1762
+ TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenPointKeyAndTombstone) {
1763
+ // L2 has 2 files
1764
+ // L2_0: 0, 1, 2, 3, 4
1765
+ // L2_1: 5, 6, 7
1766
+ // L0 has 1 file
1767
+ // L0: 0, [5, 6), 8
1768
+ // max_compaction_bytes is less than the size of L2_0 and L2_1.
1769
+ // When compacting L0 into L1, it should split into 3 files:
1770
+ // compaction output should cut before key 5 and key 8 to
1771
+ // limit future compaction size.
1772
+ const int kNumPerFile = 4, kNumFiles = 2;
1773
+ Options options = CurrentOptions();
1774
+ options.disable_auto_compactions = true;
1775
+ options.target_file_size_base = 9 * 1024;
1776
+ options.max_compaction_bytes = 9 * 1024;
1777
+ DestroyAndReopen(options);
1778
+ Random rnd(301);
1779
+ for (int i = 0; i < kNumFiles; ++i) {
1780
+ std::vector<std::string> values;
1781
+ for (int j = 0; j < kNumPerFile; j++) {
1782
+ values.push_back(rnd.RandomString(3 << 10));
1783
+ ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1784
+ }
1785
+ }
1786
+ ASSERT_OK(db_->Flush(FlushOptions()));
1787
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1788
+ MoveFilesToLevel(2);
1789
+ ASSERT_EQ(2, NumTableFilesAtLevel(2));
1790
+ ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10)));
1791
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(5),
1792
+ Key(6)));
1793
+ ASSERT_OK(Put(Key(8), rnd.RandomString(1 << 10)));
1794
+ ASSERT_OK(db_->Flush(FlushOptions()));
1795
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1796
+
1797
+ ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1798
+ true /* disallow_trivial_move */));
1799
+ ASSERT_EQ(3, NumTableFilesAtLevel(1));
1800
+ }
1801
+
1802
+ TEST_F(DBRangeDelTest, OversizeCompactionGapBetweenTombstone) {
1803
+ // L2 has two files
1804
+ // L2_0: 0, 1, 2, 3, 4. L2_1: 5, 6, 7
1805
+ // L0 has two range tombstones [0, 1), [7, 8).
1806
+ // max_compaction_bytes is less than the size of L2_0.
1807
+ // When compacting L0 into L1, the two range tombstones should be
1808
+ // split into two files.
1809
+ const int kNumPerFile = 4, kNumFiles = 2;
1810
+ Options options = CurrentOptions();
1811
+ options.disable_auto_compactions = true;
1812
+ options.target_file_size_base = 9 * 1024;
1813
+ options.max_compaction_bytes = 9 * 1024;
1814
+ DestroyAndReopen(options);
1815
+ Random rnd(301);
1816
+ for (int i = 0; i < kNumFiles; ++i) {
1817
+ std::vector<std::string> values;
1818
+ // Write 12K (4 values, each 3K)
1819
+ for (int j = 0; j < kNumPerFile; j++) {
1820
+ values.push_back(rnd.RandomString(3 << 10));
1821
+ ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
1822
+ }
1823
+ }
1824
+ ASSERT_OK(db_->Flush(FlushOptions()));
1825
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1826
+ MoveFilesToLevel(2);
1827
+ ASSERT_EQ(2, NumTableFilesAtLevel(2));
1828
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1829
+ Key(1)));
1830
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(7),
1831
+ Key(8)));
1832
+ ASSERT_OK(db_->Flush(FlushOptions()));
1833
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1834
+
1835
+ ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1836
+ true /* disallow_trivial_move */));
1837
+ // This is L0 -> L1 compaction
1838
+ // The two range tombstones are broken up into two output files
1839
+ // to limit compaction size.
1840
+ ASSERT_EQ(2, NumTableFilesAtLevel(1));
1841
+ }
1842
+
1843
+ TEST_F(DBRangeDelTest, OversizeCompactionPointKeyWithinRangetombstone) {
1844
+ // L2 has two files
1845
+ // L2_0: 0, 1, 2, 3, 4. L2_1: 6, 7, 8
1846
+ // L0 has [0, 9) and point key 5
1847
+ // max_compaction_bytes is less than the size of L2_0.
1848
+ // When compacting L0 into L1, the compaction should cut at point key 5.
1849
+ Options options = CurrentOptions();
1850
+ options.disable_auto_compactions = true;
1851
+ options.target_file_size_base = 9 * 1024;
1852
+ options.max_compaction_bytes = 9 * 1024;
1853
+ DestroyAndReopen(options);
1854
+ Random rnd(301);
1855
+ for (int i = 0; i < 9; ++i) {
1856
+ if (i == 5) {
1857
+ ++i;
1858
+ }
1859
+ ASSERT_OK(Put(Key(i), rnd.RandomString(3 << 10)));
1860
+ }
1861
+ ASSERT_OK(db_->Flush(FlushOptions()));
1862
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1863
+ MoveFilesToLevel(2);
1864
+ ASSERT_EQ(2, NumTableFilesAtLevel(2));
1865
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
1866
+ Key(9)));
1867
+ ASSERT_OK(Put(Key(5), rnd.RandomString(1 << 10)));
1868
+ ASSERT_OK(db_->Flush(FlushOptions()));
1869
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
1870
+ ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
1871
+ true /* disallow_trivial_move */));
1872
+ ASSERT_EQ(2, NumTableFilesAtLevel(1));
1873
+ }
1874
+
1664
1875
  TEST_F(DBRangeDelTest, OverlappedTombstones) {
1665
1876
  const int kNumPerFile = 4, kNumFiles = 2;
1666
1877
  Options options = CurrentOptions();
@@ -2093,6 +2304,7 @@ TEST_F(DBRangeDelTest, NonOverlappingTombstonAtBoundary) {
2093
2304
  options.compression = kNoCompression;
2094
2305
  options.disable_auto_compactions = true;
2095
2306
  options.target_file_size_base = 2 * 1024;
2307
+ options.level_compaction_dynamic_file_size = false;
2096
2308
  DestroyAndReopen(options);
2097
2309
 
2098
2310
  Random rnd(301);
@@ -2508,7 +2720,7 @@ TEST_F(DBRangeDelTest, LeftSentinelKeyTest) {
2508
2720
  options.compression = kNoCompression;
2509
2721
  options.disable_auto_compactions = true;
2510
2722
  options.target_file_size_base = 3 * 1024;
2511
- options.max_compaction_bytes = 1024;
2723
+ options.max_compaction_bytes = 2048;
2512
2724
 
2513
2725
  DestroyAndReopen(options);
2514
2726
  // L2
@@ -2554,7 +2766,7 @@ TEST_F(DBRangeDelTest, LeftSentinelKeyTestWithNewerKey) {
2554
2766
  options.compression = kNoCompression;
2555
2767
  options.disable_auto_compactions = true;
2556
2768
  options.target_file_size_base = 3 * 1024;
2557
- options.max_compaction_bytes = 1024;
2769
+ options.max_compaction_bytes = 3 * 1024;
2558
2770
 
2559
2771
  DestroyAndReopen(options);
2560
2772
  // L2
@@ -3015,6 +3227,183 @@ TEST_F(DBRangeDelTest, DoubleCountRangeTombstoneCompensatedSize) {
3015
3227
  db_->ReleaseSnapshot(snapshot);
3016
3228
  }
3017
3229
 
3230
+ TEST_F(DBRangeDelTest, AddRangeDelsSameLowerAndUpperBound) {
3231
+ // Test for an edge case where CompactionOutputs::AddRangeDels()
3232
+ // is called with an empty range: `range_tombstone_lower_bound_` is not empty
3233
+ // and has the same user_key and sequence number as `next_table_min_key`.
3234
+ // This used to cause file's smallest and largest key to be incorrectly set
3235
+ // such that smallest > largest, and fail some assertions in iterator and/or
3236
+ // assertion in VersionSet::ApproximateSize().
3237
+ Options opts = CurrentOptions();
3238
+ opts.disable_auto_compactions = true;
3239
+ opts.target_file_size_base = 1 << 10;
3240
+ opts.level_compaction_dynamic_file_size = false;
3241
+ DestroyAndReopen(opts);
3242
+
3243
+ Random rnd(301);
3244
+ // Create file at bottommost level so the manual compaction below is
3245
+ // non-bottommost level and goes through code path like compensate range
3246
+ // tombstone size.
3247
+ ASSERT_OK(Put(Key(1), "v1"));
3248
+ ASSERT_OK(Put(Key(4), "v2"));
3249
+ ASSERT_OK(Flush());
3250
+ MoveFilesToLevel(6);
3251
+
3252
+ ASSERT_OK(Put(Key(1), rnd.RandomString(4 << 10)));
3253
+ ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
3254
+ // So Key(3) does not get dropped.
3255
+ const Snapshot* snapshot = db_->GetSnapshot();
3256
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
3257
+ Key(4)));
3258
+ ASSERT_OK(Flush());
3259
+
3260
+ ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
3261
+ ASSERT_OK(Put(Key(4), rnd.RandomString(4 << 10)));
3262
+ ASSERT_OK(Flush());
3263
+
3264
+ MoveFilesToLevel(1);
3265
+ // Each file will have two keys, with Key(3) straddling the two files.
3266
+ // File 1: Key(1)@1, Key(3)@6, DeleteRange ends at Key(3)@6
3267
+ // File 2: Key(3)@4, Key(4)@7, DeleteRange start from Key(3)@4
3268
+ ASSERT_EQ(NumTableFilesAtLevel(1), 2);
3269
+
3270
+ // Manually update compaction output file cutting decisions
3271
+ // to cut before range tombstone sentinel Key(3)@4
3272
+ // and the point key Key(3)@4 itself
3273
+ SyncPoint::GetInstance()->SetCallBack(
3274
+ "CompactionOutputs::ShouldStopBefore::manual_decision", [opts](void* p) {
3275
+ auto* pair = (std::pair<bool*, const Slice>*)p;
3276
+ if ((opts.comparator->Compare(ExtractUserKey(pair->second), Key(3)) ==
3277
+ 0) &&
3278
+ (GetInternalKeySeqno(pair->second) <= 4)) {
3279
+ *(pair->first) = true;
3280
+ }
3281
+ });
3282
+ SyncPoint::GetInstance()->EnableProcessing();
3283
+ std::string begin_key = Key(0);
3284
+ std::string end_key = Key(5);
3285
+ Slice begin_slice{begin_key};
3286
+ Slice end_slice{end_key};
3287
+ ASSERT_OK(dbfull()->RunManualCompaction(
3288
+ static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
3289
+ ->cfd(),
3290
+ 1, 2, CompactRangeOptions(), &begin_slice, &end_slice, true,
3291
+ true /* disallow_trivial_move */,
3292
+ std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
3293
+ "" /*trim_ts*/));
3294
+ // iterate through to check if any assertion breaks
3295
+ std::unique_ptr<Iterator> iter{db_->NewIterator(ReadOptions())};
3296
+ iter->SeekToFirst();
3297
+ std::vector<int> expected{1, 3, 4};
3298
+ for (auto i : expected) {
3299
+ ASSERT_TRUE(iter->Valid());
3300
+ ASSERT_EQ(iter->key(), Key(i));
3301
+ iter->Next();
3302
+ }
3303
+ ASSERT_TRUE(iter->status().ok() && !iter->Valid());
3304
+ db_->ReleaseSnapshot(snapshot);
3305
+ }
3306
+
3307
+ TEST_F(DBRangeDelTest, AddRangeDelsSingleUserKeyTombstoneOnlyFile) {
3308
+ // Test for an edge case where CompactionOutputs::AddRangeDels()
3309
+ // is called with an SST file that has no point keys, and that
3310
+ // the lower bound and upper bound have the same user key.
3311
+ // This could cause a file's smallest and largest key to be incorrectly set
3312
+ // such that smallest > largest, and fail some assertions in iterator and/or
3313
+ // assertion in VersionSet::ApproximateSize().
3314
+ Options opts = CurrentOptions();
3315
+ opts.disable_auto_compactions = true;
3316
+ opts.target_file_size_base = 1 << 10;
3317
+ opts.level_compaction_dynamic_file_size = false;
3318
+ DestroyAndReopen(opts);
3319
+
3320
+ Random rnd(301);
3321
+ // Create file at bottommost level so the manual compaction below is
3322
+ // non-bottommost level and goes through code path like compensate range
3323
+ // tombstone size.
3324
+ ASSERT_OK(Put(Key(1), "v1"));
3325
+ ASSERT_OK(Put(Key(4), "v2"));
3326
+ ASSERT_OK(Flush());
3327
+ MoveFilesToLevel(6);
3328
+
3329
+ ASSERT_OK(Put(Key(1), rnd.RandomString(10)));
3330
+ // Key(3)@4
3331
+ ASSERT_OK(Put(Key(3), rnd.RandomString(10)));
3332
+ const Snapshot* snapshot1 = db_->GetSnapshot();
3333
+ // Key(3)@5
3334
+ ASSERT_OK(Put(Key(3), rnd.RandomString(10)));
3335
+ const Snapshot* snapshot2 = db_->GetSnapshot();
3336
+ ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(2),
3337
+ Key(4)));
3338
+ // Key(3)@7
3339
+ ASSERT_OK(Put(Key(3), rnd.RandomString(10)));
3340
+ ASSERT_OK(Flush());
3341
+
3342
+ // L0 -> L1 compaction: cut output into two files:
3343
+ // File 1: Key(1), Key(3)@7, Range tombstone ends at Key(3)@7
3344
+ // File 2: Key(3)@5, Key(3)@4, Range tombstone starts from Key(3)@5
3345
+ SyncPoint::GetInstance()->SetCallBack(
3346
+ "CompactionOutputs::ShouldStopBefore::manual_decision", [opts](void* p) {
3347
+ auto* pair = (std::pair<bool*, const Slice>*)p;
3348
+ if ((opts.comparator->Compare(ExtractUserKey(pair->second), Key(3)) ==
3349
+ 0) &&
3350
+ (GetInternalKeySeqno(pair->second) <= 6)) {
3351
+ *(pair->first) = true;
3352
+ SyncPoint::GetInstance()->DisableProcessing();
3353
+ }
3354
+ });
3355
+ SyncPoint::GetInstance()->EnableProcessing();
3356
+ std::string begin_key = Key(0);
3357
+ std::string end_key = Key(5);
3358
+ Slice begin_slice{begin_key};
3359
+ Slice end_slice{end_key};
3360
+ ASSERT_OK(dbfull()->RunManualCompaction(
3361
+ static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
3362
+ ->cfd(),
3363
+ 0, 1, CompactRangeOptions(), &begin_slice, &end_slice, true,
3364
+ true /* disallow_trivial_move */,
3365
+ std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
3366
+ "" /*trim_ts*/));
3367
+ ASSERT_EQ(NumTableFilesAtLevel(1), 2);
3368
+
3369
+ // L1 -> L2 compaction, drop the snapshot protecting Key(3)@5.
3370
+ // Let ShouldStopBefore() return true for Key(3)@5 (delete range sentinel)
3371
+ // and Key(3)@4.
3372
+ // Output should have two files:
3373
+ // File 1: Key(1), Key(3)@7, range tombstone ends at Key(3)@7
3374
+ // File dropped: range tombstone only file (from Key(3)@5 to Key(3)@4)
3375
+ // File 2: Range tombstone starting from Key(3)@4, Key(3)@4
3376
+ db_->ReleaseSnapshot(snapshot2);
3377
+ SyncPoint::GetInstance()->SetCallBack(
3378
+ "CompactionOutputs::ShouldStopBefore::manual_decision", [opts](void* p) {
3379
+ auto* pair = (std::pair<bool*, const Slice>*)p;
3380
+ if ((opts.comparator->Compare(ExtractUserKey(pair->second), Key(3)) ==
3381
+ 0) &&
3382
+ (GetInternalKeySeqno(pair->second) <= 6)) {
3383
+ *(pair->first) = true;
3384
+ }
3385
+ });
3386
+ SyncPoint::GetInstance()->EnableProcessing();
3387
+ ASSERT_OK(dbfull()->RunManualCompaction(
3388
+ static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
3389
+ ->cfd(),
3390
+ 1, 2, CompactRangeOptions(), &begin_slice, &end_slice, true,
3391
+ true /* disallow_trivial_move */,
3392
+ std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
3393
+ "" /*trim_ts*/));
3394
+ ASSERT_EQ(NumTableFilesAtLevel(2), 2);
3395
+ // iterate through to check if any assertion breaks
3396
+ std::unique_ptr<Iterator> iter{db_->NewIterator(ReadOptions())};
3397
+ iter->SeekToFirst();
3398
+ std::vector<int> expected{1, 3, 4};
3399
+ for (auto i : expected) {
3400
+ ASSERT_TRUE(iter->Valid());
3401
+ ASSERT_EQ(iter->key(), Key(i));
3402
+ iter->Next();
3403
+ }
3404
+ ASSERT_TRUE(iter->status().ok() && !iter->Valid());
3405
+ db_->ReleaseSnapshot(snapshot1);
3406
+ }
3018
3407
 
3019
3408
  } // namespace ROCKSDB_NAMESPACE
3020
3409
 
@@ -3892,6 +3892,32 @@ TEST_F(DBBasicTestWithTimestamp, RangeTombstoneApproximateSize) {
3892
3892
  std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
3893
3893
  "" /*trim_ts*/));
3894
3894
  }
3895
+
3896
+ TEST_F(DBBasicTestWithTimestamp, IterSeekToLastWithIterateUpperbound) {
3897
+ // Test for a bug fix where DBIter::SeekToLast() could fail when
3898
+ // iterate_upper_bound and iter_start_ts are both set.
3899
+ Options options = CurrentOptions();
3900
+ const size_t kTimestampSize = Timestamp(0, 0).size();
3901
+ TestComparator test_cmp(kTimestampSize);
3902
+ options.comparator = &test_cmp;
3903
+ DestroyAndReopen(options);
3904
+
3905
+ ASSERT_OK(db_->Put(WriteOptions(), Key(1), Timestamp(2, 0), "val"));
3906
+ ReadOptions ro;
3907
+ std::string k = Key(1);
3908
+ Slice k_slice = k;
3909
+ ro.iterate_upper_bound = &k_slice;
3910
+ std::string ts = Timestamp(3, 0);
3911
+ Slice read_ts = ts;
3912
+ ro.timestamp = &read_ts;
3913
+ std::string start_ts = Timestamp(0, 0);
3914
+ Slice start_ts_slice = start_ts;
3915
+ ro.iter_start_ts = &start_ts_slice;
3916
+ std::unique_ptr<Iterator> iter{db_->NewIterator(ro)};
3917
+ iter->SeekToLast();
3918
+ ASSERT_FALSE(iter->Valid());
3919
+ ASSERT_OK(iter->status());
3920
+ }
3895
3921
  } // namespace ROCKSDB_NAMESPACE
3896
3922
 
3897
3923
  int main(int argc, char** argv) {
@@ -19,6 +19,7 @@
19
19
  #include "util/random.h"
20
20
  #include "util/string_util.h"
21
21
  #include "utilities/fault_injection_env.h"
22
+ #include "utilities/fault_injection_fs.h"
22
23
 
23
24
  namespace ROCKSDB_NAMESPACE {
24
25
 
@@ -608,12 +609,18 @@ TEST_P(DBWriteTest, IOErrorOnSwitchMemtable) {
608
609
 
609
610
  // Test that db->LockWAL() flushes the WAL after locking, which can fail
610
611
  TEST_P(DBWriteTest, LockWALInEffect) {
612
+ if (mem_env_ || encrypted_env_) {
613
+ ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
614
+ return;
615
+ }
611
616
  Options options = GetOptions();
612
- std::unique_ptr<FaultInjectionTestEnv> mock_env(
613
- new FaultInjectionTestEnv(env_));
614
- options.env = mock_env.get();
617
+ std::shared_ptr<FaultInjectionTestFS> fault_fs(
618
+ new FaultInjectionTestFS(FileSystem::Default()));
619
+ std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
620
+ options.env = fault_fs_env.get();
615
621
  options.disable_auto_compactions = true;
616
622
  options.paranoid_checks = false;
623
+ options.max_bgerror_resume_count = 0; // manual Resume()
617
624
  Reopen(options);
618
625
  // try the 1st WAL created during open
619
626
  ASSERT_OK(Put("key0", "value"));
@@ -630,7 +637,7 @@ TEST_P(DBWriteTest, LockWALInEffect) {
630
637
  ASSERT_OK(db_->UnlockWAL());
631
638
 
632
639
  // Fail the WAL flush if applicable
633
- mock_env->SetFilesystemActive(false);
640
+ fault_fs->SetFilesystemActive(false);
634
641
  Status s = Put("key2", "value");
635
642
  if (options.manual_wal_flush) {
636
643
  ASSERT_OK(s);
@@ -642,7 +649,8 @@ TEST_P(DBWriteTest, LockWALInEffect) {
642
649
  ASSERT_OK(db_->LockWAL());
643
650
  ASSERT_OK(db_->UnlockWAL());
644
651
  }
645
- mock_env->SetFilesystemActive(true);
652
+ fault_fs->SetFilesystemActive(true);
653
+ ASSERT_OK(db_->Resume());
646
654
  // Writes should work again
647
655
  ASSERT_OK(Put("key3", "value"));
648
656
  ASSERT_EQ(Get("key3"), "value");
@@ -86,8 +86,10 @@ inline bool IsValueType(ValueType t) {
86
86
 
87
87
  // Checks whether a type is from user operation
88
88
  // kTypeRangeDeletion is in meta block so this API is separated from above
89
+ // kTypeMaxValid can be from keys generated by
90
+ // TruncatedRangeDelIterator::start_key()
89
91
  inline bool IsExtendedValueType(ValueType t) {
90
- return IsValueType(t) || t == kTypeRangeDeletion;
92
+ return IsValueType(t) || t == kTypeRangeDeletion || t == kTypeMaxValid;
91
93
  }
92
94
 
93
95
  // We leave eight bits empty at the bottom so a type and sequence#
@@ -2860,4 +2860,3 @@ int main(int argc, char** argv) {
2860
2860
  ::testing::InitGoogleTest(&argc, argv);
2861
2861
  return RUN_ALL_TESTS();
2862
2862
  }
2863
-
@@ -746,12 +746,6 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
746
746
 
747
747
  ParsedInternalKey key;
748
748
  ReadOptions ro;
749
- // During reading the external file we can cache blocks that we read into
750
- // the block cache, if we later change the global seqno of this file, we will
751
- // have block in cache that will include keys with wrong seqno.
752
- // We need to disable fill_cache so that we read from the file without
753
- // updating the block cache.
754
- ro.fill_cache = false;
755
749
  std::unique_ptr<InternalIterator> iter(table_reader->NewIterator(
756
750
  ro, sv->mutable_cf_options.prefix_extractor.get(), /*arena=*/nullptr,
757
751
  /*skip_filters=*/false, TableReaderCaller::kExternalSSTIngestion));
@@ -238,6 +238,9 @@ ForwardIterator::ForwardIterator(DBImpl* db, const ReadOptions& read_options,
238
238
  if (sv_) {
239
239
  RebuildIterators(false);
240
240
  }
241
+ if (!cfd_->ioptions()->env->GetFileSystem()->use_async_io()) {
242
+ read_options_.async_io = false;
243
+ }
241
244
 
242
245
  // immutable_status_ is a local aggregation of the
243
246
  // status of the immutable Iterators.
@@ -122,7 +122,7 @@ class ForwardIterator : public InternalIterator {
122
122
  void DeleteIterator(InternalIterator* iter, bool is_arena = false);
123
123
 
124
124
  DBImpl* const db_;
125
- const ReadOptions read_options_;
125
+ ReadOptions read_options_;
126
126
  ColumnFamilyData* const cfd_;
127
127
  const SliceTransform* const prefix_extractor_;
128
128
  const Comparator* user_comparator_;
@@ -82,6 +82,10 @@ class HistoryTrimmingIterator : public InternalIterator {
82
82
 
83
83
  bool IsValuePinned() const override { return input_->IsValuePinned(); }
84
84
 
85
+ bool IsDeleteRangeSentinelKey() const override {
86
+ return input_->IsDeleteRangeSentinelKey();
87
+ }
88
+
85
89
  private:
86
90
  InternalIterator* input_;
87
91
  const std::string filter_ts_;