@nxtedition/rocksdb 8.1.4 → 8.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +21 -0
  2. package/deps/rocksdb/rocksdb/Makefile +15 -3
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
  5. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
  6. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
  8. package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
  9. package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
  10. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
  11. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
  12. package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
  13. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
  14. package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
  15. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
  16. package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
  17. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
  18. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
  19. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
  20. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
  21. package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
  22. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
  23. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
  24. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
  25. package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
  26. package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
  27. package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
  28. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
  29. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
  30. package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
  31. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
  32. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
  33. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
  34. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
  35. package/deps/rocksdb/rocksdb/db/builder.cc +24 -10
  36. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  37. package/deps/rocksdb/rocksdb/db/c.cc +15 -0
  38. package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
  39. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -6
  40. package/deps/rocksdb/rocksdb/db/column_family.h +20 -6
  41. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +31 -34
  42. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +3 -0
  43. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +21 -3
  44. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +1 -0
  45. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +4 -0
  46. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  47. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +9 -6
  48. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +275 -82
  49. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  50. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -18
  51. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +17 -16
  52. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +19 -6
  53. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +5 -5
  54. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -22
  55. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -5
  56. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +81 -52
  57. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  58. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -5
  59. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  60. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +3 -0
  61. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  62. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  63. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
  64. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
  65. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  66. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1022 -123
  67. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
  68. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +32 -21
  69. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +32 -24
  70. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +199 -77
  71. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  72. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +3 -2
  73. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  74. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -4
  75. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +43 -23
  76. package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
  77. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  78. package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
  79. package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
  80. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +230 -2
  81. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  82. package/deps/rocksdb/rocksdb/db/db_test2.cc +233 -8
  83. package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
  84. package/deps/rocksdb/rocksdb/db/db_test_util.h +39 -24
  85. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  86. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
  87. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  88. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  89. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  90. package/deps/rocksdb/rocksdb/db/experimental.cc +3 -2
  91. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +3 -0
  92. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +92 -13
  93. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  94. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  95. package/deps/rocksdb/rocksdb/db/flush_job.cc +12 -10
  96. package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
  97. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
  98. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  99. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  100. package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
  101. package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
  102. package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
  103. package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
  104. package/deps/rocksdb/rocksdb/db/memtable.cc +31 -6
  105. package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -29
  106. package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
  107. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  108. package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
  109. package/deps/rocksdb/rocksdb/db/repair.cc +65 -22
  110. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  111. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  112. package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
  113. package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
  114. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
  115. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  116. package/deps/rocksdb/rocksdb/db/version_builder.cc +102 -52
  117. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  118. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +218 -93
  119. package/deps/rocksdb/rocksdb/db/version_edit.cc +27 -1
  120. package/deps/rocksdb/rocksdb/db/version_edit.h +34 -9
  121. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +13 -6
  122. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +17 -6
  123. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +19 -17
  124. package/deps/rocksdb/rocksdb/db/version_set.cc +160 -28
  125. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  126. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
  127. package/deps/rocksdb/rocksdb/db/version_set_test.cc +65 -31
  128. package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
  129. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  130. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  131. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -32
  132. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h +2 -1
  133. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
  134. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  135. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +11 -4
  136. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +16 -15
  137. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
  138. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
  139. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +286 -217
  140. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +8 -0
  141. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
  142. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  143. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  144. package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
  145. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  146. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  147. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
  148. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  149. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  150. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  151. package/deps/rocksdb/rocksdb/memory/arena.cc +23 -87
  152. package/deps/rocksdb/rocksdb/memory/arena.h +25 -31
  153. package/deps/rocksdb/rocksdb/memory/arena_test.cc +90 -0
  154. package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
  155. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  156. package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
  157. package/deps/rocksdb/rocksdb/port/mmap.cc +98 -0
  158. package/deps/rocksdb/rocksdb/port/mmap.h +70 -0
  159. package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
  160. package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
  161. package/deps/rocksdb/rocksdb/src.mk +3 -0
  162. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  163. package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
  164. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
  165. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
  166. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
  167. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +159 -225
  168. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
  169. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +52 -20
  170. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
  171. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  172. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
  173. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
  174. package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
  175. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
  176. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
  177. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
  178. package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
  179. package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
  180. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
  181. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
  182. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
  183. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
  184. package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
  185. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  186. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  187. package/deps/rocksdb/rocksdb/table/format.h +6 -3
  188. package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
  189. package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
  190. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +69 -35
  191. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  192. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  193. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  194. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  195. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
  196. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
  197. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
  198. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  199. package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
  200. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
  201. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  202. package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
  203. package/deps/rocksdb/rocksdb/util/compression.h +11 -2
  204. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  205. package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
  206. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +250 -74
  207. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +199 -4
  208. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
  209. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
  210. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  211. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
  212. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
  213. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
  214. package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
  215. package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
  216. package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
  217. package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
  218. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
  219. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +111 -0
  220. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
  221. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
  222. package/package.json +1 -1
  223. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
@@ -9,8 +9,10 @@
9
9
 
10
10
  #include <tuple>
11
11
 
12
+ #include "compaction/compaction_picker_universal.h"
12
13
  #include "db/blob/blob_index.h"
13
14
  #include "db/db_test_util.h"
15
+ #include "db/dbformat.h"
14
16
  #include "env/mock_env.h"
15
17
  #include "port/port.h"
16
18
  #include "port/stack_trace.h"
@@ -1024,6 +1026,70 @@ TEST_F(DBCompactionTest, CompactionSstPartitioner) {
1024
1026
  ASSERT_EQ("B", Get("bbbb1"));
1025
1027
  }
1026
1028
 
1029
+ TEST_F(DBCompactionTest, CompactionSstPartitionWithManualCompaction) {
1030
+ Options options = CurrentOptions();
1031
+ options.compaction_style = kCompactionStyleLevel;
1032
+ options.level0_file_num_compaction_trigger = 3;
1033
+
1034
+ DestroyAndReopen(options);
1035
+
1036
+ // create first file and flush to l0
1037
+ ASSERT_OK(Put("000015", "A"));
1038
+ ASSERT_OK(Put("000025", "B"));
1039
+ ASSERT_OK(Flush());
1040
+ ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
1041
+
1042
+ // create second file and flush to l0
1043
+ ASSERT_OK(Put("000015", "A2"));
1044
+ ASSERT_OK(Put("000025", "B2"));
1045
+ ASSERT_OK(Flush());
1046
+ ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
1047
+
1048
+ // CONTROL 1: compact without partitioner
1049
+ CompactRangeOptions compact_options;
1050
+ compact_options.bottommost_level_compaction =
1051
+ BottommostLevelCompaction::kForceOptimized;
1052
+ ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1053
+
1054
+ // Check (compacted but no partitioning yet)
1055
+ std::vector<LiveFileMetaData> files;
1056
+ dbfull()->GetLiveFilesMetaData(&files);
1057
+ ASSERT_EQ(1, files.size());
1058
+
1059
+ // Install partitioner
1060
+ std::shared_ptr<SstPartitionerFactory> factory(
1061
+ NewSstPartitionerFixedPrefixFactory(5));
1062
+ options.sst_partitioner_factory = factory;
1063
+ Reopen(options);
1064
+
1065
+ // CONTROL 2: request compaction on range with no partition boundary and no
1066
+ // overlap with actual entries
1067
+ Slice from("000017");
1068
+ Slice to("000019");
1069
+ ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));
1070
+
1071
+ // Check (no partitioning yet)
1072
+ files.clear();
1073
+ dbfull()->GetLiveFilesMetaData(&files);
1074
+ ASSERT_EQ(1, files.size());
1075
+ ASSERT_EQ("A2", Get("000015"));
1076
+ ASSERT_EQ("B2", Get("000025"));
1077
+
1078
+ // TEST: request compaction overlapping with partition boundary but no
1079
+ // actual entries
1080
+ // NOTE: `to` is INCLUSIVE
1081
+ from = Slice("000019");
1082
+ to = Slice("000020");
1083
+ ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));
1084
+
1085
+ // Check (must be partitioned)
1086
+ files.clear();
1087
+ dbfull()->GetLiveFilesMetaData(&files);
1088
+ ASSERT_EQ(2, files.size());
1089
+ ASSERT_EQ("A2", Get("000015"));
1090
+ ASSERT_EQ("B2", Get("000025"));
1091
+ }
1092
+
1027
1093
  TEST_F(DBCompactionTest, CompactionSstPartitionerNonTrivial) {
1028
1094
  Options options = CurrentOptions();
1029
1095
  options.compaction_style = kCompactionStyleLevel;
@@ -3519,6 +3585,59 @@ TEST_P(DBCompactionTestWithParam, FullCompactionInBottomPriThreadPool) {
3519
3585
  Env::Default()->SetBackgroundThreads(0, Env::Priority::BOTTOM);
3520
3586
  }
3521
3587
 
3588
+ TEST_F(DBCompactionTest, CancelCompactionWaitingOnConflict) {
3589
+ // This test verifies cancellation of a compaction waiting to be scheduled due
3590
+ // to conflict with a running compaction.
3591
+ //
3592
+ // A `CompactRange()` in universal compacts all files, waiting for files to
3593
+ // become available if they are locked for another compaction. This test
3594
+ // triggers an automatic compaction that blocks a `CompactRange()`, and
3595
+ // verifies that `DisableManualCompaction()` can successfully cancel the
3596
+ // `CompactRange()` without waiting for the automatic compaction to finish.
3597
+ const int kNumSortedRuns = 4;
3598
+
3599
+ Options options = CurrentOptions();
3600
+ options.compaction_style = kCompactionStyleUniversal;
3601
+ options.level0_file_num_compaction_trigger = kNumSortedRuns;
3602
+ options.memtable_factory.reset(
3603
+ test::NewSpecialSkipListFactory(KNumKeysByGenerateNewFile - 1));
3604
+ Reopen(options);
3605
+
3606
+ test::SleepingBackgroundTask auto_compaction_sleeping_task;
3607
+ // Block automatic compaction when it runs in the callback
3608
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
3609
+ "CompactionJob::Run():Start",
3610
+ [&](void* /*arg*/) { auto_compaction_sleeping_task.DoSleep(); });
3611
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
3612
+
3613
+ // Fill overlapping files in L0 to trigger an automatic compaction
3614
+ Random rnd(301);
3615
+ for (int i = 0; i < kNumSortedRuns; ++i) {
3616
+ int key_idx = 0;
3617
+ GenerateNewFile(&rnd, &key_idx, true /* nowait */);
3618
+ }
3619
+ auto_compaction_sleeping_task.WaitUntilSleeping();
3620
+
3621
+ // Make sure the manual compaction has seen the conflict before being canceled
3622
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
3623
+ {{"ColumnFamilyData::CompactRange:Return",
3624
+ "DBCompactionTest::CancelCompactionWaitingOnConflict:"
3625
+ "PreDisableManualCompaction"}});
3626
+ auto manual_compaction_thread = port::Thread([this]() {
3627
+ ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr)
3628
+ .IsIncomplete());
3629
+ });
3630
+
3631
+ // Cancel it. Thread should be joinable, i.e., manual compaction was unblocked
3632
+ // despite finding a conflict with an automatic compaction that is still
3633
+ // running
3634
+ TEST_SYNC_POINT(
3635
+ "DBCompactionTest::CancelCompactionWaitingOnConflict:"
3636
+ "PreDisableManualCompaction");
3637
+ db_->DisableManualCompaction();
3638
+ manual_compaction_thread.join();
3639
+ }
3640
+
3522
3641
  TEST_F(DBCompactionTest, OptimizedDeletionObsoleting) {
3523
3642
  // Deletions can be dropped when compacted to non-last level if they fall
3524
3643
  // outside the lower-level files' key-ranges.
@@ -4153,6 +4272,78 @@ TEST_F(DBCompactionTest, LevelCompactExpiredTtlFiles) {
4153
4272
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
4154
4273
  }
4155
4274
 
4275
+ TEST_F(DBCompactionTest, LevelTtlCompactionOutputCuttingIteractingWithOther) {
4276
+ // This test is for a bug fix in CompactionOutputs::ShouldStopBefore() where
4277
+ // TTL states were not being updated for keys that ShouldStopBefore() would
4278
+ // return true for reasons other than TTL.
4279
+ Options options = CurrentOptions();
4280
+ options.compression = kNoCompression;
4281
+ options.ttl = 24 * 60 * 60; // 24 hours
4282
+ options.max_open_files = -1;
4283
+ options.compaction_pri = kMinOverlappingRatio;
4284
+ env_->SetMockSleep();
4285
+ options.env = env_;
4286
+ options.target_file_size_base = 4 << 10;
4287
+ options.disable_auto_compactions = true;
4288
+ options.level_compaction_dynamic_file_size = false;
4289
+
4290
+ DestroyAndReopen(options);
4291
+ Random rnd(301);
4292
+
4293
+ // This makes sure the manual compaction below
4294
+ // is not a bottommost compaction as TTL is only
4295
+ // for non-bottommost compactions.
4296
+ ASSERT_OK(Put(Key(3), rnd.RandomString(1 << 10)));
4297
+ ASSERT_OK(Put(Key(0), rnd.RandomString(1 << 10)));
4298
+ ASSERT_OK(Flush());
4299
+ MoveFilesToLevel(6);
4300
+
4301
+ // L2:
4302
+ ASSERT_OK(Put(Key(2), rnd.RandomString(4 << 10)));
4303
+ ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
4304
+ ASSERT_OK(Flush());
4305
+ MoveFilesToLevel(2);
4306
+
4307
+ // L1, overlaps in range with the file in L2 so
4308
+ // that they compact together.
4309
+ ASSERT_OK(Put(Key(0), rnd.RandomString(4 << 10)));
4310
+ ASSERT_OK(Put(Key(1), rnd.RandomString(4 << 10)));
4311
+ ASSERT_OK(Put(Key(3), rnd.RandomString(4 << 10)));
4312
+ ASSERT_OK(Flush());
4313
+ MoveFilesToLevel(1);
4314
+
4315
+ ASSERT_EQ("0,1,1,0,0,0,1", FilesPerLevel());
4316
+ // 36 hours so that the file in L2 is eligible for TTL
4317
+ env_->MockSleepForSeconds(36 * 60 * 60);
4318
+
4319
+ CompactRangeOptions compact_range_opts;
4320
+
4321
+ ASSERT_OK(dbfull()->RunManualCompaction(
4322
+ static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
4323
+ ->cfd(),
4324
+ 1 /* input_level */, 2 /* output_level */, compact_range_opts,
4325
+ nullptr /* begin */, nullptr /* end */, true /* exclusive */,
4326
+ true /* disallow_trivial_move */,
4327
+ std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
4328
+ "" /*trim_ts*/));
4329
+
4330
+ // L2 should have 2 files:
4331
+ // file 1: Key(0), Key(1)
4332
+ // ShouldStopBefore(Key(2)) returns true due to TTL or output file size
4333
+ // file 2: Key(2), Key(3)
4334
+ //
4335
+ // Before the fix in this PR, L2 would have 3 files:
4336
+ // file 1: Key(0), Key(1)
4337
+ // CompactionOutputs::ShouldStopBefore(Key(2)) returns true due to output file
4338
+ // size.
4339
+ // file 2: Key(2)
4340
+ // CompactionOutputs::ShouldStopBefore(Key(3)) returns true
4341
+ // due to TTL cutting and that TTL states were not updated
4342
+ // for Key(2).
4343
+ // file 3: Key(3)
4344
+ ASSERT_EQ("0,0,2,0,0,0,1", FilesPerLevel());
4345
+ }
4346
+
4156
4347
  TEST_F(DBCompactionTest, LevelTtlCascadingCompactions) {
4157
4348
  env_->SetMockSleep();
4158
4349
  const int kValueSize = 100;
@@ -6179,6 +6370,231 @@ TEST_P(DBCompactionTestWithParam, FixFileIngestionCompactionDeadlock) {
6179
6370
  Close();
6180
6371
  }
6181
6372
 
6373
+ class DBCompactionTestWithOngoingFileIngestionParam
6374
+ : public DBCompactionTest,
6375
+ public testing::WithParamInterface<std::string> {
6376
+ public:
6377
+ DBCompactionTestWithOngoingFileIngestionParam() : DBCompactionTest() {
6378
+ compaction_path_to_test_ = GetParam();
6379
+ }
6380
+ void SetupOptions() {
6381
+ options_ = CurrentOptions();
6382
+ options_.create_if_missing = true;
6383
+
6384
+ if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6385
+ options_.num_levels = 7;
6386
+ } else {
6387
+ options_.num_levels = 3;
6388
+ }
6389
+ options_.compaction_style = CompactionStyle::kCompactionStyleLevel;
6390
+ if (compaction_path_to_test_ == "AutoCompaction") {
6391
+ options_.disable_auto_compactions = false;
6392
+ options_.level0_file_num_compaction_trigger = 1;
6393
+ } else {
6394
+ options_.disable_auto_compactions = true;
6395
+ }
6396
+ }
6397
+
6398
+ void PauseCompactionThread() {
6399
+ sleeping_task_.reset(new test::SleepingBackgroundTask());
6400
+ env_->SetBackgroundThreads(1, Env::LOW);
6401
+ env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
6402
+ sleeping_task_.get(), Env::Priority::LOW);
6403
+ sleeping_task_->WaitUntilSleeping();
6404
+ }
6405
+
6406
+ void ResumeCompactionThread() {
6407
+ if (sleeping_task_) {
6408
+ sleeping_task_->WakeUp();
6409
+ sleeping_task_->WaitUntilDone();
6410
+ }
6411
+ }
6412
+
6413
+ void SetupFilesToForceFutureFilesIngestedToCertainLevel() {
6414
+ SstFileWriter sst_file_writer(EnvOptions(), options_);
6415
+ std::string dummy = dbname_ + "/dummy.sst";
6416
+ ASSERT_OK(sst_file_writer.Open(dummy));
6417
+ ASSERT_OK(sst_file_writer.Put("k2", "dummy"));
6418
+ ASSERT_OK(sst_file_writer.Finish());
6419
+ ASSERT_OK(db_->IngestExternalFile({dummy}, IngestExternalFileOptions()));
6420
+ // L2 is made to contain a file overlapped with files to be ingested in
6421
+ // later steps on key "k2". This will force future files ingested to L1 or
6422
+ // above.
6423
+ ASSERT_EQ("0,0,1", FilesPerLevel(0));
6424
+ }
6425
+
6426
+ void SetupSyncPoints() {
6427
+ if (compaction_path_to_test_ == "AutoCompaction") {
6428
+ SyncPoint::GetInstance()->SetCallBack(
6429
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6430
+ SyncPoint::GetInstance()->LoadDependency(
6431
+ {{"DBImpl::BackgroundCompaction():AfterPickCompaction",
6432
+ "VersionSet::LogAndApply:WriteManifest"}});
6433
+ });
6434
+ } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") {
6435
+ SyncPoint::GetInstance()->SetCallBack(
6436
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6437
+ SyncPoint::GetInstance()->LoadDependency(
6438
+ {{"ColumnFamilyData::CompactRange:Return",
6439
+ "VersionSet::LogAndApply:WriteManifest"}});
6440
+ });
6441
+ } else if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6442
+ SyncPoint::GetInstance()->SetCallBack(
6443
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6444
+ SyncPoint::GetInstance()->LoadDependency(
6445
+ {{"DBImpl::CompactRange:PostRefitLevel",
6446
+ "VersionSet::LogAndApply:WriteManifest"}});
6447
+ });
6448
+ } else if (compaction_path_to_test_ == "CompactFiles") {
6449
+ SyncPoint::GetInstance()->SetCallBack(
6450
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6451
+ SyncPoint::GetInstance()->LoadDependency(
6452
+ {{"DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles",
6453
+ "VersionSet::LogAndApply:WriteManifest"}});
6454
+ });
6455
+ } else {
6456
+ assert(false);
6457
+ }
6458
+ SyncPoint::GetInstance()->LoadDependency(
6459
+ {{"ExternalSstFileIngestionJob::Run", "PreCompaction"}});
6460
+ SyncPoint::GetInstance()->EnableProcessing();
6461
+ }
6462
+
6463
+ void RunCompactionOverlappedWithFileIngestion() {
6464
+ if (compaction_path_to_test_ == "AutoCompaction") {
6465
+ TEST_SYNC_POINT("PreCompaction");
6466
+ ResumeCompactionThread();
6467
+ // Without proper range conflict check,
6468
+ // this would have been `Status::Corruption` about overlapping ranges
6469
+ Status s = dbfull()->TEST_WaitForCompact();
6470
+ EXPECT_OK(s);
6471
+ } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") {
6472
+ CompactRangeOptions cro;
6473
+ cro.change_level = false;
6474
+ std::string start_key = "k1";
6475
+ Slice start(start_key);
6476
+ std::string end_key = "k4";
6477
+ Slice end(end_key);
6478
+ TEST_SYNC_POINT("PreCompaction");
6479
+ // Without proper range conflict check,
6480
+ // this would have been `Status::Corruption` about overlapping ranges
6481
+ Status s = dbfull()->CompactRange(cro, &start, &end);
6482
+ EXPECT_OK(s);
6483
+ } else if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6484
+ CompactRangeOptions cro;
6485
+ cro.change_level = true;
6486
+ cro.target_level = 5;
6487
+ std::string start_key = "k1";
6488
+ Slice start(start_key);
6489
+ std::string end_key = "k4";
6490
+ Slice end(end_key);
6491
+ TEST_SYNC_POINT("PreCompaction");
6492
+ Status s = dbfull()->CompactRange(cro, &start, &end);
6493
+ // Without proper range conflict check,
6494
+ // this would have been `Status::Corruption` about overlapping ranges
6495
+ // To see this, remove the fix AND replace
6496
+ // `DBImpl::CompactRange:PostRefitLevel` in sync point dependency with
6497
+ // `DBImpl::ReFitLevel:PostRegisterCompaction`
6498
+ EXPECT_TRUE(s.IsNotSupported());
6499
+ EXPECT_TRUE(s.ToString().find("some ongoing compaction's output") !=
6500
+ std::string::npos);
6501
+ } else if (compaction_path_to_test_ == "CompactFiles") {
6502
+ ColumnFamilyMetaData cf_meta_data;
6503
+ db_->GetColumnFamilyMetaData(&cf_meta_data);
6504
+ ASSERT_EQ(cf_meta_data.levels[0].files.size(), 1);
6505
+ std::vector<std::string> input_files;
6506
+ for (const auto& file : cf_meta_data.levels[0].files) {
6507
+ input_files.push_back(file.name);
6508
+ }
6509
+ TEST_SYNC_POINT("PreCompaction");
6510
+ Status s = db_->CompactFiles(CompactionOptions(), input_files, 1);
6511
+ // Without proper range conflict check,
6512
+ // this would have been `Status::Corruption` about overlapping ranges
6513
+ EXPECT_TRUE(s.IsAborted());
6514
+ EXPECT_TRUE(
6515
+ s.ToString().find(
6516
+ "A running compaction is writing to the same output level") !=
6517
+ std::string::npos);
6518
+ } else {
6519
+ assert(false);
6520
+ }
6521
+ }
6522
+
6523
+ void DisableSyncPoints() {
6524
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
6525
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6526
+ }
6527
+
6528
+ protected:
6529
+ std::string compaction_path_to_test_;
6530
+ Options options_;
6531
+ std::shared_ptr<test::SleepingBackgroundTask> sleeping_task_;
6532
+ };
6533
+
6534
+ INSTANTIATE_TEST_CASE_P(DBCompactionTestWithOngoingFileIngestionParam,
6535
+ DBCompactionTestWithOngoingFileIngestionParam,
6536
+ ::testing::Values("AutoCompaction",
6537
+ "NonRefitLevelCompactRange",
6538
+ "RefitLevelCompactRange",
6539
+ "CompactFiles"));
6540
+
6541
+ TEST_P(DBCompactionTestWithOngoingFileIngestionParam, RangeConflictCheck) {
6542
+ SetupOptions();
6543
+ DestroyAndReopen(options_);
6544
+
6545
+ if (compaction_path_to_test_ == "AutoCompaction") {
6546
+ PauseCompactionThread();
6547
+ }
6548
+
6549
+ if (compaction_path_to_test_ != "RefitLevelCompactRange") {
6550
+ SetupFilesToForceFutureFilesIngestedToCertainLevel();
6551
+ }
6552
+
6553
+ // Create s1
6554
+ ASSERT_OK(Put("k1", "v"));
6555
+ ASSERT_OK(Put("k4", "v"));
6556
+ ASSERT_OK(Flush());
6557
+ if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6558
+ MoveFilesToLevel(6 /* level */);
6559
+ ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel(0));
6560
+ } else {
6561
+ ASSERT_EQ("1,0,1", FilesPerLevel(0));
6562
+ }
6563
+
6564
+ // To coerce following sequence of events
6565
+ // Timeline Thread 1 (Ingest s2) Thread 2 (Compact s1)
6566
+ // t0 | Decide to output to Lk
6567
+ // t1 | Release lock in LogAndApply()
6568
+ // t2 | Acquire lock
6569
+ // t3 | Decides to compact to Lk
6570
+ // | Expected to fail due to range
6571
+ // | conflict check with file
6572
+ // | ingestion
6573
+ // t4 | Release lock in LogAndApply()
6574
+ // t5 | Acquire lock again and finish
6575
+ // t6 | Acquire lock again and finish
6576
+ SetupSyncPoints();
6577
+
6578
+ // Ingest s2
6579
+ port::Thread thread1([&] {
6580
+ SstFileWriter sst_file_writer(EnvOptions(), options_);
6581
+ std::string s2 = dbname_ + "/ingested_s2.sst";
6582
+ ASSERT_OK(sst_file_writer.Open(s2));
6583
+ ASSERT_OK(sst_file_writer.Put("k2", "v2"));
6584
+ ASSERT_OK(sst_file_writer.Put("k3", "v2"));
6585
+ ASSERT_OK(sst_file_writer.Finish());
6586
+ ASSERT_OK(db_->IngestExternalFile({s2}, IngestExternalFileOptions()));
6587
+ });
6588
+
6589
+ // Compact s1. Without proper range conflict check,
6590
+ // this will encounter overlapping file corruption.
6591
+ port::Thread thread2([&] { RunCompactionOverlappedWithFileIngestion(); });
6592
+
6593
+ thread1.join();
6594
+ thread2.join();
6595
+ DisableSyncPoints();
6596
+ }
6597
+
6182
6598
  TEST_F(DBCompactionTest, ConsistencyFailTest) {
6183
6599
  Options options = CurrentOptions();
6184
6600
  options.force_consistency_checks = true;
@@ -6271,172 +6687,655 @@ void IngestOneKeyValue(DBImpl* db, const std::string& key,
6271
6687
  ASSERT_OK(db->IngestExternalFile({info.file_path}, ingest_opt));
6272
6688
  }
6273
6689
 
6274
- TEST_P(DBCompactionTestWithParam,
6275
- FlushAfterIntraL0CompactionCheckConsistencyFail) {
6276
- Options options = CurrentOptions();
6277
- options.force_consistency_checks = true;
6278
- options.compression = kNoCompression;
6279
- options.level0_file_num_compaction_trigger = 5;
6280
- options.max_background_compactions = 2;
6281
- options.max_subcompactions = max_subcompactions_;
6282
- DestroyAndReopen(options);
6690
+ class DBCompactionTestL0FilesMisorderCorruption : public DBCompactionTest {
6691
+ public:
6692
+ DBCompactionTestL0FilesMisorderCorruption() : DBCompactionTest() {}
6693
+ void SetupOptions(const CompactionStyle compaciton_style,
6694
+ const std::string& compaction_path_to_test = "") {
6695
+ options_ = CurrentOptions();
6696
+ options_.create_if_missing = true;
6697
+ options_.compression = kNoCompression;
6698
+
6699
+ options_.force_consistency_checks = true;
6700
+ options_.compaction_style = compaciton_style;
6701
+
6702
+ if (compaciton_style == CompactionStyle::kCompactionStyleLevel) {
6703
+ options_.num_levels = 7;
6704
+ // Level compaction's PickIntraL0Compaction() impl detail requires
6705
+ // `options.level0_file_num_compaction_trigger` to be
6706
+ // at least 2 files less than the actual number of level 0 files
6707
+ // (i.e, 7 by design in this test)
6708
+ options_.level0_file_num_compaction_trigger = 5;
6709
+ options_.max_background_compactions = 2;
6710
+ options_.write_buffer_size = 2 << 20;
6711
+ options_.max_write_buffer_number = 6;
6712
+ } else if (compaciton_style == CompactionStyle::kCompactionStyleUniversal) {
6713
+ // TODO: expand test coverage to num_levels > 1 for universal compaction,
6714
+ // which requires careful unit test design to compact to level 0 despite
6715
+ // num_levels > 1
6716
+ options_.num_levels = 1;
6717
+ options_.level0_file_num_compaction_trigger = 5;
6718
+
6719
+ CompactionOptionsUniversal universal_options;
6720
+ if (compaction_path_to_test == "PickCompactionToReduceSizeAmp") {
6721
+ universal_options.max_size_amplification_percent = 50;
6722
+ } else if (compaction_path_to_test ==
6723
+ "PickCompactionToReduceSortedRuns") {
6724
+ universal_options.max_size_amplification_percent = 400;
6725
+ } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") {
6726
+ universal_options.max_size_amplification_percent = 400;
6727
+ universal_options.min_merge_width = 6;
6728
+ }
6729
+ options_.compaction_options_universal = universal_options;
6730
+ } else if (compaciton_style == CompactionStyle::kCompactionStyleFIFO) {
6731
+ options_.max_open_files = -1;
6732
+ options_.num_levels = 1;
6733
+ options_.level0_file_num_compaction_trigger = 3;
6734
+
6735
+ CompactionOptionsFIFO fifo_options;
6736
+ if (compaction_path_to_test == "FindIntraL0Compaction" ||
6737
+ compaction_path_to_test == "CompactRange") {
6738
+ fifo_options.allow_compaction = true;
6739
+ fifo_options.age_for_warm = 0;
6740
+ } else if (compaction_path_to_test == "CompactFile") {
6741
+ fifo_options.allow_compaction = false;
6742
+ fifo_options.age_for_warm = 0;
6743
+ }
6744
+ options_.compaction_options_fifo = fifo_options;
6745
+ }
6283
6746
 
6284
- const size_t kValueSize = 1 << 20;
6285
- Random rnd(301);
6286
- std::atomic<int> pick_intra_l0_count(0);
6287
- std::string value(rnd.RandomString(kValueSize));
6747
+ if (compaction_path_to_test == "CompactFile" ||
6748
+ compaction_path_to_test == "CompactRange") {
6749
+ options_.disable_auto_compactions = true;
6750
+ } else {
6751
+ options_.disable_auto_compactions = false;
6752
+ }
6753
+ }
6288
6754
 
6289
- // The L0->L1 must be picked before we begin ingesting files to trigger
6290
- // intra-L0 compaction, and must not finish until after an intra-L0
6291
- // compaction has been picked.
6292
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6293
- {{"LevelCompactionPicker::PickCompaction:Return",
6294
- "DBCompactionTestWithParam::"
6295
- "FlushAfterIntraL0CompactionCheckConsistencyFail:L0ToL1Ready"},
6296
- {"LevelCompactionPicker::PickCompactionBySize:0",
6297
- "CompactionJob::Run():Start"}});
6298
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6299
- "FindIntraL0Compaction",
6300
- [&](void* /*arg*/) { pick_intra_l0_count.fetch_add(1); });
6755
+ void Destroy(const Options& options) {
6756
+ if (snapshot_) {
6757
+ assert(db_);
6758
+ db_->ReleaseSnapshot(snapshot_);
6759
+ snapshot_ = nullptr;
6760
+ }
6761
+ DBTestBase::Destroy(options);
6762
+ }
6301
6763
 
6302
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6764
+ void Reopen(const Options& options) {
6765
+ DBTestBase::Reopen(options);
6766
+ if (options.compaction_style != CompactionStyle::kCompactionStyleLevel) {
6767
+ // To force assigning the global seqno to ingested file
6768
+ // for our test purpose.
6769
+ assert(snapshot_ == nullptr);
6770
+ snapshot_ = db_->GetSnapshot();
6771
+ }
6772
+ }
6303
6773
 
6304
- // prevents trivial move
6305
- for (int i = 0; i < 10; ++i) {
6306
- ASSERT_OK(Put(Key(i), "")); // prevents trivial move
6774
+ void DestroyAndReopen(Options& options) {
6775
+ Destroy(options);
6776
+ Reopen(options);
6307
6777
  }
6308
- ASSERT_OK(Flush());
6309
- Compact("", Key(99));
6310
- ASSERT_EQ(0, NumTableFilesAtLevel(0));
6311
6778
 
6312
- // Flush 5 L0 sst.
6313
- for (int i = 0; i < 5; ++i) {
6314
- ASSERT_OK(Put(Key(i + 1), value));
6315
- ASSERT_OK(Flush());
6779
+ void PauseCompactionThread() {
6780
+ sleeping_task_.reset(new test::SleepingBackgroundTask());
6781
+ env_->SetBackgroundThreads(1, Env::LOW);
6782
+ env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
6783
+ sleeping_task_.get(), Env::Priority::LOW);
6784
+ sleeping_task_->WaitUntilSleeping();
6316
6785
  }
6317
- ASSERT_EQ(5, NumTableFilesAtLevel(0));
6318
6786
 
6319
- // Put one key, to make smallest log sequence number in this memtable is less
6320
- // than sst which would be ingested in next step.
6321
- ASSERT_OK(Put(Key(0), "a"));
6787
+ void ResumeCompactionThread() {
6788
+ if (sleeping_task_) {
6789
+ sleeping_task_->WakeUp();
6790
+ sleeping_task_->WaitUntilDone();
6791
+ }
6792
+ }
6322
6793
 
6323
- ASSERT_EQ(5, NumTableFilesAtLevel(0));
6324
- TEST_SYNC_POINT(
6325
- "DBCompactionTestWithParam::"
6326
- "FlushAfterIntraL0CompactionCheckConsistencyFail:L0ToL1Ready");
6794
+ void AddFilesMarkedForPeriodicCompaction(const size_t num_files) {
6795
+ assert(options_.compaction_style ==
6796
+ CompactionStyle::kCompactionStyleUniversal);
6797
+ VersionSet* const versions = dbfull()->GetVersionSet();
6798
+ assert(versions);
6799
+ ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
6800
+ assert(cfd);
6801
+ Version* const current = cfd->current();
6802
+ assert(current);
6327
6803
 
6328
- // Ingest 5 L0 sst. And this files would trigger PickIntraL0Compaction.
6329
- for (int i = 5; i < 10; i++) {
6330
- ASSERT_EQ(i, NumTableFilesAtLevel(0));
6331
- IngestOneKeyValue(dbfull(), Key(i), value, options);
6804
+ VersionStorageInfo* const storage_info = current->storage_info();
6805
+ assert(storage_info);
6806
+
6807
+ const std::vector<FileMetaData*> level0_files = storage_info->LevelFiles(0);
6808
+ assert(level0_files.size() == num_files);
6809
+
6810
+ for (FileMetaData* f : level0_files) {
6811
+ storage_info->TEST_AddFileMarkedForPeriodicCompaction(0, f);
6812
+ }
6332
6813
  }
6333
6814
 
6334
- // Put one key, to make biggest log sequence number in this memtable is bigger
6335
- // than sst which would be ingested in next step.
6336
- ASSERT_OK(Put(Key(2), "b"));
6337
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
6338
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6339
- std::vector<std::vector<FileMetaData>> level_to_files;
6340
- dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
6341
- &level_to_files);
6342
- ASSERT_GT(level_to_files[0].size(), 0);
6343
- ASSERT_GT(pick_intra_l0_count.load(), 0);
6815
+ void AddFilesMarkedForCompaction(const size_t num_files) {
6816
+ assert(options_.compaction_style ==
6817
+ CompactionStyle::kCompactionStyleUniversal);
6818
+ VersionSet* const versions = dbfull()->GetVersionSet();
6819
+ assert(versions);
6820
+ ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
6821
+ assert(cfd);
6822
+ Version* const current = cfd->current();
6823
+ assert(current);
6344
6824
 
6345
- ASSERT_OK(Flush());
6346
- }
6825
+ VersionStorageInfo* const storage_info = current->storage_info();
6826
+ assert(storage_info);
6347
6827
 
6348
- TEST_P(DBCompactionTestWithParam,
6349
- IntraL0CompactionAfterFlushCheckConsistencyFail) {
6350
- Options options = CurrentOptions();
6351
- options.force_consistency_checks = true;
6352
- options.compression = kNoCompression;
6353
- options.level0_file_num_compaction_trigger = 5;
6354
- options.max_background_compactions = 2;
6355
- options.max_subcompactions = max_subcompactions_;
6356
- options.write_buffer_size = 2 << 20;
6357
- options.max_write_buffer_number = 6;
6358
- DestroyAndReopen(options);
6828
+ const std::vector<FileMetaData*> level0_files = storage_info->LevelFiles(0);
6829
+ assert(level0_files.size() == num_files);
6359
6830
 
6360
- const size_t kValueSize = 1 << 20;
6361
- Random rnd(301);
6362
- std::string value(rnd.RandomString(kValueSize));
6363
- std::string value2(rnd.RandomString(kValueSize));
6364
- std::string bigvalue = value + value;
6831
+ for (FileMetaData* f : level0_files) {
6832
+ storage_info->TEST_AddFileMarkedForCompaction(0, f);
6833
+ }
6834
+ }
6365
6835
 
6366
- // prevents trivial move
6836
+ void SetupSyncPoints(const std::string& compaction_path_to_test) {
6837
+ compaction_path_sync_point_called_.store(false);
6838
+ if (compaction_path_to_test == "FindIntraL0Compaction" &&
6839
+ options_.compaction_style == CompactionStyle::kCompactionStyleLevel) {
6840
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6841
+ "PostPickFileToCompact", [&](void* arg) {
6842
+ bool* picked_file_to_compact = (bool*)arg;
6843
+ // To trigger intra-L0 compaction specifically,
6844
+ // we mock PickFileToCompact()'s result to be false
6845
+ *picked_file_to_compact = false;
6846
+ });
6847
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6848
+ "FindIntraL0Compaction", [&](void* /*arg*/) {
6849
+ compaction_path_sync_point_called_.store(true);
6850
+ });
6851
+
6852
+ } else if (compaction_path_to_test == "PickPeriodicCompaction") {
6853
+ assert(options_.compaction_style ==
6854
+ CompactionStyle::kCompactionStyleUniversal);
6855
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6856
+ "PostPickPeriodicCompaction", [&](void* compaction_arg) {
6857
+ Compaction* compaction = (Compaction*)compaction_arg;
6858
+ if (compaction != nullptr) {
6859
+ compaction_path_sync_point_called_.store(true);
6860
+ }
6861
+ });
6862
+ } else if (compaction_path_to_test == "PickCompactionToReduceSizeAmp") {
6863
+ assert(options_.compaction_style ==
6864
+ CompactionStyle::kCompactionStyleUniversal);
6865
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6866
+ "PickCompactionToReduceSizeAmpReturnNonnullptr", [&](void* /*arg*/) {
6867
+ compaction_path_sync_point_called_.store(true);
6868
+ });
6869
+ } else if (compaction_path_to_test == "PickCompactionToReduceSortedRuns") {
6870
+ assert(options_.compaction_style ==
6871
+ CompactionStyle::kCompactionStyleUniversal);
6872
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6873
+ "PickCompactionToReduceSortedRunsReturnNonnullptr",
6874
+ [&](void* /*arg*/) {
6875
+ compaction_path_sync_point_called_.store(true);
6876
+ });
6877
+ } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") {
6878
+ assert(options_.compaction_style ==
6879
+ CompactionStyle::kCompactionStyleUniversal);
6880
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6881
+ "PickDeleteTriggeredCompactionReturnNonnullptr", [&](void* /*arg*/) {
6882
+ compaction_path_sync_point_called_.store(true);
6883
+ });
6884
+ } else if ((compaction_path_to_test == "FindIntraL0Compaction" ||
6885
+ compaction_path_to_test == "CompactRange") &&
6886
+ options_.compaction_style ==
6887
+ CompactionStyle::kCompactionStyleFIFO) {
6888
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6889
+ "FindIntraL0Compaction", [&](void* /*arg*/) {
6890
+ compaction_path_sync_point_called_.store(true);
6891
+ });
6892
+ }
6893
+
6894
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6895
+ }
6896
+
6897
+ bool SyncPointsCalled() { return compaction_path_sync_point_called_.load(); }
6898
+
6899
+ void DisableSyncPoints() {
6900
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
6901
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6902
+ }
6903
+
6904
+ // Return the largest seqno of the latest L0 file based on file number
6905
+ SequenceNumber GetLatestL0FileLargestSeqnoHelper() {
6906
+ VersionSet* const versions = dbfull()->GetVersionSet();
6907
+ assert(versions);
6908
+ ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
6909
+ assert(cfd);
6910
+ Version* const current = cfd->current();
6911
+ assert(current);
6912
+ VersionStorageInfo* const storage_info = current->storage_info();
6913
+ assert(storage_info);
6914
+ const std::vector<FileMetaData*> level0_files = storage_info->LevelFiles(0);
6915
+ assert(level0_files.size() >= 1);
6916
+
6917
+ uint64_t latest_file_num = 0;
6918
+ uint64_t latest_file_largest_seqno = 0;
6919
+ for (FileMetaData* f : level0_files) {
6920
+ if (f->fd.GetNumber() > latest_file_num) {
6921
+ latest_file_num = f->fd.GetNumber();
6922
+ latest_file_largest_seqno = f->fd.largest_seqno;
6923
+ }
6924
+ }
6925
+
6926
+ return latest_file_largest_seqno;
6927
+ }
6928
+
6929
+ protected:
6930
+ Options options_;
6931
+
6932
+ private:
6933
+ const Snapshot* snapshot_ = nullptr;
6934
+ std::atomic<bool> compaction_path_sync_point_called_;
6935
+ std::shared_ptr<test::SleepingBackgroundTask> sleeping_task_;
6936
+ };
6937
+
6938
+ TEST_F(DBCompactionTestL0FilesMisorderCorruption,
6939
+ FlushAfterIntraL0LevelCompactionWithIngestedFile) {
6940
+ SetupOptions(CompactionStyle::kCompactionStyleLevel, "");
6941
+ DestroyAndReopen(options_);
6942
+ // Prevents trivial move
6367
6943
  for (int i = 0; i < 10; ++i) {
6368
- ASSERT_OK(Put(Key(i), "")); // prevents trivial move
6944
+ ASSERT_OK(Put(Key(i), "")); // Prevents trivial move
6369
6945
  }
6370
6946
  ASSERT_OK(Flush());
6371
6947
  Compact("", Key(99));
6372
6948
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
6373
6949
 
6374
- std::atomic<int> pick_intra_l0_count(0);
6375
- // The L0->L1 must be picked before we begin ingesting files to trigger
6376
- // intra-L0 compaction, and must not finish until after an intra-L0
6377
- // compaction has been picked.
6378
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
6379
- {{"LevelCompactionPicker::PickCompaction:Return",
6380
- "DBCompactionTestWithParam::"
6381
- "IntraL0CompactionAfterFlushCheckConsistencyFail:L0ToL1Ready"},
6382
- {"LevelCompactionPicker::PickCompactionBySize:0",
6383
- "CompactionJob::Run():Start"}});
6384
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
6385
- "FindIntraL0Compaction",
6386
- [&](void* /*arg*/) { pick_intra_l0_count.fetch_add(1); });
6387
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
6388
- // Make 6 L0 sst.
6950
+ // To get accurate NumTableFilesAtLevel(0) when the number reaches
6951
+ // options_.level0_file_num_compaction_trigger
6952
+ PauseCompactionThread();
6953
+
6954
+ // To create below LSM tree
6955
+ // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
6956
+ //
6957
+ // memtable: m1[ 5:new@12 .. 1:new@8, 0:new@7]
6958
+ // L0: s6[6:new@13], s5[5:old@6] ... s1[1:old@2],s0[0:old@1]
6959
+ //
6960
+ // (1) Make 6 L0 sst (i.e, s0 - s5)
6389
6961
  for (int i = 0; i < 6; ++i) {
6390
6962
  if (i % 2 == 0) {
6391
- IngestOneKeyValue(dbfull(), Key(i), value, options);
6963
+ IngestOneKeyValue(dbfull(), Key(i), "old", options_);
6392
6964
  } else {
6393
- ASSERT_OK(Put(Key(i), value));
6965
+ ASSERT_OK(Put(Key(i), "old"));
6394
6966
  ASSERT_OK(Flush());
6395
6967
  }
6396
6968
  }
6397
-
6398
6969
  ASSERT_EQ(6, NumTableFilesAtLevel(0));
6399
6970
 
6400
- // Stop run flush job
6401
- env_->SetBackgroundThreads(1, Env::HIGH);
6402
- test::SleepingBackgroundTask sleeping_tasks;
6403
- env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask, &sleeping_tasks,
6404
- Env::Priority::HIGH);
6405
- sleeping_tasks.WaitUntilSleeping();
6406
-
6407
- // Put many keys to make memtable request to flush
6971
+ // (2) Create m1
6408
6972
  for (int i = 0; i < 6; ++i) {
6409
- ASSERT_OK(Put(Key(i), bigvalue));
6973
+ ASSERT_OK(Put(Key(i), "new"));
6410
6974
  }
6411
-
6412
6975
  ASSERT_EQ(6, NumTableFilesAtLevel(0));
6413
- TEST_SYNC_POINT(
6414
- "DBCompactionTestWithParam::"
6415
- "IntraL0CompactionAfterFlushCheckConsistencyFail:L0ToL1Ready");
6416
- // ingest file to trigger IntraL0Compaction
6417
- for (int i = 6; i < 10; ++i) {
6976
+
6977
+ // (3) Ingest file (i.e, s6) to trigger IntraL0Compaction()
6978
+ for (int i = 6; i < 7; ++i) {
6418
6979
  ASSERT_EQ(i, NumTableFilesAtLevel(0));
6419
- IngestOneKeyValue(dbfull(), Key(i), value2, options);
6980
+ IngestOneKeyValue(dbfull(), Key(i), "new", options_);
6420
6981
  }
6421
6982
 
6422
- // Wake up flush job
6423
- sleeping_tasks.WakeUp();
6424
- sleeping_tasks.WaitUntilDone();
6983
+ SetupSyncPoints("FindIntraL0Compaction");
6984
+ ResumeCompactionThread();
6985
+
6425
6986
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
6426
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6427
6987
 
6428
- uint64_t error_count = 0;
6429
- db_->GetIntProperty("rocksdb.background-errors", &error_count);
6430
- ASSERT_EQ(error_count, 0);
6431
- ASSERT_GT(pick_intra_l0_count.load(), 0);
6988
+ ASSERT_TRUE(SyncPointsCalled());
6989
+ DisableSyncPoints();
6990
+
6991
+ // After compaction, we have LSM tree:
6992
+ //
6993
+ // memtable: m1[ 5:new@12 .. 1:new@8, 0:new@7]
6994
+ // L0: s7[6:new@13, 5:old@6 .. 0:old@1]
6995
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
6996
+ SequenceNumber compact_output_file_largest_seqno =
6997
+ GetLatestL0FileLargestSeqnoHelper();
6998
+
6999
+ ASSERT_OK(Flush());
7000
+ // After flush, we have LSM tree:
7001
+ //
7002
+ // L0: s8[5:new@12 .. 0:new@7],s7[6:new@13, 5:old@6 .. 0:old@1]
7003
+ ASSERT_EQ(2, NumTableFilesAtLevel(0));
7004
+ SequenceNumber flushed_file_largest_seqno =
7005
+ GetLatestL0FileLargestSeqnoHelper();
7006
+
7007
+ // To verify there isn't any file misorder leading to returning an old value
7008
+ // of Key(0) - Key(5) , which is caused by flushed table s8 has a
7009
+ // smaller largest seqno than the compaction output file s7's largest seqno
7010
+ // while the flushed table has the newer version of the values than the
7011
+ // compaction output file's.
7012
+ ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno);
6432
7013
  for (int i = 0; i < 6; ++i) {
6433
- ASSERT_EQ(bigvalue, Get(Key(i)));
7014
+ ASSERT_EQ("new", Get(Key(i)));
6434
7015
  }
6435
- for (int i = 6; i < 10; ++i) {
6436
- ASSERT_EQ(value2, Get(Key(i)));
7016
+ for (int i = 6; i < 7; ++i) {
7017
+ ASSERT_EQ("new", Get(Key(i)));
6437
7018
  }
6438
7019
  }
6439
7020
 
7021
+ TEST_F(DBCompactionTestL0FilesMisorderCorruption,
7022
+ FlushAfterIntraL0UniversalCompactionWithIngestedFile) {
7023
+ for (const std::string compaction_path_to_test :
7024
+ {"PickPeriodicCompaction", "PickCompactionToReduceSizeAmp",
7025
+ "PickCompactionToReduceSortedRuns", "PickDeleteTriggeredCompaction"}) {
7026
+ SetupOptions(CompactionStyle::kCompactionStyleUniversal,
7027
+ compaction_path_to_test);
7028
+ DestroyAndReopen(options_);
7029
+
7030
+ // To get accurate NumTableFilesAtLevel(0) when the number reaches
7031
+ // options_.level0_file_num_compaction_trigger
7032
+ PauseCompactionThread();
7033
+
7034
+ // To create below LSM tree
7035
+ // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
7036
+ //
7037
+ // memtable: m1 [ k2:new@8, k1:new@7]
7038
+ // L0: s4[k9:dummy@10], s3[k8:dummy@9],
7039
+ // s2[k7:old@6, k6:old@5].. s0[k3:old@2, k1:old@1]
7040
+ //
7041
+ // (1) Create 3 existing SST file (i.e, s0 - s2)
7042
+ ASSERT_OK(Put("k1", "old"));
7043
+ ASSERT_OK(Put("k3", "old"));
7044
+ ASSERT_OK(Flush());
7045
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
7046
+ ASSERT_OK(Put("k4", "old"));
7047
+ ASSERT_OK(Put("k5", "old"));
7048
+ ASSERT_OK(Flush());
7049
+ ASSERT_EQ(2, NumTableFilesAtLevel(0));
7050
+ ASSERT_OK(Put("k6", "old"));
7051
+ ASSERT_OK(Put("k7", "old"));
7052
+ ASSERT_OK(Flush());
7053
+ ASSERT_EQ(3, NumTableFilesAtLevel(0));
7054
+
7055
+ // (2) Create m1. Note that it contains an overlapped key with s0
7056
+ ASSERT_OK(Put("k1", "new")); // overlapped key
7057
+ ASSERT_OK(Put("k2", "new"));
7058
+
7059
+ // (3) Ingest two SST files s3, s4
7060
+ IngestOneKeyValue(dbfull(), "k8", "dummy", options_);
7061
+ IngestOneKeyValue(dbfull(), "k9", "dummy", options_);
7062
+ // Up to now, L0 contains s0 - s4
7063
+ ASSERT_EQ(5, NumTableFilesAtLevel(0));
7064
+
7065
+ if (compaction_path_to_test == "PickPeriodicCompaction") {
7066
+ AddFilesMarkedForPeriodicCompaction(5);
7067
+ } else if (compaction_path_to_test == "PickDeleteTriggeredCompaction") {
7068
+ AddFilesMarkedForCompaction(5);
7069
+ }
7070
+
7071
+ SetupSyncPoints(compaction_path_to_test);
7072
+ ResumeCompactionThread();
7073
+
7074
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
7075
+
7076
+ ASSERT_TRUE(SyncPointsCalled())
7077
+ << "failed for compaction path to test: " << compaction_path_to_test;
7078
+ DisableSyncPoints();
7079
+
7080
+ // After compaction, we have LSM tree:
7081
+ //
7082
+ // memtable: m1[ k2:new@8, k1:new@7]
7083
+ // L0: s5[k9:dummy@10, k8:dummy@9, k7:old@6 .. k3:old@2, k1:old@1]
7084
+ ASSERT_EQ(1, NumTableFilesAtLevel(0))
7085
+ << "failed for compaction path to test: " << compaction_path_to_test;
7086
+ SequenceNumber compact_output_file_largest_seqno =
7087
+ GetLatestL0FileLargestSeqnoHelper();
7088
+
7089
+ ASSERT_OK(Flush()) << "failed for compaction path to test: "
7090
+ << compaction_path_to_test;
7091
+ // After flush, we have LSM tree:
7092
+ //
7093
+ // L0: s6[k2:new@8, k1:new@7],
7094
+ // s5[k9:dummy@10, k8:dummy@9, k7:old@6 .. k3:old@2, k1:old@1]
7095
+ ASSERT_EQ(2, NumTableFilesAtLevel(0))
7096
+ << "failed for compaction path to test: " << compaction_path_to_test;
7097
+ SequenceNumber flushed_file_largest_seqno =
7098
+ GetLatestL0FileLargestSeqnoHelper();
7099
+
7100
+ // To verify there isn't any file misorder leading to returning an old
7101
+ // value of "k1" , which is caused by flushed table s6 has a
7102
+ // smaller largest seqno than the compaction output file s5's largest seqno
7103
+ // while the flushed table has the newer version of the value
7104
+ // than the compaction output file's.
7105
+ ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno)
7106
+ << "failed for compaction path to test: " << compaction_path_to_test;
7107
+ EXPECT_EQ(Get("k1"), "new")
7108
+ << "failed for compaction path to test: " << compaction_path_to_test;
7109
+ }
7110
+
7111
+ Destroy(options_);
7112
+ }
7113
+
7114
+ TEST_F(DBCompactionTestL0FilesMisorderCorruption,
7115
+ FlushAfterIntraL0FIFOCompactionWithIngestedFile) {
7116
+ for (const std::string compaction_path_to_test : {"FindIntraL0Compaction"}) {
7117
+ SetupOptions(CompactionStyle::kCompactionStyleFIFO,
7118
+ compaction_path_to_test);
7119
+ DestroyAndReopen(options_);
7120
+
7121
+ // To create below LSM tree
7122
+ // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
7123
+ //
7124
+ // memtable: m1 [ k2:new@4, k1:new@3]
7125
+ // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
7126
+ //
7127
+ // (1) Create an existing SST file s0
7128
+ ASSERT_OK(Put("k1", "old"));
7129
+ ASSERT_OK(Put("k3", "old"));
7130
+ ASSERT_OK(Flush());
7131
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
7132
+
7133
+ // (2) Create memtable m1. Note that it contains an overlapped key with s0
7134
+ ASSERT_OK(Put("k1", "new")); // overlapped key
7135
+ ASSERT_OK(Put("k2", "new"));
7136
+
7137
+ // To get accurate NumTableFilesAtLevel(0) when the number reaches
7138
+ // options_.level0_file_num_compaction_trigger
7139
+ PauseCompactionThread();
7140
+
7141
+ // (3) Ingest two SST files s1, s2
7142
+ IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
7143
+ IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
7144
+ // Up to now, L0 contains s0, s1, s2
7145
+ ASSERT_EQ(3, NumTableFilesAtLevel(0));
7146
+
7147
+ SetupSyncPoints(compaction_path_to_test);
7148
+ ResumeCompactionThread();
7149
+
7150
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
7151
+
7152
+ ASSERT_TRUE(SyncPointsCalled())
7153
+ << "failed for compaction path to test: " << compaction_path_to_test;
7154
+ DisableSyncPoints();
7155
+ // After compaction, we have LSM tree:
7156
+ //
7157
+ // memtable: m1 [ k2:new@4, k1:new@3]
7158
+ // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
7159
+ ASSERT_EQ(1, NumTableFilesAtLevel(0))
7160
+ << "failed for compaction path to test: " << compaction_path_to_test;
7161
+ SequenceNumber compact_output_file_largest_seqno =
7162
+ GetLatestL0FileLargestSeqnoHelper();
7163
+
7164
+ ASSERT_OK(Flush()) << "failed for compaction path to test: "
7165
+ << compaction_path_to_test;
7166
+ // After flush, we have LSM tree:
7167
+ //
7168
+ // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
7169
+ // k1:old@1]
7170
+ ASSERT_EQ(2, NumTableFilesAtLevel(0))
7171
+ << "failed for compaction path to test: " << compaction_path_to_test;
7172
+ SequenceNumber flushed_file_largest_seqno =
7173
+ GetLatestL0FileLargestSeqnoHelper();
7174
+
7175
+ // To verify there isn't any file misorder leading to returning an old
7176
+ // value of "k1" , which is caused by flushed table s4 has a
7177
+ // smaller largest seqno than the compaction output file s3's largest seqno
7178
+ // while the flushed table has the newer version of the value
7179
+ // than the compaction output file's.
7180
+ ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno)
7181
+ << "failed for compaction path to test: " << compaction_path_to_test;
7182
+ EXPECT_EQ(Get("k1"), "new")
7183
+ << "failed for compaction path to test: " << compaction_path_to_test;
7184
+ }
7185
+
7186
+ Destroy(options_);
7187
+ }
7188
+
7189
+ class DBCompactionTestL0FilesMisorderCorruptionWithParam
7190
+ : public DBCompactionTestL0FilesMisorderCorruption,
7191
+ public testing::WithParamInterface<CompactionStyle> {
7192
+ public:
7193
+ DBCompactionTestL0FilesMisorderCorruptionWithParam()
7194
+ : DBCompactionTestL0FilesMisorderCorruption() {}
7195
+ };
7196
+
7197
+ // TODO: add `CompactionStyle::kCompactionStyleLevel` to testing parameter,
7198
+ // which requires careful unit test
7199
+ // design for ingesting file to L0 and CompactRange()/CompactFile() to L0
7200
+ INSTANTIATE_TEST_CASE_P(
7201
+ DBCompactionTestL0FilesMisorderCorruptionWithParam,
7202
+ DBCompactionTestL0FilesMisorderCorruptionWithParam,
7203
+ ::testing::Values(CompactionStyle::kCompactionStyleUniversal,
7204
+ CompactionStyle::kCompactionStyleFIFO));
7205
+
7206
+ TEST_P(DBCompactionTestL0FilesMisorderCorruptionWithParam,
7207
+ FlushAfterIntraL0CompactFileWithIngestedFile) {
7208
+ SetupOptions(GetParam(), "CompactFile");
7209
+ DestroyAndReopen(options_);
7210
+
7211
+ // To create below LSM tree
7212
+ // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
7213
+ //
7214
+ // memtable: m1 [ k2:new@4, k1:new@3]
7215
+ // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
7216
+ //
7217
+ // (1) Create an existing SST file s0
7218
+ ASSERT_OK(Put("k1", "old"));
7219
+ ASSERT_OK(Put("k3", "old"));
7220
+ ASSERT_OK(Flush());
7221
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
7222
+
7223
+ // (2) Create memtable m1. Note that it contains an overlapped key with s0
7224
+ ASSERT_OK(Put("k1", "new")); // overlapped key
7225
+ ASSERT_OK(Put("k2", "new"));
7226
+
7227
+ // (3) Ingest two SST files s1, s2
7228
+ IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
7229
+ IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
7230
+ // Up to now, L0 contains s0, s1, s2
7231
+ ASSERT_EQ(3, NumTableFilesAtLevel(0));
7232
+
7233
+ ColumnFamilyMetaData cf_meta_data;
7234
+ db_->GetColumnFamilyMetaData(&cf_meta_data);
7235
+ ASSERT_EQ(cf_meta_data.levels[0].files.size(), 3);
7236
+ std::vector<std::string> input_files;
7237
+ for (const auto& file : cf_meta_data.levels[0].files) {
7238
+ input_files.push_back(file.name);
7239
+ }
7240
+ ASSERT_EQ(input_files.size(), 3);
7241
+
7242
+ Status s = db_->CompactFiles(CompactionOptions(), input_files, 0);
7243
+ // After compaction, we have LSM tree:
7244
+ //
7245
+ // memtable: m1 [ k2:new@4, k1:new@3]
7246
+ // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
7247
+ ASSERT_OK(s);
7248
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
7249
+ SequenceNumber compact_output_file_largest_seqno =
7250
+ GetLatestL0FileLargestSeqnoHelper();
7251
+
7252
+ ASSERT_OK(Flush());
7253
+ // After flush, we have LSM tree:
7254
+ //
7255
+ // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
7256
+ // k1:old@1]
7257
+ ASSERT_EQ(2, NumTableFilesAtLevel(0));
7258
+ SequenceNumber flushed_file_largest_seqno =
7259
+ GetLatestL0FileLargestSeqnoHelper();
7260
+
7261
+ // To verify there isn't any file misorder leading to returning an old value
7262
+ // of "k1" , which is caused by flushed table s4 has a smaller
7263
+ // largest seqno than the compaction output file s3's largest seqno while the
7264
+ // flushed table has the newer version of the value than the
7265
+ // compaction output file's.
7266
+ ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno);
7267
+ EXPECT_EQ(Get("k1"), "new");
7268
+
7269
+ Destroy(options_);
7270
+ }
7271
+
7272
+ TEST_P(DBCompactionTestL0FilesMisorderCorruptionWithParam,
7273
+ FlushAfterIntraL0CompactRangeWithIngestedFile) {
7274
+ SetupOptions(GetParam(), "CompactRange");
7275
+ DestroyAndReopen(options_);
7276
+
7277
+ // To create below LSM tree
7278
+ // (key:value@n indicates key-value pair has seqno "n", L0 is sorted):
7279
+ //
7280
+ // memtable: m1 [ k2:new@4, k1:new@3]
7281
+ // L0: s2[k5:dummy@6], s1[k4:dummy@5], s0[k3:old@2, k1:old@1]
7282
+ //
7283
+ // (1) Create an existing SST file s0
7284
+ ASSERT_OK(Put("k1", "old"));
7285
+ ASSERT_OK(Put("k3", "old"));
7286
+ ASSERT_OK(Flush());
7287
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
7288
+
7289
+ // (2) Create memtable m1. Note that it contains an overlapped key with s0
7290
+ ASSERT_OK(Put("k1", "new")); // overlapped key
7291
+ ASSERT_OK(Put("k2", "new"));
7292
+
7293
+ // (3) Ingest two SST files s1, s2
7294
+ IngestOneKeyValue(dbfull(), "k4", "dummy", options_);
7295
+ IngestOneKeyValue(dbfull(), "k5", "dummy", options_);
7296
+ // Up to now, L0 contains s0, s1, s2
7297
+ ASSERT_EQ(3, NumTableFilesAtLevel(0));
7298
+
7299
+ if (options_.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
7300
+ SetupSyncPoints("CompactRange");
7301
+ }
7302
+ // `start` and `end` are carefully chosen so that compact range:
7303
+ // (1) doesn't overlap with memtable therefore the memtable won't be flushed
7304
+ // (2) should target at compacting s0 with s1 and s2
7305
+ Slice start("k3"), end("k5");
7306
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &end));
7307
+ // After compaction, we have LSM tree:
7308
+ //
7309
+ // memtable: m1 [ k2:new@4, k1:new@3]
7310
+ // L0: s3[k5:dummy@6, k4:dummy@5, k3:old@2, k1:old@1]
7311
+ if (options_.compaction_style == CompactionStyle::kCompactionStyleFIFO) {
7312
+ ASSERT_TRUE(SyncPointsCalled());
7313
+ DisableSyncPoints();
7314
+ }
7315
+ ASSERT_EQ(1, NumTableFilesAtLevel(0));
7316
+ SequenceNumber compact_output_file_largest_seqno =
7317
+ GetLatestL0FileLargestSeqnoHelper();
7318
+
7319
+ ASSERT_OK(Flush());
7320
+ // After flush, we have LSM tree:
7321
+ //
7322
+ // L0: s4[k2:new@4, k1:new@3], s3[k5:dummy@6, k4:dummy@5, k3:old@2,
7323
+ // k1:old@1]
7324
+ ASSERT_EQ(2, NumTableFilesAtLevel(0));
7325
+ SequenceNumber flushed_file_largest_seqno =
7326
+ GetLatestL0FileLargestSeqnoHelper();
7327
+
7328
+ // To verify there isn't any file misorder leading to returning an old value
7329
+ // of "k1" , which is caused by flushed table s4 has a smaller
7330
+ // largest seqno than the compaction output file s3's largest seqno while the
7331
+ // flushed table has the newer version of the value than the
7332
+ // compaction output file's.
7333
+ ASSERT_TRUE(flushed_file_largest_seqno < compact_output_file_largest_seqno);
7334
+ EXPECT_EQ(Get("k1"), "new");
7335
+
7336
+ Destroy(options_);
7337
+ }
7338
+
6440
7339
  TEST_P(DBCompactionTestWithBottommostParam, SequenceKeysManualCompaction) {
6441
7340
  constexpr int kSstNum = 10;
6442
7341
  Options options = CurrentOptions();