@nxtedition/rocksdb 13.5.13 → 14.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/binding.cc +33 -2
  2. package/binding.gyp +2 -2
  3. package/chained-batch.js +9 -16
  4. package/deps/rocksdb/rocksdb/BUCK +18 -1
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
  6. package/deps/rocksdb/rocksdb/Makefile +20 -9
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
  17. package/deps/rocksdb/rocksdb/db/c.cc +207 -0
  18. package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
  20. package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
  47. package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
  48. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
  49. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
  50. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
  51. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
  52. package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
  53. package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
  54. package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
  55. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
  57. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
  63. package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
  65. package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
  66. package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
  67. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
  68. package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
  69. package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
  70. package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
  71. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  72. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
  73. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
  74. package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
  75. package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
  76. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
  77. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
  78. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
  79. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
  80. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
  90. package/deps/rocksdb/rocksdb/env/env.cc +12 -0
  91. package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
  92. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
  94. package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
  95. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
  98. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
  100. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
  102. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
  104. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
  110. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
  112. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
  113. package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
  115. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
  116. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
  119. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
  120. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
  122. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  128. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
  129. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
  130. package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
  131. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
  132. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
  133. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
  134. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  137. package/deps/rocksdb/rocksdb/options/options.cc +2 -0
  138. package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
  139. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
  140. package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
  141. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
  142. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
  143. package/deps/rocksdb/rocksdb/src.mk +8 -2
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
  147. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
  148. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
  149. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
  150. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
  151. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
  152. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
  153. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
  155. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
  156. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
  157. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
  158. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  160. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
  161. package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
  162. package/deps/rocksdb/rocksdb/table/format.cc +6 -12
  163. package/deps/rocksdb/rocksdb/table/format.h +10 -0
  164. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  165. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
  166. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
  167. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
  168. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
  169. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
  170. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
  171. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  172. package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
  173. package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
  174. package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
  175. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
  176. package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
  177. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
  178. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
  179. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
  180. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
  181. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
  182. package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
  183. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
  184. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
  185. package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
  186. package/deps/rocksdb/rocksdb/util/coding.h +3 -3
  187. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
  188. package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
  189. package/deps/rocksdb/rocksdb/util/compression.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
  191. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
  192. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
  193. package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
  194. package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
  195. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
  196. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
  197. package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
  198. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  199. package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
  200. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
  201. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  202. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
  203. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
  204. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
  205. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
  206. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
  207. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
  208. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
  209. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
  210. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
  211. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
  212. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
  213. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
  214. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
  217. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
  218. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
  219. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
  220. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
  221. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
  222. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
  223. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
  224. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
  225. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
  226. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
  227. package/index.js +3 -3
  228. package/package.json +1 -1
  229. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  230. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  231. package/util.h +38 -12
  232. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
@@ -7,6 +7,7 @@
7
7
 
8
8
  #include <functional>
9
9
  #include <memory>
10
+ #include <sstream>
10
11
 
11
12
  #include "db/db_test_util.h"
12
13
  #include "db/dbformat.h"
@@ -2417,102 +2418,130 @@ TEST_F(ExternalSSTFileTest, SnapshotInconsistencyBug) {
2417
2418
  }
2418
2419
 
2419
2420
  TEST_P(ExternalSSTFileTest, IngestBehind) {
2420
- Options options = CurrentOptions();
2421
- options.compaction_style = kCompactionStyleUniversal;
2422
- options.num_levels = 3;
2423
- options.disable_auto_compactions = false;
2424
- DestroyAndReopen(options);
2425
- std::vector<std::pair<std::string, std::string>> file_data;
2426
- std::map<std::string, std::string> true_data;
2421
+ for (bool cf_option : {false, true}) {
2422
+ SCOPED_TRACE("cf_option = " + std::to_string(cf_option));
2423
+ Options options = CurrentOptions();
2424
+ options.compaction_style = kCompactionStyleUniversal;
2425
+ options.num_levels = 3;
2426
+ options.disable_auto_compactions = false;
2427
+ DestroyAndReopen(options);
2428
+ std::vector<std::pair<std::string, std::string>> file_data;
2429
+ std::map<std::string, std::string> true_data;
2427
2430
 
2428
- // Insert 100 -> 200 into the memtable
2429
- for (int i = 100; i <= 200; i++) {
2430
- ASSERT_OK(Put(Key(i), "memtable"));
2431
- }
2431
+ // Insert 100 -> 200 into the memtable
2432
+ for (int i = 100; i <= 200; i++) {
2433
+ ASSERT_OK(Put(Key(i), "memtable"));
2434
+ }
2432
2435
 
2433
- // Insert 100 -> 200 using IngestExternalFile
2434
- file_data.clear();
2435
- for (int i = 0; i <= 20; i++) {
2436
- file_data.emplace_back(Key(i), "ingest_behind");
2437
- true_data[Key(i)] = "ingest_behind";
2438
- }
2436
+ // Insert 100 -> 200 using IngestExternalFile
2437
+ file_data.clear();
2438
+ for (int i = 0; i <= 20; i++) {
2439
+ file_data.emplace_back(Key(i), "ingest_behind");
2440
+ true_data[Key(i)] = "ingest_behind";
2441
+ }
2439
2442
 
2440
- bool allow_global_seqno = true;
2441
- bool ingest_behind = true;
2442
- bool write_global_seqno = std::get<0>(GetParam());
2443
- bool verify_checksums_before_ingest = std::get<1>(GetParam());
2443
+ bool allow_global_seqno = true;
2444
+ bool ingest_behind = true;
2445
+ bool write_global_seqno = std::get<0>(GetParam());
2446
+ bool verify_checksums_before_ingest = std::get<1>(GetParam());
2444
2447
 
2445
- // Can't ingest behind since allow_ingest_behind isn't set to true
2446
- ASSERT_NOK(GenerateAndAddExternalFile(
2447
- options, file_data, -1, allow_global_seqno, write_global_seqno,
2448
- verify_checksums_before_ingest, ingest_behind, false /*sort_data*/,
2449
- &true_data));
2448
+ // Can't ingest behind since allow_ingest_behind isn't set to true
2449
+ ASSERT_NOK(GenerateAndAddExternalFile(
2450
+ options, file_data, -1, allow_global_seqno, write_global_seqno,
2451
+ verify_checksums_before_ingest, ingest_behind, false /*sort_data*/,
2452
+ &true_data));
2450
2453
 
2451
- options.allow_ingest_behind = true;
2452
- // check that we still can open the DB, as num_levels should be
2453
- // sanitized to 3
2454
- options.num_levels = 2;
2455
- DestroyAndReopen(options);
2454
+ if (cf_option) {
2455
+ options.cf_allow_ingest_behind = true;
2456
+ } else {
2457
+ options.allow_ingest_behind = true;
2458
+ }
2459
+ // check that we still can open the DB, as num_levels should be
2460
+ // sanitized to 3
2461
+ options.num_levels = 2;
2462
+ DestroyAndReopen(options);
2456
2463
 
2457
- options.num_levels = 3;
2458
- DestroyAndReopen(options);
2459
- true_data.clear();
2460
- // Insert 100 -> 200 into the memtable
2461
- for (int i = 100; i <= 200; i++) {
2462
- ASSERT_OK(Put(Key(i), "memtable"));
2463
- true_data[Key(i)] = "memtable";
2464
- }
2465
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2466
- // Universal picker should go at second from the bottom level
2467
- ASSERT_EQ("0,1", FilesPerLevel());
2468
- ASSERT_OK(GenerateAndAddExternalFile(
2469
- options, file_data, -1, allow_global_seqno, write_global_seqno,
2470
- verify_checksums_before_ingest, true /*ingest_behind*/,
2471
- false /*sort_data*/, &true_data));
2472
- ASSERT_EQ("0,1,1", FilesPerLevel());
2473
- // this time ingest should fail as the file doesn't fit to the bottom level
2474
- ASSERT_NOK(GenerateAndAddExternalFile(
2475
- options, file_data, -1, allow_global_seqno, write_global_seqno,
2476
- verify_checksums_before_ingest, true /*ingest_behind*/,
2477
- false /*sort_data*/, &true_data));
2478
- ASSERT_EQ("0,1,1", FilesPerLevel());
2479
- std::vector<std::vector<FileMetaData>> level_to_files;
2480
- dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2481
- uint64_t ingested_file_number = level_to_files[2][0].fd.GetNumber();
2482
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2483
- // Last level should not be compacted
2484
- ASSERT_EQ("0,1,1", FilesPerLevel());
2485
- dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2486
- ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2487
- size_t kcnt = 0;
2488
- VerifyDBFromMap(true_data, &kcnt, false);
2464
+ options.num_levels = 3;
2465
+ DestroyAndReopen(options);
2466
+ true_data.clear();
2467
+ // Insert 100 -> 200 into the memtable
2468
+ for (int i = 100; i <= 200; i++) {
2469
+ ASSERT_OK(Put(Key(i), "memtable"));
2470
+ true_data[Key(i)] = "memtable";
2471
+ }
2472
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2473
+ // Universal picker should go at second from the bottom level
2474
+ ASSERT_EQ("0,1", FilesPerLevel());
2475
+ ASSERT_OK(GenerateAndAddExternalFile(
2476
+ options, file_data, -1, allow_global_seqno, write_global_seqno,
2477
+ verify_checksums_before_ingest, true /*ingest_behind*/,
2478
+ false /*sort_data*/, &true_data));
2479
+ ASSERT_EQ("0,1,1", FilesPerLevel());
2480
+ // this time ingest should fail as the file doesn't fit to the bottom level
2481
+ ASSERT_NOK(GenerateAndAddExternalFile(
2482
+ options, file_data, -1, allow_global_seqno, write_global_seqno,
2483
+ verify_checksums_before_ingest, true /*ingest_behind*/,
2484
+ false /*sort_data*/, &true_data));
2485
+ ASSERT_EQ("0,1,1", FilesPerLevel());
2486
+ std::vector<std::vector<FileMetaData>> level_to_files;
2487
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(),
2488
+ &level_to_files);
2489
+ uint64_t ingested_file_number = level_to_files[2][0].fd.GetNumber();
2490
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2491
+ // Last level should not be compacted
2492
+ ASSERT_EQ("0,1,1", FilesPerLevel());
2493
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(),
2494
+ &level_to_files);
2495
+ ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2496
+ size_t kcnt = 0;
2497
+ VerifyDBFromMap(true_data, &kcnt, false);
2489
2498
 
2490
- // Auto-compaction should not include the last level.
2491
- // Trigger compaction if size amplification exceeds 110%.
2492
- options.compaction_options_universal.max_size_amplification_percent = 110;
2493
- options.level0_file_num_compaction_trigger = 4;
2494
- ASSERT_OK(TryReopen(options));
2495
- Random rnd(301);
2496
- for (int i = 0; i < 4; ++i) {
2497
- for (int j = 0; j < 10; j++) {
2498
- true_data[Key(j)] = rnd.RandomString(1000);
2499
- ASSERT_OK(Put(Key(j), true_data[Key(j)]));
2499
+ // Auto-compaction should not include the last level.
2500
+ // Trigger compaction if size amplification exceeds 110%.
2501
+ options.compaction_options_universal.max_size_amplification_percent = 110;
2502
+ options.level0_file_num_compaction_trigger = 4;
2503
+ ASSERT_OK(TryReopen(options));
2504
+ Random rnd(301);
2505
+ for (int i = 0; i < 4; ++i) {
2506
+ for (int j = 0; j < 10; j++) {
2507
+ true_data[Key(j)] = rnd.RandomString(1000);
2508
+ ASSERT_OK(Put(Key(j), true_data[Key(j)]));
2509
+ }
2510
+ ASSERT_OK(Flush());
2500
2511
  }
2501
- ASSERT_OK(Flush());
2512
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
2513
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(),
2514
+ &level_to_files);
2515
+ ASSERT_EQ(1, level_to_files[2].size());
2516
+ ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2517
+
2518
+ // Turning off the option allows DB to compact ingested files.
2519
+ if (cf_option) {
2520
+ // Test that another CF does not allow ingest behind
2521
+ ColumnFamilyHandle* new_cfh;
2522
+ Options new_cf_option;
2523
+ ASSERT_OK(db_->CreateColumnFamily(new_cf_option, "new_cf", &new_cfh));
2524
+ ASSERT_TRUE(GenerateAndAddExternalFile(
2525
+ new_cf_option, file_data, -1, allow_global_seqno,
2526
+ write_global_seqno, verify_checksums_before_ingest,
2527
+ true /*ingest_behind*/, false /*sort_data*/, nullptr,
2528
+ /*cfh=*/new_cfh)
2529
+ .IsInvalidArgument());
2530
+ ASSERT_OK(db_->DropColumnFamily(new_cfh));
2531
+ ASSERT_OK(db_->DestroyColumnFamilyHandle(new_cfh));
2532
+
2533
+ options.cf_allow_ingest_behind = false;
2534
+ } else {
2535
+ options.allow_ingest_behind = false;
2536
+ }
2537
+ ASSERT_OK(TryReopen(options));
2538
+ ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2539
+ dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(),
2540
+ &level_to_files);
2541
+ ASSERT_EQ(1, level_to_files[2].size());
2542
+ ASSERT_NE(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2543
+ VerifyDBFromMap(true_data, &kcnt, false);
2502
2544
  }
2503
- ASSERT_OK(dbfull()->TEST_WaitForCompact());
2504
- dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2505
- ASSERT_EQ(1, level_to_files[2].size());
2506
- ASSERT_EQ(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2507
-
2508
- // Turning off the option allows DB to compact ingested files.
2509
- options.allow_ingest_behind = false;
2510
- ASSERT_OK(TryReopen(options));
2511
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2512
- dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &level_to_files);
2513
- ASSERT_EQ(1, level_to_files[2].size());
2514
- ASSERT_NE(ingested_file_number, level_to_files[2][0].fd.GetNumber());
2515
- VerifyDBFromMap(true_data, &kcnt, false);
2516
2545
  }
2517
2546
 
2518
2547
  TEST_F(ExternalSSTFileTest, SkipBloomFilter) {
@@ -3514,19 +3543,26 @@ TEST_F(ExternalSSTFileWithTimestampTest, SanityCheck) {
3514
3543
  // overlapping key ranges.
3515
3544
  ASSERT_TRUE(IngestExternalUDTFile({file1, file2}).IsNotSupported());
3516
3545
 
3517
- options.allow_ingest_behind = true;
3518
- DestroyAndReopen(options);
3519
- IngestExternalFileOptions opts;
3546
+ for (bool cf_option : {false, true}) {
3547
+ SCOPED_TRACE("cf_option = " + std::to_string(cf_option));
3548
+ if (cf_option) {
3549
+ options.cf_allow_ingest_behind = true;
3550
+ } else {
3551
+ options.allow_ingest_behind = true;
3552
+ }
3553
+ DestroyAndReopen(options);
3554
+ IngestExternalFileOptions opts;
3520
3555
 
3521
- // TODO(yuzhangyu): support ingestion behind for user-defined timestamps?
3522
- // Ingesting external files with user-defined timestamps requires searching
3523
- // through the whole lsm tree to make sure there is no key range overlap with
3524
- // the db. Ingestion behind currently is doing a simply placing it at the
3525
- // bottom level step without a search, so we don't allow it either.
3526
- opts.ingest_behind = true;
3527
- ASSERT_TRUE(db_->IngestExternalFile({file1}, opts).IsNotSupported());
3556
+ // TODO(yuzhangyu): support ingestion behind for user-defined timestamps?
3557
+ // Ingesting external files with user-defined timestamps requires searching
3558
+ // through the whole lsm tree to make sure there is no key range overlap
3559
+ // with the db. Ingestion behind currently is doing a simply placing it at
3560
+ // the bottom level step without a search, so we don't allow it either.
3561
+ opts.ingest_behind = true;
3562
+ ASSERT_TRUE(db_->IngestExternalFile({file1}, opts).IsNotSupported());
3528
3563
 
3529
- DestroyAndRecreateExternalSSTFilesDir();
3564
+ DestroyAndRecreateExternalSSTFilesDir();
3565
+ }
3530
3566
  }
3531
3567
 
3532
3568
  TEST_F(ExternalSSTFileWithTimestampTest, UDTSettingsCompatibilityCheck) {
@@ -3818,99 +3854,32 @@ TEST_P(IngestDBGeneratedFileTest, FailureCase) {
3818
3854
  ASSERT_OK(Put(1, Key(k), "cf1_" + Key(k)));
3819
3855
  }
3820
3856
  ASSERT_OK(Flush(/*cf=*/1));
3821
- {
3822
- // Verify that largest key of the file has non-zero seqno.
3823
- std::vector<std::vector<FileMetaData>> metadata;
3824
- dbfull()->TEST_GetFilesMetaData(handles_[1], &metadata, nullptr);
3825
- const FileMetaData& file = metadata[0][0];
3826
- ValueType vtype;
3827
- SequenceNumber seq;
3828
- UnPackSequenceAndType(ExtractInternalKeyFooter(file.largest.Encode()),
3829
- &seq, &vtype);
3830
- ASSERT_GE(seq, 0);
3831
- }
3832
- std::vector<LiveFileMetaData> live_meta;
3833
- db_->GetLiveFilesMetaData(&live_meta);
3834
- ASSERT_EQ(live_meta.size(), 1);
3835
- std::vector<std::string> to_ingest_files;
3836
- to_ingest_files.emplace_back(live_meta[0].directory + "/" +
3837
- live_meta[0].relative_filename);
3838
- // Ingesting a file whose boundary key has non-zero seqno.
3839
- Status s = db_->IngestExternalFile(to_ingest_files, ingest_opts);
3840
- // This error msg is from checking seqno of boundary keys.
3841
- ASSERT_TRUE(
3842
- s.ToString().find("External file has non zero sequence number") !=
3843
- std::string::npos);
3844
- ASSERT_NOK(s);
3845
-
3846
- {
3847
- // Only non-boundary key with non-zero seqno.
3848
- const Snapshot* snapshot = db_->GetSnapshot();
3849
- ASSERT_OK(Put(1, Key(70), "cf1_" + Key(70)));
3850
- ASSERT_OK(Flush(1));
3851
- CompactRangeOptions cro;
3852
- cro.bottommost_level_compaction =
3853
- BottommostLevelCompaction::kForceOptimized;
3854
- ASSERT_OK(db_->CompactRange(cro, handles_[1], nullptr, nullptr));
3855
-
3856
- // Verify that only the non-boundary key of the file has non-zero seqno.
3857
- std::vector<std::vector<FileMetaData>> metadata;
3858
- // File may be at different level for different options.
3859
- dbfull()->TEST_GetFilesMetaData(handles_[1], &metadata, nullptr);
3860
- bool found_file = false;
3861
- for (const auto& level : metadata) {
3862
- if (level.empty()) {
3863
- continue;
3864
- }
3865
- ASSERT_FALSE(found_file);
3866
- found_file = true;
3867
- ASSERT_EQ(1, level.size());
3868
- const FileMetaData& file = level[0];
3869
- ValueType vtype;
3870
- SequenceNumber seq;
3871
- UnPackSequenceAndType(ExtractInternalKeyFooter(file.largest.Encode()),
3872
- &seq, &vtype);
3873
- ASSERT_EQ(seq, 0);
3874
- UnPackSequenceAndType(ExtractInternalKeyFooter(file.smallest.Encode()),
3875
- &seq, &vtype);
3876
- ASSERT_EQ(seq, 0);
3877
- ASSERT_GT(file.fd.largest_seqno, 0);
3878
- }
3879
- ASSERT_TRUE(found_file);
3880
- live_meta.clear();
3881
- db_->GetLiveFilesMetaData(&live_meta);
3882
- ASSERT_EQ(live_meta.size(), 1);
3883
- to_ingest_files[0] =
3884
- live_meta[0].directory + "/" + live_meta[0].relative_filename;
3885
- s = db_->IngestExternalFile(to_ingest_files, ingest_opts);
3886
- ASSERT_NOK(s);
3887
- // This error msg is from checking largest seqno in table property.
3888
- ASSERT_TRUE(s.ToString().find("non zero largest sequence number") !=
3889
- std::string::npos);
3890
- db_->ReleaseSnapshot(snapshot);
3891
- }
3892
3857
 
3858
+ Status s;
3893
3859
  CompactRangeOptions cro;
3894
3860
  cro.bottommost_level_compaction =
3895
3861
  BottommostLevelCompaction::kForceOptimized;
3896
3862
  ASSERT_OK(db_->CompactRange(cro, handles_[1], nullptr, nullptr));
3897
- live_meta.clear();
3863
+
3864
+ std::vector<LiveFileMetaData> live_meta;
3865
+ std::vector<std::string> to_ingest_files;
3898
3866
  db_->GetLiveFilesMetaData(&live_meta);
3899
3867
  ASSERT_EQ(live_meta.size(), 1);
3868
+ ASSERT_EQ(live_meta[0].column_family_name, "toto");
3900
3869
  ASSERT_EQ(0, live_meta[0].largest_seqno);
3901
- to_ingest_files[0] =
3902
- live_meta[0].directory + "/" + live_meta[0].relative_filename;
3870
+ to_ingest_files.emplace_back(live_meta[0].directory + "/" +
3871
+ live_meta[0].relative_filename);
3903
3872
 
3873
+ // Ingesting a DB generated file with allow_db_generated_files = false
3904
3874
  ingest_opts.allow_db_generated_files = false;
3905
- // Ingesting a DB genrate file with allow_db_generated_files = false;
3906
3875
  s = db_->IngestExternalFile(to_ingest_files, ingest_opts);
3907
3876
  ASSERT_TRUE(s.ToString().find("External file version not found") !=
3908
3877
  std::string::npos);
3909
3878
  ASSERT_NOK(s);
3910
3879
 
3911
3880
  const std::string err =
3912
- "An ingested file is assigned to a non-zero sequence number, which is "
3913
- "incompatible with ingestion option allow_db_generated_files";
3881
+ "An ingested file overlaps with existing data in the DB and has been "
3882
+ "assigned a non-zero sequence number";
3914
3883
  ingest_opts.allow_db_generated_files = true;
3915
3884
  s = db_->IngestExternalFile(to_ingest_files, ingest_opts);
3916
3885
  ASSERT_TRUE(s.ToString().find(err) != std::string::npos);
@@ -4111,6 +4080,453 @@ TEST_P(IngestDBGeneratedFileTest2, NotOverlapWithDB) {
4111
4080
  }
4112
4081
  } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));
4113
4082
  }
4083
+
4084
+ TEST_P(IngestDBGeneratedFileTest2, NonZeroSeqno) {
4085
+ // Test ingestion of DB-generated SST files that contain non-zero sequence
4086
+ // numbers.
4087
+ IngestExternalFileOptions ingest_opts;
4088
+ ingest_opts.allow_db_generated_files = true;
4089
+ // This only works since we are ingesting without snapshot
4090
+ // Failure case will be tested below.
4091
+ ingest_opts.snapshot_consistency = std::get<0>(GetParam());
4092
+ ingest_opts.allow_global_seqno = std::get<1>(GetParam());
4093
+ ingest_opts.allow_blocking_flush = std::get<2>(GetParam());
4094
+ ingest_opts.fail_if_not_bottommost_level = std::get<3>(GetParam());
4095
+ ingest_opts.link_files = std::get<4>(GetParam());
4096
+ Random* rnd = Random::GetTLSInstance();
4097
+
4098
+ do {
4099
+ SCOPED_TRACE("option_config_ = " + std::to_string(option_config_));
4100
+
4101
+ Options options = CurrentOptions();
4102
+ options.statistics = CreateDBStatistics();
4103
+ options.allow_concurrent_memtable_write =
4104
+ false; // Required for VectorRepFactory
4105
+ CreateAndReopenWithCF({"non_overlap", "overlap"}, options);
4106
+
4107
+ ColumnFamilyHandle* non_overlap_cf = handles_[1];
4108
+ ColumnFamilyHandle* overlap_cf = handles_[2];
4109
+
4110
+ std::vector<std::string> expected_values;
4111
+ expected_values.resize(100);
4112
+ WriteOptions wo;
4113
+ // Setup target CF with non-overlapping base data Key1 and Key99
4114
+ // Will ingest keys [1, 98] below.
4115
+ expected_values[0] = rnd->RandomString(100);
4116
+ ASSERT_OK(db_->Put(wo, non_overlap_cf, Key(0), expected_values[0]));
4117
+ ASSERT_OK(db_->Flush({}, non_overlap_cf));
4118
+ expected_values[99] = rnd->RandomString(100);
4119
+ ASSERT_OK(db_->Put(wo, non_overlap_cf, Key(99), expected_values[99]));
4120
+
4121
+ // Set up overlapping cf
4122
+ ASSERT_OK(db_->Put(wo, overlap_cf, Key(50), rnd->RandomString(100)));
4123
+
4124
+ // Create temp CF/DB
4125
+ Options temp_cf_opts;
4126
+ ColumnFamilyHandle* temp_cfh = nullptr;
4127
+ DB* from_db = nullptr;
4128
+ std::string temp_db_name;
4129
+ // Using a separate DB also validates that latest sequence number
4130
+ // of target db is updated after ingestion (to the max sequence number
4131
+ // in ingested files).
4132
+ const bool use_temp_db = rnd->OneIn(2);
4133
+ SCOPED_TRACE("use_temp_db: " + std::to_string(use_temp_db));
4134
+
4135
+ std::vector<std::string> sst_file_paths;
4136
+ // optional L5: files in key range [70, 98]
4137
+ // L6: files in key range [1, 79]
4138
+ temp_cf_opts.target_file_size_base =
4139
+ 20 << 10; // Small files to create multiple SSTs
4140
+ temp_cf_opts.num_levels = 7;
4141
+ temp_cf_opts.disable_auto_compactions = true; // Manually set up LSM
4142
+ temp_cf_opts.env = options.env;
4143
+
4144
+ if (use_temp_db) {
4145
+ temp_cf_opts.create_if_missing = true;
4146
+ temp_db_name = dbname_ + "/temp_db_" + std::to_string(rnd->Next());
4147
+ ASSERT_OK(DB::Open(temp_cf_opts, temp_db_name, &from_db));
4148
+ temp_cfh = from_db->DefaultColumnFamily();
4149
+ } else {
4150
+ from_db = db_;
4151
+ ASSERT_OK(
4152
+ from_db->CreateColumnFamily(temp_cf_opts, "temp_cf", &temp_cfh));
4153
+ }
4154
+
4155
+ // Use snapshot to ensure non-zero sequence numbers after compaction
4156
+ const Snapshot* snapshot = from_db->GetSnapshot();
4157
+
4158
+ for (int k = 1; k < 99; ++k) {
4159
+ expected_values[k] = rnd->RandomString(2000);
4160
+ ASSERT_OK(from_db->Put(wo, temp_cfh, Key(k), expected_values[k]));
4161
+ }
4162
+ ASSERT_OK(from_db->Flush({}, temp_cfh));
4163
+ CompactRangeOptions cro;
4164
+ cro.bottommost_level_compaction =
4165
+ BottommostLevelCompaction::kForceOptimized;
4166
+ ASSERT_OK(from_db->CompactRange(cro, temp_cfh, nullptr, nullptr));
4167
+
4168
+ ASSERT_GT(NumTableFilesAtLevel(6, temp_cfh, from_db), 1);
4169
+
4170
+ const bool multi_level_ingestion = rnd->OneIn(2);
4171
+ SCOPED_TRACE("Multi-level ingestion: " +
4172
+ std::to_string(multi_level_ingestion));
4173
+ if (multi_level_ingestion) {
4174
+ for (int k = 80; k < 99; ++k) {
4175
+ expected_values[k] = rnd->RandomString(500);
4176
+ ASSERT_OK(from_db->Put(wo, temp_cfh, Key(k), expected_values[k]));
4177
+ }
4178
+ ASSERT_OK(from_db->Flush({}, temp_cfh));
4179
+
4180
+ // Do some overwrites, and overlap with previous L0 to avoid trivial move
4181
+ for (int k = 70; k < 82; ++k) {
4182
+ expected_values[k] = rnd->RandomString(500);
4183
+ ASSERT_OK(from_db->Put(wo, temp_cfh, Key(k), expected_values[k]));
4184
+ }
4185
+ ASSERT_OK(from_db->Flush({}, temp_cfh));
4186
+
4187
+ if (rnd->OneIn(2)) {
4188
+ MoveFilesToLevel(5, temp_cfh, from_db);
4189
+ ASSERT_GT(NumTableFilesAtLevel(5, temp_cfh, from_db), 0);
4190
+ }
4191
+ ASSERT_GT(NumTableFilesAtLevel(6, temp_cfh, from_db), 0);
4192
+ }
4193
+ SCOPED_TRACE("LSM of from_db " + FilesPerLevel(temp_cfh, from_db));
4194
+
4195
+ ColumnFamilyMetaData cf_meta;
4196
+ from_db->GetColumnFamilyMetaData(temp_cfh, &cf_meta);
4197
+
4198
+ // Iterate in reverse since IngestExternalFiles expect files to be ordered
4199
+ // from old to new
4200
+ for (auto level_meta = cf_meta.levels.rbegin();
4201
+ level_meta != cf_meta.levels.rend(); ++level_meta) {
4202
+ // L0 files need to be added in reverse order.
4203
+ for (auto file_meta = level_meta->files.rbegin();
4204
+ file_meta != level_meta->files.rend(); ++file_meta) {
4205
+ // Validate that files contain non-zero sequence numbers
4206
+ ASSERT_GT(file_meta->smallest_seqno, 0);
4207
+ ASSERT_GE(file_meta->largest_seqno, file_meta->smallest_seqno);
4208
+ sst_file_paths.emplace_back(file_meta->directory + "/" +
4209
+ file_meta->relative_filename);
4210
+ }
4211
+ }
4212
+ from_db->ReleaseSnapshot(snapshot);
4213
+
4214
+ Status s;
4215
+ // Perform ingestion and validate results
4216
+ if (multi_level_ingestion && options.num_levels > 1) {
4217
+ // fail_if_bottommost requres ingesting all files into the last level,
4218
+ // so it fails if we are assiging files to multiple levels.
4219
+ ingest_opts.fail_if_not_bottommost_level = true;
4220
+ s = db_->IngestExternalFile(non_overlap_cf, sst_file_paths, ingest_opts);
4221
+ ASSERT_NOK(s);
4222
+ ASSERT_TRUE(s.ToString().find("Files cannot be ingested to Lmax") !=
4223
+ std::string::npos);
4224
+ ingest_opts.fail_if_not_bottommost_level = false;
4225
+ }
4226
+ if (ingest_opts.snapshot_consistency) {
4227
+ // snapshot_consisteny requires global sequence number assignment to
4228
+ // ingested files if there is any live snapshot.
4229
+ snapshot = db_->GetSnapshot();
4230
+ s = db_->IngestExternalFile(non_overlap_cf, sst_file_paths, ingest_opts);
4231
+ ASSERT_NOK(s);
4232
+ ASSERT_TRUE(s.ToString().find(
4233
+ "An ingested file overlaps with existing data in the DB and has been "
4234
+ "assigned a non-zero sequence number"));
4235
+ db_->ReleaseSnapshot(snapshot);
4236
+ }
4237
+
4238
+ std::atomic<int> file_scan_count{0};
4239
+ SyncPoint::GetInstance()->SetCallBack(
4240
+ "ExternalSstFileIngestionJob::GetSeqnoBoundaryForFile:FileScan",
4241
+ [&](void* /*arg*/) { file_scan_count++; });
4242
+ SyncPoint::GetInstance()->EnableProcessing();
4243
+
4244
+ ASSERT_OK(
4245
+ db_->IngestExternalFile(non_overlap_cf, sst_file_paths, ingest_opts));
4246
+
4247
+ SyncPoint::GetInstance()->DisableProcessing();
4248
+ SyncPoint::GetInstance()->ClearAllCallBacks();
4249
+
4250
+ EXPECT_EQ(file_scan_count, 0);
4251
+
4252
+ // Validate ingested data.
4253
+ ReadOptions ro;
4254
+ std::string val;
4255
+ for (int k = 0; k < 100; ++k) {
4256
+ s = db_->Get(ro, handles_[1], Key(k), &val);
4257
+ ASSERT_OK(s) << "Should find ingested key " << Key(k);
4258
+ ASSERT_EQ(val, expected_values[k]) << "key: " << Key(k);
4259
+ }
4260
+
4261
+ // Overlap with data in the CF
4262
+ if (ingest_opts.allow_blocking_flush) {
4263
+ s = db_->IngestExternalFile(overlap_cf, sst_file_paths, ingest_opts);
4264
+
4265
+ ASSERT_NOK(s);
4266
+ ASSERT_TRUE(s.ToString().find("An ingested file overlaps with existing "
4267
+ "data in the DB and has been "
4268
+ "assigned a non-zero sequence number") !=
4269
+ std::string::npos)
4270
+ << s.ToString();
4271
+ }
4272
+
4273
+ // Cleanup
4274
+ // FIXME: Without this, the test triggers some data race between dropping
4275
+ // CF and background compaction.
4276
+ ASSERT_OK(db_->WaitForCompact({}));
4277
+ if (use_temp_db) {
4278
+ ASSERT_OK(from_db->Close());
4279
+ delete from_db;
4280
+ ASSERT_OK(DestroyDB(temp_db_name, temp_cf_opts));
4281
+ } else {
4282
+ ASSERT_OK(db_->DropColumnFamily(temp_cfh));
4283
+ ASSERT_OK(db_->DestroyColumnFamilyHandle(temp_cfh));
4284
+ }
4285
+ } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));
4286
+ }
4287
+
4288
// Builds the secondary-index key for primary key `pk` whose value is `val`.
// The result has the form "index_<val>_<pk>", so entries are grouped by value
// first and by primary key second.
std::string GenSecondaryKey(const std::string& pk, const std::string& val) {
  return "index_" + val + "_" + pk;
}
4291
+
4292
+ TEST_P(IngestDBGeneratedFileTest2, ZeroAndNonZeroSeqno) {
4293
+ // Test ingestion of SST files with zero and with non-zero sequence numbers.
4294
+ // Generate data using a temp CF and a temp DB:
4295
+ // 1. Temp CF with cf_allow_ingest_behind enabled to preserve non-zero seqno.
4296
+ // 2. Temp DB with everything compacted to have zero seqno.
4297
+ // Then ingest both types of files together into a target CF.
4298
+ // This mimics a user case where temp DB contains data read from a
4299
+ // snapshot while temp CF contains live writes after a snapshot is taken.
4300
+ IngestExternalFileOptions ingest_opts;
4301
+ ingest_opts.allow_db_generated_files = true;
4302
+ ingest_opts.snapshot_consistency = std::get<0>(GetParam());
4303
+ ingest_opts.allow_global_seqno = std::get<1>(GetParam());
4304
+ ingest_opts.allow_blocking_flush = std::get<2>(GetParam());
4305
+ ingest_opts.fail_if_not_bottommost_level = std::get<3>(GetParam());
4306
+ ingest_opts.link_files = std::get<4>(GetParam());
4307
+
4308
+ Random* rnd = Random::GetTLSInstance();
4309
+
4310
+ do {
4311
+ SCOPED_TRACE("option_config_ = " + std::to_string(option_config_));
4312
+ Options options = CurrentOptions();
4313
+ options.allow_concurrent_memtable_write = false;
4314
+ // Force more flushes/compactions and more files to be generated
4315
+ options.target_file_size_base = 1 << 10; // 1KB
4316
+ options.max_bytes_for_level_base = 2 << 10; // 2KB
4317
+ options.max_bytes_for_level_multiplier = 2;
4318
+ options.level0_file_num_compaction_trigger = 2;
4319
+ options.level_compaction_dynamic_level_bytes = true;
4320
+ DestroyAndReopen(options);
4321
+ CreateAndReopenWithCF({"target_cf"}, options);
4322
+ auto* target_cfh = handles_[1];
4323
+
4324
+ Options live_write_cf_opts = options;
4325
+ live_write_cf_opts.memtable_factory.reset(new VectorRepFactory());
4326
+ live_write_cf_opts.compaction_style = kCompactionStyleUniversal;
4327
+ live_write_cf_opts.cf_allow_ingest_behind = true;
4328
+ live_write_cf_opts.num_levels = 50;
4329
+ ColumnFamilyHandle* live_write_cfh;
4330
+ ASSERT_OK(db_->CreateColumnFamily(live_write_cf_opts, "live_write_cf",
4331
+ &live_write_cfh));
4332
+
4333
+ // Expected value and key
4334
+ std::map<std::string, std::string> expected;
4335
+ std::unordered_set<std::string> deleted;
4336
+ std::stringstream debug_info;
4337
+
4338
+ // Setup base data in target CF, will ingest keys with different prefixes
4339
+ // so they don't overlap with the base data.
4340
+ WriteOptions wo;
4341
+ for (int k = 0; k < 100; ++k) {
4342
+ int random_val = rnd->Uniform(20);
4343
+ expected[Key(k)] = std::to_string(random_val);
4344
+ ASSERT_OK(db_->Put(wo, target_cfh, Key(k), expected[Key(k)]));
4345
+
4346
+ // Force flush every 20 keys to create multiple SST files
4347
+ if (rnd->OneIn(20)) {
4348
+ ASSERT_OK(db_->Flush({}, target_cfh));
4349
+ debug_info << "Flush after " << k
4350
+ << ", LSM state: " << FilesPerLevel(target_cfh) << "\n";
4351
+ }
4352
+ }
4353
+
4354
+ // Temp DB for snapshot data
4355
+ Options temp_db_opts;
4356
+ temp_db_opts.create_if_missing = true;
4357
+ temp_db_opts.target_file_size_base = 1 << 10;
4358
+ temp_db_opts.write_buffer_size = 1 << 10;
4359
+ temp_db_opts.memtable_factory.reset(new VectorRepFactory());
4360
+ temp_db_opts.allow_concurrent_memtable_write = false;
4361
+ temp_db_opts.compaction_style = kCompactionStyleUniversal;
4362
+ temp_db_opts.env = env_;
4363
+ temp_db_opts.num_levels = 7;
4364
+
4365
+ std::string temp_db_name =
4366
+ dbname_ + "/temp_db_" + std::to_string(rnd->Next());
4367
+ DB* temp_db = nullptr;
4368
+ ASSERT_OK(DB::Open(temp_db_opts, temp_db_name, &temp_db));
4369
+
4370
+ const Snapshot* snapshot = db_->GetSnapshot();
4371
+ ReadOptions ro;
4372
+ ro.snapshot = snapshot;
4373
+ ro.total_order_seek = true;
4374
+ std::unique_ptr<Iterator> iter{db_->NewIterator(ro, target_cfh)};
4375
+ // transform data read from snapshot and write to temp DB
4376
+ // Varying the number of files in temp DB.
4377
+ const int kValSize = rnd->Uniform(200);
4378
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
4379
+ std::string key = iter->key().ToString();
4380
+ std::string value = iter->value().ToString();
4381
+ std::string sk = GenSecondaryKey(key, value);
4382
+ // Usually value is empty, here we use a larger value to generate
4383
+ // multiple SST files in temp_db.
4384
+ std::string sk_val = rnd->RandomString(kValSize);
4385
+ ASSERT_OK(temp_db->Put(wo, sk, sk_val));
4386
+ expected[sk] = sk_val;
4387
+ debug_info << "Snapshot data: " << sk << " -> \n";
4388
+ }
4389
+ ASSERT_OK(iter->status());
4390
+
4391
+ // Do some live writes into target CF and live write CF.
4392
+ for (int i = 0; i < 10; ++i) {
4393
+ WriteBatch wb;
4394
+ for (int j = 0; j < 5; ++j) {
4395
+ std::string key = Key(rnd->Uniform(100));
4396
+ std::string old_val = expected[key];
4397
+ // Value range is 0-19, allow some PK to have the same value.
4398
+ int random_val = rnd->Uniform(20);
4399
+ std::string new_val = std::to_string(random_val);
4400
+ std::string old_index_key = GenSecondaryKey(key, old_val);
4401
+ std::string new_index_key = GenSecondaryKey(key, new_val);
4402
+ ASSERT_OK(wb.SingleDelete(live_write_cfh, old_index_key));
4403
+ std::string sk_val = rnd->RandomString(kValSize);
4404
+ ASSERT_OK(wb.Put(live_write_cfh, new_index_key, sk_val));
4405
+ ASSERT_OK(wb.Put(target_cfh, key, new_val));
4406
+ expected[key] = new_val;
4407
+ expected.erase(old_index_key);
4408
+ expected[new_index_key] = sk_val;
4409
+ deleted.insert(old_index_key);
4410
+ deleted.erase(new_index_key);
4411
+
4412
+ debug_info << "Live write: SD " << old_index_key << "\n";
4413
+ debug_info << "Live write: " << key << " -> " << new_val << "\n";
4414
+ debug_info << "Live write: " << new_index_key << " -> \n";
4415
+ }
4416
+ ASSERT_OK(db_->Write(wo, &wb));
4417
+ if (rnd->OneIn(3)) {
4418
+ debug_info << "Flush after " << i << " live writes\n";
4419
+ ASSERT_OK(db_->Flush({}, live_write_cfh));
4420
+ }
4421
+ }
4422
+ iter.reset();
4423
+ db_->ReleaseSnapshot(snapshot);
4424
+
4425
+ // Compact temp_db to ensure zero sequence numbers
4426
+ CompactRangeOptions cro;
4427
+ cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
4428
+ ASSERT_OK(temp_db->CompactRange(cro, nullptr, nullptr));
4429
+ SCOPED_TRACE("Temp DB LSM: " +
4430
+ FilesPerLevel(temp_db->DefaultColumnFamily(), temp_db));
4431
+
4432
+ // Base data from snapshot
4433
+ std::vector<std::string> sst_file_paths_zero_seqno;
4434
+
4435
+ // Collect SST file paths with zero sequence numbers
4436
+ ASSERT_OK(temp_db->DisableFileDeletions());
4437
+ ColumnFamilyMetaData cf_meta_temp_db;
4438
+ temp_db->GetColumnFamilyMetaData(&cf_meta_temp_db);
4439
+ for (const auto& level_meta : cf_meta_temp_db.levels) {
4440
+ if (level_meta.level == 6) {
4441
+ for (const auto& file_meta : level_meta.files) {
4442
+ // Verify files have zero sequence numbers
4443
+ ASSERT_EQ(0, file_meta.largest_seqno)
4444
+ << "File " << file_meta.relative_filename
4445
+ << " should have zero sequence number\n"
4446
+ << debug_info.str();
4447
+ sst_file_paths_zero_seqno.emplace_back(file_meta.directory + "/" +
4448
+ file_meta.relative_filename);
4449
+ }
4450
+ } else {
4451
+ // All files should be in L6
4452
+ ASSERT_EQ(0, level_meta.files.size()) << debug_info.str();
4453
+ }
4454
+ }
4455
+
4456
+ // Flush remaining catch up writes in memtable
4457
+ ASSERT_OK(db_->Flush({}, live_write_cfh));
4458
+ SCOPED_TRACE("LSM of live write cfh " + FilesPerLevel(live_write_cfh));
4459
+ // Collect SST file paths with non-zero sequence numbers
4460
+ ColumnFamilyMetaData live_write_cf_meta;
4461
+ ASSERT_OK(db_->DisableFileDeletions());
4462
+ db_->GetColumnFamilyMetaData(live_write_cfh, &live_write_cf_meta);
4463
+
4464
+ // Live writes after snapshot
4465
+ std::vector<std::string> sst_file_paths_nonzero_seqno;
4466
+ for (auto level_meta = live_write_cf_meta.levels.rbegin();
4467
+ level_meta != live_write_cf_meta.levels.rend(); ++level_meta) {
4468
+ // Reverse order is important for L0, where recent updates are ordered
4469
+ // first
4470
+ for (auto file_meta = level_meta->files.rbegin();
4471
+ file_meta != level_meta->files.rend(); ++file_meta) {
4472
+ sst_file_paths_nonzero_seqno.emplace_back(file_meta->directory + "/" +
4473
+ file_meta->relative_filename);
4474
+ ASSERT_GT(file_meta->smallest_seqno, 0) << debug_info.str();
4475
+ }
4476
+ if (level_meta->level == 49) {
4477
+ // Ingest behind does not compact to the last level
4478
+ ASSERT_EQ(level_meta->files.size(), 0) << debug_info.str();
4479
+ }
4480
+ }
4481
+
4482
+ ASSERT_GT(sst_file_paths_zero_seqno.size(), 0) << debug_info.str();
4483
+ ASSERT_GT(sst_file_paths_nonzero_seqno.size(), 0) << debug_info.str();
4484
+
4485
+ // Combine all SST file paths.
4486
+ // File ingestion takes files from old to new.
4487
+ std::vector<std::string> all_sst_files;
4488
+ all_sst_files.insert(all_sst_files.end(), sst_file_paths_zero_seqno.begin(),
4489
+ sst_file_paths_zero_seqno.end());
4490
+ all_sst_files.insert(all_sst_files.end(),
4491
+ sst_file_paths_nonzero_seqno.begin(),
4492
+ sst_file_paths_nonzero_seqno.end());
4493
+ if (ingest_opts.fail_if_not_bottommost_level && options.num_levels > 1) {
4494
+ // overlapping files will be ingested into different levels, including non
4495
+ // Lmax
4496
+ Status s =
4497
+ db_->IngestExternalFile(target_cfh, all_sst_files, ingest_opts);
4498
+ ASSERT_NOK(s);
4499
+ ASSERT_TRUE(s.ToString().find("Files cannot be ingested to Lmax") !=
4500
+ std::string::npos);
4501
+ } else {
4502
+ ASSERT_OK(
4503
+ db_->IngestExternalFile(target_cfh, all_sst_files, ingest_opts));
4504
+
4505
+ debug_info << "Zero seqno files: " << sst_file_paths_zero_seqno.size()
4506
+ << "\nNon-zero seqno files: "
4507
+ << sst_file_paths_nonzero_seqno.size() << "\n";
4508
+
4509
+ SCOPED_TRACE("Debug info:\n" + debug_info.str());
4510
+ VerifyDBFromMap(expected, nullptr, false, nullptr, target_cfh, &deleted);
4511
+ }
4512
+
4513
+ // clean up
4514
+ ASSERT_OK(db_->EnableFileDeletions());
4515
+ ASSERT_OK(temp_db->EnableFileDeletions());
4516
+
4517
+ // FIXME: Without this, the test triggers some data race between dropping
4518
+ // CF and background compaction.
4519
+ ASSERT_OK(db_->WaitForCompact({}));
4520
+
4521
+ ASSERT_OK(db_->DropColumnFamily(live_write_cfh));
4522
+ ASSERT_OK(db_->DestroyColumnFamilyHandle(live_write_cfh));
4523
+
4524
+ ASSERT_OK(temp_db->Close());
4525
+ delete temp_db;
4526
+ ASSERT_OK(DestroyDB(temp_db_name, temp_db_opts));
4527
+ } while (ChangeOptions(kSkipPlainTable | kSkipFIFOCompaction));
4528
+ }
4529
+
4114
4530
  } // namespace ROCKSDB_NAMESPACE
4115
4531
 
4116
4532
  int main(int argc, char** argv) {