@nxtedition/rocksdb 8.0.4 → 8.0.5

This diff reflects the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Files changed (51)
  1. package/BUILDING.md +2 -2
  2. package/binding.cc +7 -2
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -0
  4. package/deps/rocksdb/rocksdb/Makefile +13 -1
  5. package/deps/rocksdb/rocksdb/db/builder.cc +13 -4
  6. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  7. package/deps/rocksdb/rocksdb/db/c.cc +6 -0
  8. package/deps/rocksdb/rocksdb/db/column_family.cc +1 -0
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +18 -4
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +2 -0
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +22 -2
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +5 -1
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +14 -14
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1 -2
  16. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  17. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +225 -0
  18. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +8 -9
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +63 -23
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +12 -8
  23. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +115 -2
  24. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  25. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -0
  26. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +88 -12
  27. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  28. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  29. package/deps/rocksdb/rocksdb/db/flush_job.cc +2 -3
  30. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/repair.cc +2 -1
  32. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +41 -39
  33. package/deps/rocksdb/rocksdb/db/version_edit.cc +12 -0
  34. package/deps/rocksdb/rocksdb/db/version_edit.h +18 -6
  35. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
  36. package/deps/rocksdb/rocksdb/db/version_set.cc +12 -6
  37. package/deps/rocksdb/rocksdb/db/version_set_test.cc +23 -9
  38. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  39. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  40. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +5 -0
  41. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +4 -0
  42. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  43. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  44. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  45. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +245 -74
  46. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +195 -4
  47. package/max_rev_operator.h +100 -0
  48. package/package.json +1 -1
  49. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  50. package/prebuilds/darwin-x64/node.napi.node +0 -0
  51. package/prebuilds/linux-x64/node.napi.node +0 -0
package/BUILDING.md CHANGED
@@ -10,9 +10,9 @@
10
10
  - Copy `libfolly.a` to `/usr/lib/x86_64-linux-gnu`.
11
11
  - Copy headers to `/usr/lib/x86_64-linux-gnu/include`.
12
12
  - Copy boost headers from folly scratchpad to `/usr/lib/x86_64-linux-gnu/include`.
13
- - `JOBS=8 npx prebuildify -t 18.11.0 --napi --strip --arch x64`
13
+ - `JOBS=16 npx prebuildify -t 18.11.0 --napi --strip --arch x64`
14
14
 
15
15
  # OSX
16
16
 
17
17
  - `brew install zstd`
18
- - `JOBS=8 npx prebuildify -t 18.11.0 --napi --strip --arch arm64`
18
+ - `JOBS=16 npx prebuildify -t 18.11.0 --napi --strip --arch arm64`
package/binding.cc CHANGED
@@ -26,6 +26,7 @@
26
26
  #include <thread>
27
27
  #include <vector>
28
28
 
29
+ #include "max_rev_operator.h"
29
30
  #include "util.h"
30
31
 
31
32
  class NullLogger : public rocksdb::Logger {
@@ -556,8 +557,12 @@ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
556
557
  std::optional<std::string> mergeOperatorOpt;
557
558
  NAPI_STATUS_RETURN(GetProperty(env, options, "mergeOperator", mergeOperatorOpt));
558
559
  if (mergeOperatorOpt) {
559
- ROCKS_STATUS_RETURN_NAPI(
560
- rocksdb::MergeOperator::CreateFromString(configOptions, *mergeOperatorOpt, &columnOptions.merge_operator));
560
+ if (*mergeOperatorOpt == "maxRev") {
561
+ columnOptions.merge_operator = std::make_shared<MaxRevOperator>();
562
+ } else {
563
+ ROCKS_STATUS_RETURN_NAPI(
564
+ rocksdb::MergeOperator::CreateFromString(configOptions, *mergeOperatorOpt, &columnOptions.merge_operator));
565
+ }
561
566
  }
562
567
 
563
568
  std::optional<std::string> compactionPriority;
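The `maxRev` branch above wires binding.cc to a custom merge operator implemented in the new file package/max_rev_operator.h (+100 lines, listed above but not included in this excerpt). As a rough, hypothetical sketch only — not the package's actual code — an operator of that general shape could derive from `rocksdb::AssociativeMergeOperator` and keep whichever operand carries the larger revision; the plain lexicographic comparison below is a stand-in assumption, not the package's real rev semantics:

```cpp
#include <string>

#include <rocksdb/merge_operator.h>
#include <rocksdb/slice.h>

// Hypothetical sketch only: the shipped MaxRevOperator lives in
// package/max_rev_operator.h, which this diff excerpt does not show.
// The "revision" comparison here is assumed to be lexicographic.
class MaxRevSketchOperator : public rocksdb::AssociativeMergeOperator {
 public:
  bool Merge(const rocksdb::Slice& /*key*/,
             const rocksdb::Slice* existing_value, const rocksdb::Slice& value,
             std::string* new_value,
             rocksdb::Logger* /*logger*/) const override {
    // Keep whichever operand compares greater; if there is no existing
    // value, the incoming operand wins.
    if (existing_value == nullptr || value.compare(*existing_value) >= 0) {
      new_value->assign(value.data(), value.size());
    } else {
      new_value->assign(existing_value->data(), existing_value->size());
    }
    return true;
  }

  const char* Name() const override { return "MaxRevSketchOperator"; }
};
```

With an operator of this shape registered, the assignment shown in the diff (`columnOptions.merge_operator = std::make_shared<MaxRevOperator>();`) is all the binding needs on the options side.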
package/deps/rocksdb/rocksdb/CMakeLists.txt CHANGED
@@ -980,6 +980,12 @@ if ( ROCKSDB_PLUGINS )
980
980
  plugin/${plugin}/${src}
981
981
  PROPERTIES COMPILE_FLAGS "${${plugin}_COMPILE_FLAGS}")
982
982
  endforeach()
983
+ foreach (test ${${plugin}_TESTS})
984
+ list(APPEND PLUGIN_TESTS plugin/${plugin}/${test})
985
+ set_source_files_properties(
986
+ plugin/${plugin}/${test}
987
+ PROPERTIES COMPILE_FLAGS "${${plugin}_COMPILE_FLAGS}")
988
+ endforeach()
983
989
  foreach (path ${${plugin}_INCLUDE_PATHS})
984
990
  include_directories(${path})
985
991
  endforeach()
@@ -1471,6 +1477,7 @@ if(WITH_TESTS)
1471
1477
  utilities/ttl/ttl_test.cc
1472
1478
  utilities/util_merge_operators_test.cc
1473
1479
  utilities/write_batch_with_index/write_batch_with_index_test.cc
1480
+ ${PLUGIN_TESTS}
1474
1481
  )
1475
1482
  endif()
1476
1483
 
package/deps/rocksdb/rocksdb/Makefile CHANGED
@@ -266,6 +266,7 @@ ROCKSDB_PLUGIN_EXTERNS = $(foreach p, $(ROCKSDB_PLUGIN_W_FUNCS), int $($(p)_FUNC
266
266
  ROCKSDB_PLUGIN_BUILTINS = $(foreach p, $(ROCKSDB_PLUGIN_W_FUNCS), {\"$(p)\"\, $($(p)_FUNC)}\,)
267
267
  ROCKSDB_PLUGIN_LDFLAGS = $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_LDFLAGS))
268
268
  ROCKSDB_PLUGIN_PKGCONFIG_REQUIRES = $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_PKGCONFIG_REQUIRES))
269
+ ROCKSDB_PLUGIN_TESTS = $(foreach p, $(ROCKSDB_PLUGINS), $(foreach test, $($(p)_TESTS), plugin/$(p)/$(test)))
269
270
 
270
271
  CXXFLAGS += $(foreach plugin, $(ROCKSDB_PLUGINS), $($(plugin)_CXXFLAGS))
271
272
  PLATFORM_LDFLAGS += $(ROCKSDB_PLUGIN_LDFLAGS)
@@ -647,10 +648,12 @@ STRESS_OBJECTS = $(patsubst %.cc, $(OBJ_DIR)/%.o, $(STRESS_LIB_SOURCES))
647
648
  ALL_SOURCES = $(filter-out util/build_version.cc, $(LIB_SOURCES)) $(TEST_LIB_SOURCES) $(MOCK_LIB_SOURCES) $(GTEST_DIR)/gtest/gtest-all.cc
648
649
  ALL_SOURCES += $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(CACHE_BENCH_LIB_SOURCES) $(ANALYZER_LIB_SOURCES) $(STRESS_LIB_SOURCES)
649
650
  ALL_SOURCES += $(TEST_MAIN_SOURCES) $(TOOL_MAIN_SOURCES) $(BENCH_MAIN_SOURCES)
650
- ALL_SOURCES += $(ROCKSDB_PLUGIN_SOURCES)
651
+ ALL_SOURCES += $(ROCKSDB_PLUGIN_SOURCES) $(ROCKSDB_PLUGIN_TESTS)
651
652
 
653
+ PLUGIN_TESTS = $(patsubst %.cc, %, $(notdir $(ROCKSDB_PLUGIN_TESTS)))
652
654
  TESTS = $(patsubst %.cc, %, $(notdir $(TEST_MAIN_SOURCES)))
653
655
  TESTS += $(patsubst %.c, %, $(notdir $(TEST_MAIN_SOURCES_C)))
656
+ TESTS += $(PLUGIN_TESTS)
654
657
 
655
658
  # `make check-headers` to very that each header file includes its own
656
659
  # dependencies
@@ -702,6 +705,7 @@ NON_PARALLEL_TEST = \
702
705
  env_test \
703
706
  deletefile_test \
704
707
  db_bloom_filter_test \
708
+ $(PLUGIN_TESTS) \
705
709
 
706
710
  PARALLEL_TEST = $(filter-out $(NON_PARALLEL_TEST), $(TESTS))
707
711
 
@@ -1355,6 +1359,14 @@ db_sanity_test: $(OBJ_DIR)/tools/db_sanity_test.o $(LIBRARY)
1355
1359
  db_repl_stress: $(OBJ_DIR)/tools/db_repl_stress.o $(LIBRARY)
1356
1360
  $(AM_LINK)
1357
1361
 
1362
+ define MakeTestRule
1363
+ $(notdir $(1:%.cc=%)): $(1:%.cc=$$(OBJ_DIR)/%.o) $$(TEST_LIBRARY) $$(LIBRARY)
1364
+ $$(AM_LINK)
1365
+ endef
1366
+
1367
+ # For each PLUGIN test, create a rule to generate the test executable
1368
+ $(foreach test, $(ROCKSDB_PLUGIN_TESTS), $(eval $(call MakeTestRule, $(test))))
1369
+
1358
1370
  arena_test: $(OBJ_DIR)/memory/arena_test.o $(TEST_LIBRARY) $(LIBRARY)
1359
1371
  $(AM_LINK)
1360
1372
 
package/deps/rocksdb/rocksdb/db/builder.cc CHANGED
@@ -71,8 +71,9 @@ Status BuildTable(
71
71
  int job_id, const Env::IOPriority io_priority,
72
72
  TableProperties* table_properties, Env::WriteLifeTimeHint write_hint,
73
73
  const std::string* full_history_ts_low,
74
- BlobFileCompletionCallback* blob_callback, uint64_t* num_input_entries,
75
- uint64_t* memtable_payload_bytes, uint64_t* memtable_garbage_bytes) {
74
+ BlobFileCompletionCallback* blob_callback, Version* version,
75
+ uint64_t* num_input_entries, uint64_t* memtable_payload_bytes,
76
+ uint64_t* memtable_garbage_bytes) {
76
77
  assert((tboptions.column_family_id ==
77
78
  TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) ==
78
79
  tboptions.column_family_name.empty());
@@ -246,9 +247,17 @@ Status BuildTable(
246
247
  auto tombstone = range_del_it->Tombstone();
247
248
  auto kv = tombstone.Serialize();
248
249
  builder->Add(kv.first.Encode(), kv.second);
249
- meta->UpdateBoundariesForRange(kv.first, tombstone.SerializeEndKey(),
250
- tombstone.seq_,
250
+ InternalKey tombstone_end = tombstone.SerializeEndKey();
251
+ meta->UpdateBoundariesForRange(kv.first, tombstone_end, tombstone.seq_,
251
252
  tboptions.internal_comparator);
253
+ if (version) {
254
+ SizeApproximationOptions approx_opts;
255
+ approx_opts.files_size_error_margin = 0.1;
256
+ meta->compensated_range_deletion_size += versions->ApproximateSize(
257
+ approx_opts, version, kv.first.Encode(), tombstone_end.Encode(),
258
+ 0 /* start_level */, -1 /* end_level */,
259
+ TableReaderCaller::kFlush);
260
+ }
252
261
  }
253
262
  }
254
263
 
package/deps/rocksdb/rocksdb/db/builder.h CHANGED
@@ -13,6 +13,7 @@
13
13
  #include "db/range_tombstone_fragmenter.h"
14
14
  #include "db/seqno_to_time_mapping.h"
15
15
  #include "db/table_properties_collector.h"
16
+ #include "db/version_set.h"
16
17
  #include "logging/event_logger.h"
17
18
  #include "options/cf_options.h"
18
19
  #include "rocksdb/comparator.h"
@@ -70,7 +71,7 @@ extern Status BuildTable(
70
71
  Env::WriteLifeTimeHint write_hint = Env::WLTH_NOT_SET,
71
72
  const std::string* full_history_ts_low = nullptr,
72
73
  BlobFileCompletionCallback* blob_callback = nullptr,
73
- uint64_t* num_input_entries = nullptr,
74
+ Version* version = nullptr, uint64_t* num_input_entries = nullptr,
74
75
  uint64_t* memtable_payload_bytes = nullptr,
75
76
  uint64_t* memtable_garbage_bytes = nullptr);
76
77
 
package/deps/rocksdb/rocksdb/db/c.cc CHANGED
@@ -2588,6 +2588,12 @@ void rocksdb_block_based_options_set_partition_filters(
2588
2588
  options->rep.partition_filters = partition_filters;
2589
2589
  }
2590
2590
 
2591
+ void rocksdb_block_based_options_set_optimize_filters_for_memory(
2592
+ rocksdb_block_based_table_options_t* options,
2593
+ unsigned char optimize_filters_for_memory) {
2594
+ options->rep.optimize_filters_for_memory = optimize_filters_for_memory;
2595
+ }
2596
+
2591
2597
  void rocksdb_block_based_options_set_use_delta_encoding(
2592
2598
  rocksdb_block_based_table_options_t* options,
2593
2599
  unsigned char use_delta_encoding) {
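The new C wrapper above simply forwards a boolean into the underlying `BlockBasedTableOptions::optimize_filters_for_memory` field. A minimal C++ sketch (not part of this diff) of enabling the same option directly through the C++ API:

```cpp
#include <rocksdb/options.h>
#include <rocksdb/table.h>

int main() {
  // Block-based table options; this is the field the new C setter exposes.
  rocksdb::BlockBasedTableOptions table_opts;
  table_opts.optimize_filters_for_memory = true;

  // Install the table factory on the DB options.
  rocksdb::Options opts;
  opts.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_opts));
  return 0;
}
```

When enabled, RocksDB sizes Bloom/Ribbon filter blocks toward allocator-friendly bin sizes to reduce internal memory fragmentation, at a small CPU cost.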
package/deps/rocksdb/rocksdb/db/column_family.cc CHANGED
@@ -1218,6 +1218,7 @@ Compaction* ColumnFamilyData::CompactRange(
1218
1218
  if (result != nullptr) {
1219
1219
  result->SetInputVersion(current_);
1220
1220
  }
1221
+ TEST_SYNC_POINT("ColumnFamilyData::CompactRange:Return");
1221
1222
  return result;
1222
1223
  }
1223
1224
 
package/deps/rocksdb/rocksdb/db/compaction/compaction.cc CHANGED
@@ -235,12 +235,19 @@ Compaction::Compaction(
235
235
  inputs_(PopulateWithAtomicBoundaries(vstorage, std::move(_inputs))),
236
236
  grandparents_(std::move(_grandparents)),
237
237
  score_(_score),
238
- bottommost_level_(IsBottommostLevel(output_level_, vstorage, inputs_)),
238
+ bottommost_level_(
239
+ // For simplicity, we don't support the concept of "bottommost level"
240
+ // with
241
+ // `CompactionReason::kExternalSstIngestion` and
242
+ // `CompactionReason::kRefitLevel`
243
+ (_compaction_reason == CompactionReason::kExternalSstIngestion ||
244
+ _compaction_reason == CompactionReason::kRefitLevel)
245
+ ? false
246
+ : IsBottommostLevel(output_level_, vstorage, inputs_)),
239
247
  is_full_compaction_(IsFullCompaction(vstorage, inputs_)),
240
248
  is_manual_compaction_(_manual_compaction),
241
249
  trim_ts_(_trim_ts),
242
250
  is_trivial_move_(false),
243
-
244
251
  compaction_reason_(_compaction_reason),
245
252
  notify_on_compaction_completion_(false),
246
253
  enable_blob_garbage_collection_(
@@ -255,8 +262,15 @@ Compaction::Compaction(
255
262
  _blob_garbage_collection_age_cutoff > 1
256
263
  ? mutable_cf_options()->blob_garbage_collection_age_cutoff
257
264
  : _blob_garbage_collection_age_cutoff),
258
- penultimate_level_(EvaluatePenultimateLevel(
259
- vstorage, immutable_options_, start_level_, output_level_)) {
265
+ penultimate_level_(
266
+ // For simplicity, we don't support the concept of "penultimate level"
267
+ // with `CompactionReason::kExternalSstIngestion` and
268
+ // `CompactionReason::kRefitLevel`
269
+ _compaction_reason == CompactionReason::kExternalSstIngestion ||
270
+ _compaction_reason == CompactionReason::kRefitLevel
271
+ ? Compaction::kInvalidLevel
272
+ : EvaluatePenultimateLevel(vstorage, immutable_options_,
273
+ start_level_, output_level_)) {
260
274
  MarkFilesBeingCompacted(true);
261
275
  if (is_manual_compaction_) {
262
276
  compaction_reason_ = CompactionReason::kManualCompaction;
package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc CHANGED
@@ -99,6 +99,8 @@ const char* GetCompactionReasonString(CompactionReason compaction_reason) {
99
99
  return "ForcedBlobGC";
100
100
  case CompactionReason::kRoundRobinTtl:
101
101
  return "RoundRobinTtl";
102
+ case CompactionReason::kRefitLevel:
103
+ return "RefitLevel";
102
104
  case CompactionReason::kNumOfReasons:
103
105
  // fall through
104
106
  default:
package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc CHANGED
@@ -386,7 +386,8 @@ class CompactionJobTestBase : public testing::Test {
386
386
  oldest_blob_file_number, kUnknownOldestAncesterTime,
387
387
  kUnknownFileCreationTime,
388
388
  versions_->GetColumnFamilySet()->GetDefault()->NewEpochNumber(),
389
- kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2);
389
+ kUnknownFileChecksum, kUnknownFileChecksumFuncName, kNullUniqueId64x2,
390
+ 0);
390
391
 
391
392
  mutex_.Lock();
392
393
  EXPECT_OK(
package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc CHANGED
@@ -525,7 +525,8 @@ Status CompactionOutputs::AddRangeDels(
525
525
  ucmp->CompareWithoutTimestamp(*lower_bound, kv.second) < 0);
526
526
  // Range tombstone is not supported by output validator yet.
527
527
  builder_->Add(kv.first.Encode(), kv.second);
528
- InternalKey smallest_candidate = std::move(kv.first);
528
+ InternalKey tombstone_start = std::move(kv.first);
529
+ InternalKey smallest_candidate{tombstone_start};
529
530
  if (lower_bound != nullptr &&
530
531
  ucmp->CompareWithoutTimestamp(smallest_candidate.user_key(),
531
532
  *lower_bound) <= 0) {
@@ -594,7 +595,8 @@ Status CompactionOutputs::AddRangeDels(
594
595
  smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
595
596
  }
596
597
  }
597
- InternalKey largest_candidate = tombstone.SerializeEndKey();
598
+ InternalKey tombstone_end = tombstone.SerializeEndKey();
599
+ InternalKey largest_candidate{tombstone_end};
598
600
  if (upper_bound != nullptr &&
599
601
  ucmp->CompareWithoutTimestamp(*upper_bound,
600
602
  largest_candidate.user_key()) <= 0) {
@@ -636,6 +638,24 @@ Status CompactionOutputs::AddRangeDels(
636
638
  #endif
637
639
  meta.UpdateBoundariesForRange(smallest_candidate, largest_candidate,
638
640
  tombstone.seq_, icmp);
641
+ if (!bottommost_level) {
642
+ // Range tombstones are truncated at file boundaries
643
+ if (icmp.Compare(tombstone_start, meta.smallest) < 0) {
644
+ tombstone_start = meta.smallest;
645
+ }
646
+ if (icmp.Compare(tombstone_end, meta.largest) > 0) {
647
+ tombstone_end = meta.largest;
648
+ }
649
+ SizeApproximationOptions approx_opts;
650
+ approx_opts.files_size_error_margin = 0.1;
651
+ auto approximate_covered_size =
652
+ compaction_->input_version()->version_set()->ApproximateSize(
653
+ approx_opts, compaction_->input_version(),
654
+ tombstone_start.Encode(), tombstone_end.Encode(),
655
+ compaction_->output_level() + 1 /* start_level */,
656
+ -1 /* end_level */, kCompaction);
657
+ meta.compensated_range_deletion_size += approximate_covered_size;
658
+ }
639
659
  // The smallest key in a file is used for range tombstone truncation, so
640
660
  // it cannot have a seqnum of 0 (unless the smallest data key in a file
641
661
  // has a seqnum of 0). Otherwise, the truncated tombstone may expose
package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc CHANGED
@@ -1126,7 +1126,11 @@ void CompactionPicker::RegisterCompaction(Compaction* c) {
1126
1126
  c->output_level() == 0 ||
1127
1127
  !FilesRangeOverlapWithCompaction(*c->inputs(), c->output_level(),
1128
1128
  c->GetPenultimateLevel()));
1129
- if (c->start_level() == 0 ||
1129
+ // CompactionReason::kExternalSstIngestion's start level is just a placeholder
1130
+ // number without actual meaning as file ingestion technically does not have
1131
+ // an input level like other compactions
1132
+ if ((c->start_level() == 0 &&
1133
+ c->compaction_reason() != CompactionReason::kExternalSstIngestion) ||
1130
1134
  ioptions_.compaction_style == kCompactionStyleUniversal) {
1131
1135
  level0_compactions_in_progress_.insert(c);
1132
1136
  }
package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc CHANGED
@@ -447,21 +447,21 @@ bool LevelCompactionBuilder::SetupOtherInputsIfNeeded()
447
447
  compaction_inputs_.push_back(output_level_inputs_);
448
448
  }
449
449
 
450
+ // In some edge cases we could pick a compaction that will be compacting
451
+ // a key range that overlap with another running compaction, and both
452
+ // of them have the same output level. This could happen if
453
+ // (1) we are running a non-exclusive manual compaction
454
+ // (2) AddFile ingest a new file into the LSM tree
455
+ // We need to disallow this from happening.
456
+ if (compaction_picker_->FilesRangeOverlapWithCompaction(
457
+ compaction_inputs_, output_level_,
458
+ Compaction::EvaluatePenultimateLevel(
459
+ vstorage_, ioptions_, start_level_, output_level_))) {
460
+ // This compaction output could potentially conflict with the output
461
+ // of a currently running compaction, we cannot run it.
462
+ return false;
463
+ }
450
464
  if (!is_l0_trivial_move_) {
451
- // In some edge cases we could pick a compaction that will be compacting
452
- // a key range that overlap with another running compaction, and both
453
- // of them have the same output level. This could happen if
454
- // (1) we are running a non-exclusive manual compaction
455
- // (2) AddFile ingest a new file into the LSM tree
456
- // We need to disallow this from happening.
457
- if (compaction_picker_->FilesRangeOverlapWithCompaction(
458
- compaction_inputs_, output_level_,
459
- Compaction::EvaluatePenultimateLevel(
460
- vstorage_, ioptions_, start_level_, output_level_))) {
461
- // This compaction output could potentially conflict with the output
462
- // of a currently running compaction, we cannot run it.
463
- return false;
464
- }
465
465
  compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_,
466
466
  output_level_inputs_, &grandparents_);
467
467
  }
package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc CHANGED
@@ -148,7 +148,7 @@ class CompactionPickerTestBase : public testing::Test {
148
148
  smallest_seq, largest_seq, marked_for_compact, temperature,
149
149
  kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
150
150
  kUnknownFileCreationTime, epoch_number, kUnknownFileChecksum,
151
- kUnknownFileChecksumFuncName, kNullUniqueId64x2);
151
+ kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
152
152
  f->compensated_file_size =
153
153
  (compensated_file_size != 0) ? compensated_file_size : file_size;
154
154
  f->oldest_ancester_time = oldest_ancestor_time;
@@ -2873,7 +2873,6 @@ TEST_F(CompactionPickerTest, IntraL0MaxCompactionBytesHit) {
2873
2873
  ASSERT_EQ(0, compaction->output_level());
2874
2874
  }
2875
2875
 
2876
-
2877
2876
  #ifndef ROCKSDB_LITE
2878
2877
  TEST_F(CompactionPickerTest, UniversalMarkedCompactionFullOverlap) {
2879
2878
  const uint64_t kFileSize = 100000;
package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc CHANGED
@@ -1229,7 +1229,7 @@ TEST_P(ChargeFilterConstructionTestWithParam, Basic) {
1229
1229
  *
1230
1230
  * The test is designed in a way such that the reservation for (p1 - b')
1231
1231
  * will trigger at least another dummy entry insertion
1232
- * (or equivelantly to saying, creating another peak).
1232
+ * (or equivalently to saying, creating another peak).
1233
1233
  *
1234
1234
  * kStandard128Ribbon + FullFilter +
1235
1235
  * detect_filter_construct_corruption
@@ -2618,8 +2618,7 @@ TEST_F(DBBloomFilterTest, OptimizeFiltersForHits) {
2618
2618
  BottommostLevelCompaction::kSkip;
2619
2619
  compact_options.change_level = true;
2620
2620
  compact_options.target_level = 7;
2621
- ASSERT_TRUE(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr)
2622
- .IsNotSupported());
2621
+ ASSERT_OK(db_->CompactRange(compact_options, handles_[1], nullptr, nullptr));
2623
2622
 
2624
2623
  ASSERT_EQ(trivial_move, 1);
2625
2624
  ASSERT_EQ(non_trivial_move, 0);
package/deps/rocksdb/rocksdb/db/db_compaction_test.cc CHANGED
@@ -6245,6 +6245,231 @@ TEST_P(DBCompactionTestWithParam, FixFileIngestionCompactionDeadlock) {
6245
6245
  Close();
6246
6246
  }
6247
6247
 
6248
+ class DBCompactionTestWithOngoingFileIngestionParam
6249
+ : public DBCompactionTest,
6250
+ public testing::WithParamInterface<std::string> {
6251
+ public:
6252
+ DBCompactionTestWithOngoingFileIngestionParam() : DBCompactionTest() {
6253
+ compaction_path_to_test_ = GetParam();
6254
+ }
6255
+ void SetupOptions() {
6256
+ options_ = CurrentOptions();
6257
+ options_.create_if_missing = true;
6258
+
6259
+ if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6260
+ options_.num_levels = 7;
6261
+ } else {
6262
+ options_.num_levels = 3;
6263
+ }
6264
+ options_.compaction_style = CompactionStyle::kCompactionStyleLevel;
6265
+ if (compaction_path_to_test_ == "AutoCompaction") {
6266
+ options_.disable_auto_compactions = false;
6267
+ options_.level0_file_num_compaction_trigger = 1;
6268
+ } else {
6269
+ options_.disable_auto_compactions = true;
6270
+ }
6271
+ }
6272
+
6273
+ void PauseCompactionThread() {
6274
+ sleeping_task_.reset(new test::SleepingBackgroundTask());
6275
+ env_->SetBackgroundThreads(1, Env::LOW);
6276
+ env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
6277
+ sleeping_task_.get(), Env::Priority::LOW);
6278
+ sleeping_task_->WaitUntilSleeping();
6279
+ }
6280
+
6281
+ void ResumeCompactionThread() {
6282
+ if (sleeping_task_) {
6283
+ sleeping_task_->WakeUp();
6284
+ sleeping_task_->WaitUntilDone();
6285
+ }
6286
+ }
6287
+
6288
+ void SetupFilesToForceFutureFilesIngestedToCertainLevel() {
6289
+ SstFileWriter sst_file_writer(EnvOptions(), options_);
6290
+ std::string dummy = dbname_ + "/dummy.sst";
6291
+ ASSERT_OK(sst_file_writer.Open(dummy));
6292
+ ASSERT_OK(sst_file_writer.Put("k2", "dummy"));
6293
+ ASSERT_OK(sst_file_writer.Finish());
6294
+ ASSERT_OK(db_->IngestExternalFile({dummy}, IngestExternalFileOptions()));
6295
+ // L2 is made to contain a file overlapped with files to be ingested in
6296
+ // later steps on key "k2". This will force future files ingested to L1 or
6297
+ // above.
6298
+ ASSERT_EQ("0,0,1", FilesPerLevel(0));
6299
+ }
6300
+
6301
+ void SetupSyncPoints() {
6302
+ if (compaction_path_to_test_ == "AutoCompaction") {
6303
+ SyncPoint::GetInstance()->SetCallBack(
6304
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6305
+ SyncPoint::GetInstance()->LoadDependency(
6306
+ {{"DBImpl::BackgroundCompaction():AfterPickCompaction",
6307
+ "VersionSet::LogAndApply:WriteManifest"}});
6308
+ });
6309
+ } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") {
6310
+ SyncPoint::GetInstance()->SetCallBack(
6311
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6312
+ SyncPoint::GetInstance()->LoadDependency(
6313
+ {{"ColumnFamilyData::CompactRange:Return",
6314
+ "VersionSet::LogAndApply:WriteManifest"}});
6315
+ });
6316
+ } else if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6317
+ SyncPoint::GetInstance()->SetCallBack(
6318
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6319
+ SyncPoint::GetInstance()->LoadDependency(
6320
+ {{"DBImpl::CompactRange:PostRefitLevel",
6321
+ "VersionSet::LogAndApply:WriteManifest"}});
6322
+ });
6323
+ } else if (compaction_path_to_test_ == "CompactFiles") {
6324
+ SyncPoint::GetInstance()->SetCallBack(
6325
+ "ExternalSstFileIngestionJob::Run", [&](void*) {
6326
+ SyncPoint::GetInstance()->LoadDependency(
6327
+ {{"DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles",
6328
+ "VersionSet::LogAndApply:WriteManifest"}});
6329
+ });
6330
+ } else {
6331
+ assert(false);
6332
+ }
6333
+ SyncPoint::GetInstance()->LoadDependency(
6334
+ {{"ExternalSstFileIngestionJob::Run", "PreCompaction"}});
6335
+ SyncPoint::GetInstance()->EnableProcessing();
6336
+ }
6337
+
6338
+ void RunCompactionOverlappedWithFileIngestion() {
6339
+ if (compaction_path_to_test_ == "AutoCompaction") {
6340
+ TEST_SYNC_POINT("PreCompaction");
6341
+ ResumeCompactionThread();
6342
+ // Without proper range conflict check,
6343
+ // this would have been `Status::Corruption` about overlapping ranges
6344
+ Status s = dbfull()->TEST_WaitForCompact();
6345
+ EXPECT_OK(s);
6346
+ } else if (compaction_path_to_test_ == "NonRefitLevelCompactRange") {
6347
+ CompactRangeOptions cro;
6348
+ cro.change_level = false;
6349
+ std::string start_key = "k1";
6350
+ Slice start(start_key);
6351
+ std::string end_key = "k4";
6352
+ Slice end(end_key);
6353
+ TEST_SYNC_POINT("PreCompaction");
6354
+ // Without proper range conflict check,
6355
+ // this would have been `Status::Corruption` about overlapping ranges
6356
+ Status s = dbfull()->CompactRange(cro, &start, &end);
6357
+ EXPECT_OK(s);
6358
+ } else if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6359
+ CompactRangeOptions cro;
6360
+ cro.change_level = true;
6361
+ cro.target_level = 5;
6362
+ std::string start_key = "k1";
6363
+ Slice start(start_key);
6364
+ std::string end_key = "k4";
6365
+ Slice end(end_key);
6366
+ TEST_SYNC_POINT("PreCompaction");
6367
+ Status s = dbfull()->CompactRange(cro, &start, &end);
6368
+ // Without proper range conflict check,
6369
+ // this would have been `Status::Corruption` about overlapping ranges
6370
+ // To see this, remove the fix AND replace
6371
+ // `DBImpl::CompactRange:PostRefitLevel` in sync point dependency with
6372
+ // `DBImpl::ReFitLevel:PostRegisterCompaction`
6373
+ EXPECT_TRUE(s.IsNotSupported());
6374
+ EXPECT_TRUE(s.ToString().find("some ongoing compaction's output") !=
6375
+ std::string::npos);
6376
+ } else if (compaction_path_to_test_ == "CompactFiles") {
6377
+ ColumnFamilyMetaData cf_meta_data;
6378
+ db_->GetColumnFamilyMetaData(&cf_meta_data);
6379
+ ASSERT_EQ(cf_meta_data.levels[0].files.size(), 1);
6380
+ std::vector<std::string> input_files;
6381
+ for (const auto& file : cf_meta_data.levels[0].files) {
6382
+ input_files.push_back(file.name);
6383
+ }
6384
+ TEST_SYNC_POINT("PreCompaction");
6385
+ Status s = db_->CompactFiles(CompactionOptions(), input_files, 1);
6386
+ // Without proper range conflict check,
6387
+ // this would have been `Status::Corruption` about overlapping ranges
6388
+ EXPECT_TRUE(s.IsAborted());
6389
+ EXPECT_TRUE(
6390
+ s.ToString().find(
6391
+ "A running compaction is writing to the same output level") !=
6392
+ std::string::npos);
6393
+ } else {
6394
+ assert(false);
6395
+ }
6396
+ }
6397
+
6398
+ void DisableSyncPoints() {
6399
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
6400
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
6401
+ }
6402
+
6403
+ protected:
6404
+ std::string compaction_path_to_test_;
6405
+ Options options_;
6406
+ std::shared_ptr<test::SleepingBackgroundTask> sleeping_task_;
6407
+ };
6408
+
6409
+ INSTANTIATE_TEST_CASE_P(DBCompactionTestWithOngoingFileIngestionParam,
6410
+ DBCompactionTestWithOngoingFileIngestionParam,
6411
+ ::testing::Values("AutoCompaction",
6412
+ "NonRefitLevelCompactRange",
6413
+ "RefitLevelCompactRange",
6414
+ "CompactFiles"));
6415
+
6416
+ TEST_P(DBCompactionTestWithOngoingFileIngestionParam, RangeConflictCheck) {
6417
+ SetupOptions();
6418
+ DestroyAndReopen(options_);
6419
+
6420
+ if (compaction_path_to_test_ == "AutoCompaction") {
6421
+ PauseCompactionThread();
6422
+ }
6423
+
6424
+ if (compaction_path_to_test_ != "RefitLevelCompactRange") {
6425
+ SetupFilesToForceFutureFilesIngestedToCertainLevel();
6426
+ }
6427
+
6428
+ // Create s1
6429
+ ASSERT_OK(Put("k1", "v"));
6430
+ ASSERT_OK(Put("k4", "v"));
6431
+ ASSERT_OK(Flush());
6432
+ if (compaction_path_to_test_ == "RefitLevelCompactRange") {
6433
+ MoveFilesToLevel(6 /* level */);
6434
+ ASSERT_EQ("0,0,0,0,0,0,1", FilesPerLevel(0));
6435
+ } else {
6436
+ ASSERT_EQ("1,0,1", FilesPerLevel(0));
6437
+ }
6438
+
6439
+ // To coerce following sequence of events
6440
+ // Timeline Thread 1 (Ingest s2) Thread 2 (Compact s1)
6441
+ // t0 | Decide to output to Lk
6442
+ // t1 | Release lock in LogAndApply()
6443
+ // t2 | Acquire lock
6444
+ // t3 | Decides to compact to Lk
6445
+ // | Expected to fail due to range
6446
+ // | conflict check with file
6447
+ // | ingestion
6448
+ // t4 | Release lock in LogAndApply()
6449
+ // t5 | Acquire lock again and finish
6450
+ // t6 | Acquire lock again and finish
6451
+ SetupSyncPoints();
6452
+
6453
+ // Ingest s2
6454
+ port::Thread thread1([&] {
6455
+ SstFileWriter sst_file_writer(EnvOptions(), options_);
6456
+ std::string s2 = dbname_ + "/ingested_s2.sst";
6457
+ ASSERT_OK(sst_file_writer.Open(s2));
6458
+ ASSERT_OK(sst_file_writer.Put("k2", "v2"));
6459
+ ASSERT_OK(sst_file_writer.Put("k3", "v2"));
6460
+ ASSERT_OK(sst_file_writer.Finish());
6461
+ ASSERT_OK(db_->IngestExternalFile({s2}, IngestExternalFileOptions()));
6462
+ });
6463
+
6464
+ // Compact s1. Without proper range conflict check,
6465
+ // this will encounter overlapping file corruption.
6466
+ port::Thread thread2([&] { RunCompactionOverlappedWithFileIngestion(); });
6467
+
6468
+ thread1.join();
6469
+ thread2.join();
6470
+ DisableSyncPoints();
6471
+ }
6472
+
6248
6473
  TEST_F(DBCompactionTest, ConsistencyFailTest) {
6249
6474
  Options options = CurrentOptions();
6250
6475
  options.force_consistency_checks = true;
package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc CHANGED
@@ -5199,8 +5199,9 @@ Status DBImpl::IngestExternalFiles(
5199
5199
  for (const auto& arg : args) {
5200
5200
  auto* cfd = static_cast<ColumnFamilyHandleImpl*>(arg.column_family)->cfd();
5201
5201
  ingestion_jobs.emplace_back(versions_.get(), cfd, immutable_db_options_,
5202
- file_options_, &snapshots_, arg.options,
5203
- &directories_, &event_logger_, io_tracer_);
5202
+ mutable_db_options_, file_options_, &snapshots_,
5203
+ arg.options, &directories_, &event_logger_,
5204
+ io_tracer_);
5204
5205
  }
5205
5206
 
5206
5207
  // TODO(yanqin) maybe make jobs run in parallel
@@ -5333,6 +5334,7 @@ Status DBImpl::IngestExternalFiles(
5333
5334
  if (!status.ok()) {
5334
5335
  break;
5335
5336
  }
5337
+ ingestion_jobs[i].RegisterRange();
5336
5338
  }
5337
5339
  }
5338
5340
  if (status.ok()) {
@@ -5388,6 +5390,10 @@ Status DBImpl::IngestExternalFiles(
5388
5390
  }
5389
5391
  }
5390
5392
 
5393
+ for (auto& job : ingestion_jobs) {
5394
+ job.UnregisterRange();
5395
+ }
5396
+
5391
5397
  if (status.ok()) {
5392
5398
  for (size_t i = 0; i != num_cfs; ++i) {
5393
5399
  auto* cfd =
@@ -5759,13 +5765,6 @@ void DBImpl::NotifyOnExternalFileIngested(
5759
5765
  }
5760
5766
  }
5761
5767
 
5762
- void DBImpl::WaitForIngestFile() {
5763
- mutex_.AssertHeld();
5764
- while (num_running_ingest_file_ > 0) {
5765
- bg_cv_.Wait();
5766
- }
5767
- }
5768
-
5769
5768
  Status DBImpl::StartTrace(const TraceOptions& trace_options,
5770
5769
  std::unique_ptr<TraceWriter>&& trace_writer) {
5771
5770
  InstrumentedMutexLock lock(&trace_mutex_);
package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h CHANGED
@@ -2023,14 +2023,6 @@ class DBImpl : public DB
2023
2023
  const int output_level, int output_path_id,
2024
2024
  JobContext* job_context, LogBuffer* log_buffer,
2025
2025
  CompactionJobInfo* compaction_job_info);
2026
-
2027
- // Wait for current IngestExternalFile() calls to finish.
2028
- // REQUIRES: mutex_ held
2029
- void WaitForIngestFile();
2030
- #else
2031
- // IngestExternalFile is not supported in ROCKSDB_LITE so this function
2032
- // will be no-op
2033
- void WaitForIngestFile() {}
2034
2026
  #endif // ROCKSDB_LITE
2035
2027
 
2036
2028
  ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);