@nxtedition/rocksdb 7.0.12 → 7.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
  2. package/deps/rocksdb/rocksdb/Makefile +3 -0
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
  5. package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
  6. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
  7. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
  8. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
  9. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
  11. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
  13. package/deps/rocksdb/rocksdb/db/c.cc +159 -5
  14. package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
  15. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
  16. package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
  24. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
  25. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
  26. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
  36. package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
  37. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
  38. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
  39. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
  41. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
  42. package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
  43. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  44. package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
  45. package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
  46. package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
  47. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
  49. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  50. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
  51. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
  52. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
  53. package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
  54. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  55. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  56. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
  59. package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
  60. package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
  61. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  62. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
  63. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
  66. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
  68. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  69. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
  70. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
  72. package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
  73. package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
  74. package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
  75. package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
  76. package/deps/rocksdb/rocksdb/options/options.cc +4 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
  78. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  79. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
  80. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
  81. package/deps/rocksdb/rocksdb/src.mk +1 -0
  82. package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
  88. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
  90. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
  91. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
  92. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
  93. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
  94. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
  95. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
  96. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
  97. package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
  98. package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
  99. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
  100. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
  101. package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
  102. package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
  103. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  104. package/package.json +1 -1
  105. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -2467,35 +2467,37 @@ TEST_P(MultiGetPrefixExtractorTest, Batched) {
2467
2467
  SetPerfLevel(kEnableCount);
2468
2468
  get_perf_context()->Reset();
2469
2469
 
2470
- // First key is not in the prefix_extractor domain
2471
2470
  ASSERT_OK(Put("k", "v0"));
2472
2471
  ASSERT_OK(Put("kk1", "v1"));
2473
2472
  ASSERT_OK(Put("kk2", "v2"));
2474
2473
  ASSERT_OK(Put("kk3", "v3"));
2475
2474
  ASSERT_OK(Put("kk4", "v4"));
2476
- std::vector<std::string> mem_keys(
2475
+ std::vector<std::string> keys(
2477
2476
  {"k", "kk1", "kk2", "kk3", "kk4", "rofl", "lmho"});
2478
- std::vector<std::string> inmem_values;
2479
- inmem_values = MultiGet(mem_keys, nullptr);
2480
- ASSERT_EQ(inmem_values[0], "v0");
2481
- ASSERT_EQ(inmem_values[1], "v1");
2482
- ASSERT_EQ(inmem_values[2], "v2");
2483
- ASSERT_EQ(inmem_values[3], "v3");
2484
- ASSERT_EQ(inmem_values[4], "v4");
2477
+ std::vector<std::string> expected(
2478
+ {"v0", "v1", "v2", "v3", "v4", "NOT_FOUND", "NOT_FOUND"});
2479
+ std::vector<std::string> values;
2480
+ values = MultiGet(keys, nullptr);
2481
+ ASSERT_EQ(values, expected);
2482
+ // One key ("k") is not queried against the filter because it is outside
2483
+ // the prefix_extractor domain, leaving 6 keys with queried prefixes.
2485
2484
  ASSERT_EQ(get_perf_context()->bloom_memtable_miss_count, 2);
2486
- ASSERT_EQ(get_perf_context()->bloom_memtable_hit_count, 5);
2485
+ ASSERT_EQ(get_perf_context()->bloom_memtable_hit_count, 4);
2487
2486
  ASSERT_OK(Flush());
2488
2487
 
2489
- std::vector<std::string> keys({"k", "kk1", "kk2", "kk3", "kk4"});
2490
- std::vector<std::string> values;
2491
2488
  get_perf_context()->Reset();
2492
2489
  values = MultiGet(keys, nullptr);
2493
- ASSERT_EQ(values[0], "v0");
2494
- ASSERT_EQ(values[1], "v1");
2495
- ASSERT_EQ(values[2], "v2");
2496
- ASSERT_EQ(values[3], "v3");
2497
- ASSERT_EQ(values[4], "v4");
2498
- // Filter hits for 4 in-domain keys
2490
+ ASSERT_EQ(values, expected);
2491
+ ASSERT_EQ(get_perf_context()->bloom_sst_miss_count, 2);
2492
+ ASSERT_EQ(get_perf_context()->bloom_sst_hit_count, 4);
2493
+
2494
+ // Also check Get stat
2495
+ get_perf_context()->Reset();
2496
+ for (size_t i = 0; i < keys.size(); ++i) {
2497
+ values[i] = Get(keys[i]);
2498
+ }
2499
+ ASSERT_EQ(values, expected);
2500
+ ASSERT_EQ(get_perf_context()->bloom_sst_miss_count, 2);
2499
2501
  ASSERT_EQ(get_perf_context()->bloom_sst_hit_count, 4);
2500
2502
  }
2501
2503
 
@@ -4143,7 +4145,7 @@ TEST_F(DBBasicTest, FailOpenIfLoggerCreationFail) {
4143
4145
 
4144
4146
  Status s = TryReopen(options);
4145
4147
  ASSERT_EQ(nullptr, options.info_log);
4146
- ASSERT_TRUE(s.IsAborted());
4148
+ ASSERT_TRUE(s.IsIOError());
4147
4149
 
4148
4150
  SyncPoint::GetInstance()->DisableProcessing();
4149
4151
  SyncPoint::GetInstance()->ClearAllCallBacks();
@@ -5225,7 +5225,8 @@ INSTANTIATE_TEST_CASE_P(
5225
5225
  TEST_F(DBCompactionTest, PersistRoundRobinCompactCursor) {
5226
5226
  Options options = CurrentOptions();
5227
5227
  options.write_buffer_size = 16 * 1024;
5228
- options.max_bytes_for_level_base = 64 * 1024;
5228
+ options.max_bytes_for_level_base = 128 * 1024;
5229
+ options.target_file_size_base = 64 * 1024;
5229
5230
  options.level0_file_num_compaction_trigger = 4;
5230
5231
  options.compaction_pri = CompactionPri::kRoundRobin;
5231
5232
  options.max_bytes_for_level_multiplier = 4;
@@ -5241,6 +5242,7 @@ TEST_F(DBCompactionTest, PersistRoundRobinCompactCursor) {
5241
5242
  for (int j = 0; j < 16; j++) {
5242
5243
  ASSERT_OK(Put(rnd.RandomString(24), rnd.RandomString(1000)));
5243
5244
  }
5245
+ ASSERT_OK(Flush());
5244
5246
  }
5245
5247
 
5246
5248
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -5290,6 +5292,63 @@ TEST_F(DBCompactionTest, PersistRoundRobinCompactCursor) {
5290
5292
  }
5291
5293
  }
5292
5294
 
5295
+ TEST_F(DBCompactionTest, RoundRobinCutOutputAtCompactCursor) {
5296
+ Options options = CurrentOptions();
5297
+ options.num_levels = 3;
5298
+ options.compression = kNoCompression;
5299
+ options.write_buffer_size = 4 * 1024;
5300
+ options.max_bytes_for_level_base = 64 * 1024;
5301
+ options.max_bytes_for_level_multiplier = 4;
5302
+ options.level0_file_num_compaction_trigger = 4;
5303
+ options.compaction_pri = CompactionPri::kRoundRobin;
5304
+
5305
+ DestroyAndReopen(options);
5306
+
5307
+ VersionSet* const versions = dbfull()->GetVersionSet();
5308
+ assert(versions);
5309
+
5310
+ ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
5311
+ ASSERT_NE(cfd, nullptr);
5312
+
5313
+ Version* const current = cfd->current();
5314
+ ASSERT_NE(current, nullptr);
5315
+
5316
+ VersionStorageInfo* storage_info = current->storage_info();
5317
+ ASSERT_NE(storage_info, nullptr);
5318
+
5319
+ const InternalKey split_cursor = InternalKey(Key(600), 100, kTypeValue);
5320
+ storage_info->AddCursorForOneLevel(2, split_cursor);
5321
+
5322
+ Random rnd(301);
5323
+
5324
+ for (int i = 0; i < 50; i++) {
5325
+ for (int j = 0; j < 50; j++) {
5326
+ ASSERT_OK(Put(Key(j * 2 + i * 100), rnd.RandomString(102)));
5327
+ }
5328
+ }
5329
+ // Add more overlapping files (avoid trivial move) to trigger compaction that
5330
+ // outputs files in L2. Note that trivial move does not trigger compaction and
5331
+ // in that case the cursor is not necessarily the boundary of file.
5332
+ for (int i = 0; i < 50; i++) {
5333
+ for (int j = 0; j < 50; j++) {
5334
+ ASSERT_OK(Put(Key(j * 2 + 1 + i * 100), rnd.RandomString(1014)));
5335
+ }
5336
+ }
5337
+
5338
+ ASSERT_OK(dbfull()->TEST_WaitForCompact());
5339
+
5340
+ std::vector<std::vector<FileMetaData>> level_to_files;
5341
+ dbfull()->TEST_GetFilesMetaData(dbfull()->DefaultColumnFamily(),
5342
+ &level_to_files);
5343
+ const auto icmp = cfd->current()->storage_info()->InternalComparator();
5344
+ // Files in level 2 should be split by the cursor
5345
+ for (const auto& file : level_to_files[2]) {
5346
+ ASSERT_TRUE(
5347
+ icmp->Compare(file.smallest.Encode(), split_cursor.Encode()) >= 0 ||
5348
+ icmp->Compare(file.largest.Encode(), split_cursor.Encode()) < 0);
5349
+ }
5350
+ }
5351
+
5293
5352
  class NoopMergeOperator : public MergeOperator {
5294
5353
  public:
5295
5354
  NoopMergeOperator() {}
@@ -777,13 +777,25 @@ TEST_F(DBFlushTest, MemPurgeBasic) {
777
777
 
778
778
  // Enforce size of a single MemTable to 1MB (1MB = 1048576 bytes).
779
779
  options.write_buffer_size = 1 << 20;
780
- // Activate the MemPurge prototype.
781
- options.experimental_mempurge_threshold = 1.0;
782
780
  #ifndef ROCKSDB_LITE
781
+ // Initially deactivate the MemPurge prototype.
782
+ options.experimental_mempurge_threshold = 0.0;
783
783
  TestFlushListener* listener = new TestFlushListener(options.env, this);
784
784
  options.listeners.emplace_back(listener);
785
+ #else
786
+ // Activate directly the MemPurge prototype.
787
+ // (RocksDB lite does not support dynamic options)
788
+ options.experimental_mempurge_threshold = 1.0;
785
789
  #endif // !ROCKSDB_LITE
786
790
  ASSERT_OK(TryReopen(options));
791
+
792
+ // RocksDB lite does not support dynamic options
793
+ #ifndef ROCKSDB_LITE
794
+ // Dynamically activate the MemPurge prototype without restarting the DB.
795
+ ColumnFamilyHandle* cfh = db_->DefaultColumnFamily();
796
+ ASSERT_OK(db_->SetOptions(cfh, {{"experimental_mempurge_threshold", "1.0"}}));
797
+ #endif
798
+
787
799
  std::atomic<uint32_t> mempurge_count{0};
788
800
  std::atomic<uint32_t> sst_count{0};
789
801
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
@@ -914,6 +926,234 @@ TEST_F(DBFlushTest, MemPurgeBasic) {
914
926
  Close();
915
927
  }
916
928
 
929
+ // RocksDB lite does not support dynamic options
930
+ #ifndef ROCKSDB_LITE
931
+ TEST_F(DBFlushTest, MemPurgeBasicToggle) {
932
+ Options options = CurrentOptions();
933
+
934
+ // The following options are used to enforce several values that
935
+ // may already exist as default values to make this test resilient
936
+ // to default value updates in the future.
937
+ options.statistics = CreateDBStatistics();
938
+
939
+ // Record all statistics.
940
+ options.statistics->set_stats_level(StatsLevel::kAll);
941
+
942
+ // create the DB if it's not already present
943
+ options.create_if_missing = true;
944
+
945
+ // Useful for now as we are trying to compare uncompressed data savings on
946
+ // flush().
947
+ options.compression = kNoCompression;
948
+
949
+ // Prevent memtable in place updates. Should already be disabled
950
+ // (from Wiki:
951
+ // In place updates can be enabled by toggling on the bool
952
+ // inplace_update_support flag. However, this flag is by default set to
953
+ // false
954
+ // because this thread-safe in-place update support is not compatible
955
+ // with concurrent memtable writes. Note that the bool
956
+ // allow_concurrent_memtable_write is set to true by default )
957
+ options.inplace_update_support = false;
958
+ options.allow_concurrent_memtable_write = true;
959
+
960
+ // Enforce size of a single MemTable to 1MB (1MB = 1048576 bytes).
961
+ options.write_buffer_size = 1 << 20;
962
+ // Initially deactivate the MemPurge prototype.
963
+ // (negative values are equivalent to 0.0).
964
+ options.experimental_mempurge_threshold = -25.3;
965
+ TestFlushListener* listener = new TestFlushListener(options.env, this);
966
+ options.listeners.emplace_back(listener);
967
+
968
+ ASSERT_OK(TryReopen(options));
969
+ // Dynamically activate the MemPurge prototype without restarting the DB.
970
+ ColumnFamilyHandle* cfh = db_->DefaultColumnFamily();
971
+ // Values greater than 1.0 are equivalent to 1.0
972
+ ASSERT_OK(
973
+ db_->SetOptions(cfh, {{"experimental_mempurge_threshold", "3.7898"}}));
974
+ std::atomic<uint32_t> mempurge_count{0};
975
+ std::atomic<uint32_t> sst_count{0};
976
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
977
+ "DBImpl::FlushJob:MemPurgeSuccessful",
978
+ [&](void* /*arg*/) { mempurge_count++; });
979
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
980
+ "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; });
981
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
982
+ const size_t KVSIZE = 3;
983
+ std::vector<std::string> KEYS(KVSIZE);
984
+ for (size_t k = 0; k < KVSIZE; k++) {
985
+ KEYS[k] = "IamKey" + std::to_string(k);
986
+ }
987
+
988
+ std::vector<std::string> RNDVALS(KVSIZE);
989
+ const std::string NOT_FOUND = "NOT_FOUND";
990
+
991
+ // Heavy overwrite workload,
992
+ // more than would fit in maximum allowed memtables.
993
+ Random rnd(719);
994
+ const size_t NUM_REPEAT = 100;
995
+ const size_t RAND_VALUES_LENGTH = 10240;
996
+
997
+ // Insertion of K-V pairs, multiple times (overwrites).
998
+ for (size_t i = 0; i < NUM_REPEAT; i++) {
999
+ for (size_t j = 0; j < KEYS.size(); j++) {
1000
+ RNDVALS[j] = rnd.RandomString(RAND_VALUES_LENGTH);
1001
+ ASSERT_OK(Put(KEYS[j], RNDVALS[j]));
1002
+ ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]);
1003
+ }
1004
+ for (size_t j = 0; j < KEYS.size(); j++) {
1005
+ ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]);
1006
+ }
1007
+ }
1008
+
1009
+ // Check that there was at least one mempurge
1010
+ const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 1;
1011
+ // Check that there were no SST files created during flush.
1012
+ const uint32_t EXPECTED_SST_COUNT = 0;
1013
+
1014
+ EXPECT_GE(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT);
1015
+ EXPECT_EQ(sst_count.exchange(0), EXPECTED_SST_COUNT);
1016
+
1017
+ // Dynamically deactivate MemPurge.
1018
+ ASSERT_OK(
1019
+ db_->SetOptions(cfh, {{"experimental_mempurge_threshold", "-1023.0"}}));
1020
+
1021
+ // Insertion of K-V pairs, multiple times (overwrites).
1022
+ for (size_t i = 0; i < NUM_REPEAT; i++) {
1023
+ for (size_t j = 0; j < KEYS.size(); j++) {
1024
+ RNDVALS[j] = rnd.RandomString(RAND_VALUES_LENGTH);
1025
+ ASSERT_OK(Put(KEYS[j], RNDVALS[j]));
1026
+ ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]);
1027
+ }
1028
+ for (size_t j = 0; j < KEYS.size(); j++) {
1029
+ ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]);
1030
+ }
1031
+ }
1032
+
1033
+ // Expected mempurge count now that MemPurge has been deactivated.
1034
+ const uint32_t ZERO = 0;
1035
+ // Assert that at least one flush to storage has been performed
1036
+ EXPECT_GT(sst_count.exchange(0), EXPECTED_SST_COUNT);
1037
+ // The mempurge count is expected to be set to 0 when the options are updated.
1038
+ // We expect no mempurge at all.
1039
+ EXPECT_EQ(mempurge_count.exchange(0), ZERO);
1040
+
1041
+ Close();
1042
+ }
1043
+ // Closes the "#ifndef ROCKSDB_LITE"
1044
+ // End of MemPurgeBasicToggle, which is not
1045
+ // supported with RocksDB LITE because it
1046
+ // relies on dynamically changing the option
1047
+ // flag experimental_mempurge_threshold.
1048
+ #endif
1049
+
1050
+ // At the moment, MemPurge feature is deactivated
1051
+ // when atomic_flush is enabled. This is because the level
1052
+ // of garbage between Column Families is not guaranteed to
1053
+ // be consistent, therefore a CF could hypothetically
1054
+ // trigger a MemPurge while another CF would trigger
1055
+ // a regular Flush.
1056
+ TEST_F(DBFlushTest, MemPurgeWithAtomicFlush) {
1057
+ Options options = CurrentOptions();
1058
+
1059
+ // The following options are used to enforce several values that
1060
+ // may already exist as default values to make this test resilient
1061
+ // to default value updates in the future.
1062
+ options.statistics = CreateDBStatistics();
1063
+
1064
+ // Record all statistics.
1065
+ options.statistics->set_stats_level(StatsLevel::kAll);
1066
+
1067
+ // create the DB if it's not already present
1068
+ options.create_if_missing = true;
1069
+
1070
+ // Useful for now as we are trying to compare uncompressed data savings on
1071
+ // flush().
1072
+ options.compression = kNoCompression;
1073
+
1074
+ // Prevent memtable in place updates. Should already be disabled
1075
+ // (from Wiki:
1076
+ // In place updates can be enabled by toggling on the bool
1077
+ // inplace_update_support flag. However, this flag is by default set to
1078
+ // false
1079
+ // because this thread-safe in-place update support is not compatible
1080
+ // with concurrent memtable writes. Note that the bool
1081
+ // allow_concurrent_memtable_write is set to true by default )
1082
+ options.inplace_update_support = false;
1083
+ options.allow_concurrent_memtable_write = true;
1084
+
1085
+ // Enforce size of a single MemTable to 1MB (1MB = 1,048,576 bytes).
1086
+ options.write_buffer_size = 1 << 20;
1087
+ // Activate the MemPurge prototype.
1088
+ options.experimental_mempurge_threshold = 153.245;
1089
+ // Activate atomic_flush.
1090
+ options.atomic_flush = true;
1091
+
1092
+ const std::vector<std::string> new_cf_names = {"pikachu", "eevie"};
1093
+ CreateColumnFamilies(new_cf_names, options);
1094
+
1095
+ Close();
1096
+
1097
+ // 3 CFs: default will be filled with overwrites (would normally trigger
1098
+ // mempurge)
1099
+ // new_cf_names[0] will be filled with random values (would trigger
1100
+ // flush); new_cf_names[1] receives only a single small entry.
1101
+ ReopenWithColumnFamilies(
1102
+ {kDefaultColumnFamilyName, new_cf_names[0], new_cf_names[1]}, options);
1103
+ size_t num_cfs = handles_.size();
1104
+ ASSERT_EQ(3, num_cfs);
1105
+ ASSERT_OK(Put(1, "foo", "bar"));
1106
+ ASSERT_OK(Put(2, "bar", "baz"));
1107
+
1108
+ std::atomic<uint32_t> mempurge_count{0};
1109
+ std::atomic<uint32_t> sst_count{0};
1110
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1111
+ "DBImpl::FlushJob:MemPurgeSuccessful",
1112
+ [&](void* /*arg*/) { mempurge_count++; });
1113
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1114
+ "DBImpl::FlushJob:SSTFileCreated", [&](void* /*arg*/) { sst_count++; });
1115
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1116
+
1117
+ const size_t KVSIZE = 3;
1118
+ std::vector<std::string> KEYS(KVSIZE);
1119
+ for (size_t k = 0; k < KVSIZE; k++) {
1120
+ KEYS[k] = "IamKey" + std::to_string(k);
1121
+ }
1122
+
1123
+ std::string RNDKEY;
1124
+ std::vector<std::string> RNDVALS(KVSIZE);
1125
+ const std::string NOT_FOUND = "NOT_FOUND";
1126
+
1127
+ // Heavy overwrite workload,
1128
+ // more than would fit in maximum allowed memtables.
1129
+ Random rnd(106);
1130
+ const size_t NUM_REPEAT = 100;
1131
+ const size_t RAND_KEY_LENGTH = 128;
1132
+ const size_t RAND_VALUES_LENGTH = 10240;
1133
+
1134
+ // Insertion of K-V pairs, multiple times (overwrites).
1135
+ for (size_t i = 0; i < NUM_REPEAT; i++) {
1136
+ for (size_t j = 0; j < KEYS.size(); j++) {
1137
+ RNDKEY = rnd.RandomString(RAND_KEY_LENGTH);
1138
+ RNDVALS[j] = rnd.RandomString(RAND_VALUES_LENGTH);
1139
+ ASSERT_OK(Put(KEYS[j], RNDVALS[j]));
1140
+ ASSERT_OK(Put(1, RNDKEY, RNDVALS[j]));
1141
+ ASSERT_EQ(Get(KEYS[j]), RNDVALS[j]);
1142
+ ASSERT_EQ(Get(1, RNDKEY), RNDVALS[j]);
1143
+ }
1144
+ }
1145
+
1146
+ // Check that there was no mempurge because atomic_flush option is true.
1147
+ const uint32_t EXPECTED_MIN_MEMPURGE_COUNT = 0;
1148
+ // Check that there was at least one SST file created during flush.
1149
+ const uint32_t EXPECTED_SST_COUNT = 1;
1150
+
1151
+ EXPECT_EQ(mempurge_count.exchange(0), EXPECTED_MIN_MEMPURGE_COUNT);
1152
+ EXPECT_GE(sst_count.exchange(0), EXPECTED_SST_COUNT);
1153
+
1154
+ Close();
1155
+ }
1156
+
917
1157
  TEST_F(DBFlushTest, MemPurgeDeleteAndDeleteRange) {
918
1158
  Options options = CurrentOptions();
919
1159
 
@@ -930,7 +1170,7 @@ TEST_F(DBFlushTest, MemPurgeDeleteAndDeleteRange) {
930
1170
  // Enforce size of a single MemTable to 1MB (1MB = 1048576 bytes).
931
1171
  options.write_buffer_size = 1 << 20;
932
1172
  // Activate the MemPurge prototype.
933
- options.experimental_mempurge_threshold = 1.0;
1173
+ options.experimental_mempurge_threshold = 15.0;
934
1174
 
935
1175
  ASSERT_OK(TryReopen(options));
936
1176
 
@@ -1137,7 +1377,7 @@ TEST_F(DBFlushTest, MemPurgeAndCompactionFilter) {
1137
1377
  // Enforce size of a single MemTable to 1MB (1MB = 1048576 bytes).
1138
1378
  options.write_buffer_size = 1 << 20;
1139
1379
  // Activate the MemPurge prototype.
1140
- options.experimental_mempurge_threshold = 1.0;
1380
+ options.experimental_mempurge_threshold = 26.55;
1141
1381
 
1142
1382
  ASSERT_OK(TryReopen(options));
1143
1383
 
@@ -1212,8 +1452,9 @@ TEST_F(DBFlushTest, DISABLED_MemPurgeWALSupport) {
1212
1452
 
1213
1453
  // Enforce size of a single MemTable to 128KB.
1214
1454
  options.write_buffer_size = 128 << 10;
1215
- // Activate the MemPurge prototype.
1216
- options.experimental_mempurge_threshold = 1.0;
1455
+ // Activate the MemPurge prototype
1456
+ // (values >1.0 are equivalent to 1.0).
1457
+ options.experimental_mempurge_threshold = 2.5;
1217
1458
 
1218
1459
  ASSERT_OK(TryReopen(options));
1219
1460
 
@@ -54,12 +54,22 @@ class CompactedDBImpl : public DBImpl {
54
54
  const Slice& /*key*/, const Slice& /*value*/) override {
55
55
  return Status::NotSupported("Not supported in compacted db mode.");
56
56
  }
57
+
58
+ using DBImpl::PutEntity;
59
+ Status PutEntity(const WriteOptions& /* options */,
60
+ ColumnFamilyHandle* /* column_family */,
61
+ const Slice& /* key */,
62
+ const WideColumns& /* columns */) override {
63
+ return Status::NotSupported("Not supported in compacted db mode.");
64
+ }
65
+
57
66
  using DBImpl::Merge;
58
67
  virtual Status Merge(const WriteOptions& /*options*/,
59
68
  ColumnFamilyHandle* /*column_family*/,
60
69
  const Slice& /*key*/, const Slice& /*value*/) override {
61
70
  return Status::NotSupported("Not supported in compacted db mode.");
62
71
  }
72
+
63
73
  using DBImpl::Delete;
64
74
  virtual Status Delete(const WriteOptions& /*options*/,
65
75
  ColumnFamilyHandle* /*column_family*/,
@@ -156,7 +156,9 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
156
156
  bool read_only)
157
157
  : dbname_(dbname),
158
158
  own_info_log_(options.info_log == nullptr),
159
- initial_db_options_(SanitizeOptions(dbname, options, read_only)),
159
+ init_logger_creation_s_(),
160
+ initial_db_options_(SanitizeOptions(dbname, options, read_only,
161
+ &init_logger_creation_s_)),
160
162
  env_(initial_db_options_.env),
161
163
  io_tracer_(std::make_shared<IOTracer>()),
162
164
  immutable_db_options_(initial_db_options_),
@@ -576,38 +578,6 @@ Status DBImpl::CloseHelper() {
576
578
  flush_scheduler_.Clear();
577
579
  trim_history_scheduler_.Clear();
578
580
 
579
- // For now, simply trigger a manual flush at close time
580
- // on all the column families.
581
- // TODO(bjlemaire): Check if this is needed. Also, in the
582
- // future we can contemplate doing a more fine-grained
583
- // flushing by first checking if there is a need for
584
- // flushing (but need to implement something
585
- // else than imm()->IsFlushPending() because the output
586
- // memtables added to imm() don't trigger flushes).
587
- if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
588
- Status flush_ret;
589
- mutex_.Unlock();
590
- for (ColumnFamilyData* cf : *versions_->GetColumnFamilySet()) {
591
- if (immutable_db_options_.atomic_flush) {
592
- flush_ret = AtomicFlushMemTables({cf}, FlushOptions(),
593
- FlushReason::kManualFlush);
594
- if (!flush_ret.ok()) {
595
- ROCKS_LOG_INFO(
596
- immutable_db_options_.info_log,
597
- "Atomic flush memtables failed upon closing (mempurge).");
598
- }
599
- } else {
600
- flush_ret =
601
- FlushMemTable(cf, FlushOptions(), FlushReason::kManualFlush);
602
- if (!flush_ret.ok()) {
603
- ROCKS_LOG_INFO(immutable_db_options_.info_log,
604
- "Flush memtables failed upon closing (mempurge).");
605
- }
606
- }
607
- }
608
- mutex_.Lock();
609
- }
610
-
611
581
  while (!flush_queue_.empty()) {
612
582
  const FlushRequest& flush_req = PopFirstFromFlushQueue();
613
583
  for (const auto& iter : flush_req) {
@@ -747,6 +717,9 @@ Status DBImpl::CloseHelper() {
747
717
  Status DBImpl::CloseImpl() { return CloseHelper(); }
748
718
 
749
719
  DBImpl::~DBImpl() {
720
+ // TODO: remove this.
721
+ init_logger_creation_s_.PermitUncheckedError();
722
+
750
723
  InstrumentedMutexLock closing_lock_guard(&closing_mutex_);
751
724
  if (closed_) {
752
725
  return;
@@ -217,6 +217,11 @@ class DBImpl : public DB {
217
217
  Status Put(const WriteOptions& options, ColumnFamilyHandle* column_family,
218
218
  const Slice& key, const Slice& ts, const Slice& value) override;
219
219
 
220
+ using DB::PutEntity;
221
+ Status PutEntity(const WriteOptions& options,
222
+ ColumnFamilyHandle* column_family, const Slice& key,
223
+ const WideColumns& columns) override;
224
+
220
225
  using DB::Merge;
221
226
  Status Merge(const WriteOptions& options, ColumnFamilyHandle* column_family,
222
227
  const Slice& key, const Slice& value) override;
@@ -1248,6 +1253,7 @@ class DBImpl : public DB {
1248
1253
  std::unique_ptr<VersionSet> versions_;
1249
1254
  // Flag to check whether we allocated and own the info log file
1250
1255
  bool own_info_log_;
1256
+ Status init_logger_creation_s_;
1251
1257
  const DBOptions initial_db_options_;
1252
1258
  Env* const env_;
1253
1259
  std::shared_ptr<IOTracer> io_tracer_;
@@ -2600,10 +2606,12 @@ class GetWithTimestampReadCallback : public ReadCallback {
2600
2606
  };
2601
2607
 
2602
2608
  extern Options SanitizeOptions(const std::string& db, const Options& src,
2603
- bool read_only = false);
2609
+ bool read_only = false,
2610
+ Status* logger_creation_s = nullptr);
2604
2611
 
2605
2612
  extern DBOptions SanitizeOptions(const std::string& db, const DBOptions& src,
2606
- bool read_only = false);
2613
+ bool read_only = false,
2614
+ Status* logger_creation_s = nullptr);
2607
2615
 
2608
2616
  extern CompressionType GetCompressionFlush(
2609
2617
  const ImmutableCFOptions& ioptions,
@@ -2237,7 +2237,7 @@ Status DBImpl::AtomicFlushMemTables(
2237
2237
  // This method tries to avoid write stall (similar to CompactRange() behavior)
2238
2238
  // it emulates how the SuperVersion / LSM would change if flush happens, checks
2239
2239
  // it against various constrains and delays flush if it'd cause write stall.
2240
- // Called should check status and flush_needed to see if flush already happened.
2240
+ // Caller should check status and flush_needed to see if flush already happened.
2241
2241
  Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
2242
2242
  bool* flush_needed) {
2243
2243
  {
@@ -2616,17 +2616,7 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req,
2616
2616
  assert(flush_req.size() == 1);
2617
2617
  ColumnFamilyData* cfd = flush_req[0].first;
2618
2618
  assert(cfd);
2619
- // Note: SchedulePendingFlush is always preceded
2620
- // with an imm()->FlushRequested() call. However,
2621
- // we want to make this code snipper more resilient to
2622
- // future changes. Therefore, we add the following if
2623
- // statement - note that calling it twice (or more)
2624
- // doesn't break anything.
2625
- if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
2626
- // If imm() contains silent memtables,
2627
- // requesting a flush will mark the imm_needed as true.
2628
- cfd->imm()->FlushRequested();
2629
- }
2619
+
2630
2620
  if (!cfd->queued_for_flush() && cfd->imm()->IsFlushPending()) {
2631
2621
  cfd->Ref();
2632
2622
  cfd->set_queued_for_flush(true);
@@ -2775,11 +2765,13 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
2775
2765
 
2776
2766
  for (const auto& iter : flush_req) {
2777
2767
  ColumnFamilyData* cfd = iter.first;
2778
- if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
2779
- // If imm() contains silent memtables,
2780
- // requesting a flush will mark the imm_needed as true.
2768
+ if (cfd->GetMempurgeUsed()) {
2769
+ // If imm() contains silent memtables (e.g.: because
2770
+ // MemPurge was activated), requesting a flush will
2771
+ // mark the imm_needed as true.
2781
2772
  cfd->imm()->FlushRequested();
2782
2773
  }
2774
+
2783
2775
  if (cfd->IsDropped() || !cfd->imm()->IsFlushPending()) {
2784
2776
  // can't flush this CF, try next one
2785
2777
  column_families_not_to_flush.push_back(cfd);
@@ -19,6 +19,7 @@
19
19
  #include "logging/logging.h"
20
20
  #include "port/port.h"
21
21
  #include "util/autovector.h"
22
+ #include "util/defer.h"
22
23
 
23
24
  namespace ROCKSDB_NAMESPACE {
24
25
 
@@ -252,6 +253,22 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
252
253
  job_context->blob_delete_files);
253
254
  }
254
255
 
256
+ // Before potentially releasing mutex and waiting on condvar, increment
257
+ // pending_purge_obsolete_files_ so that another thread executing
258
+ // `GetSortedWals` will wait until this thread finishes execution since the
259
+ // other thread will be waiting for `pending_purge_obsolete_files_`.
260
+ // pending_purge_obsolete_files_ MUST be decremented if there is nothing to
261
+ // delete.
262
+ ++pending_purge_obsolete_files_;
263
+
264
+ Defer cleanup([job_context, this]() {
265
+ assert(job_context != nullptr);
266
+ if (!job_context->HaveSomethingToDelete()) {
267
+ mutex_.AssertHeld();
268
+ --pending_purge_obsolete_files_;
269
+ }
270
+ });
271
+
255
272
  // logs_ is empty when called during recovery, in which case there can't yet
256
273
  // be any tracked obsolete logs
257
274
  if (!alive_log_files_.empty() && !logs_.empty()) {
@@ -308,9 +325,6 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
308
325
  job_context->logs_to_free = logs_to_free_;
309
326
  job_context->log_recycle_files.assign(log_recycle_files_.begin(),
310
327
  log_recycle_files_.end());
311
- if (job_context->HaveSomethingToDelete()) {
312
- ++pending_purge_obsolete_files_;
313
- }
314
328
  logs_to_free_.clear();
315
329
  }
316
330