@nxtedition/rocksdb 8.0.0 → 8.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144):
  1. package/BUILDING.md +2 -2
  2. package/binding.cc +2 -7
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -9
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/TARGETS +4 -2
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +8 -29
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +146 -0
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +13 -1
  10. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +57 -146
  11. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +32 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +11 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -9
  14. package/deps/rocksdb/rocksdb/db/column_family.h +20 -0
  15. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -33
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +27 -8
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +2 -1
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -6
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +65 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +5 -0
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -32
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +28 -47
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +28 -22
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -14
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +170 -140
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -4
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  35. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  36. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  37. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  38. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +72 -5
  39. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +119 -10
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +585 -264
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +46 -18
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +6 -15
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -8
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +10 -0
  49. package/deps/rocksdb/rocksdb/db/db_iter.cc +57 -36
  50. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -1
  51. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +250 -2
  52. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  53. package/deps/rocksdb/rocksdb/db/db_test2.cc +307 -8
  54. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  55. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  56. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  57. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +5 -2
  60. package/deps/rocksdb/rocksdb/db/flush_job.cc +5 -2
  61. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
  62. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  63. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/memtable.cc +55 -9
  65. package/deps/rocksdb/rocksdb/db/merge_helper.cc +76 -102
  66. package/deps/rocksdb/rocksdb/db/merge_helper.h +2 -11
  67. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  68. package/deps/rocksdb/rocksdb/db/repair.cc +64 -22
  69. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  70. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  71. package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
  72. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  73. package/deps/rocksdb/rocksdb/db/version_builder.cc +90 -43
  74. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  75. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +190 -67
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +15 -1
  77. package/deps/rocksdb/rocksdb/db/version_edit.h +16 -4
  78. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +41 -11
  79. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +27 -12
  80. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +18 -16
  81. package/deps/rocksdb/rocksdb/db/version_set.cc +219 -38
  82. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  83. package/deps/rocksdb/rocksdb/db/version_set_test.cc +45 -25
  84. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +122 -61
  85. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +0 -1
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +0 -4
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +12 -17
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +6 -4
  90. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  91. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +1 -0
  92. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +0 -48
  93. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
  94. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +196 -171
  95. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  97. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -18
  98. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +27 -5
  99. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  100. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  101. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +3 -0
  102. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  103. package/deps/rocksdb/rocksdb/logging/logging.h +13 -19
  104. package/deps/rocksdb/rocksdb/memory/arena.cc +4 -3
  105. package/deps/rocksdb/rocksdb/memory/arena_test.cc +30 -0
  106. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
  107. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  108. package/deps/rocksdb/rocksdb/src.mk +2 -1
  109. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  110. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -10
  111. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -29
  112. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  113. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -39
  114. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +0 -1
  115. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  116. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +142 -0
  117. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +241 -0
  118. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  119. package/deps/rocksdb/rocksdb/table/format.h +5 -2
  120. package/deps/rocksdb/rocksdb/table/get_context.cc +52 -11
  121. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +97 -115
  122. package/deps/rocksdb/rocksdb/table/merging_iterator.h +82 -1
  123. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  124. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  125. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  126. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  127. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +0 -6
  128. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  129. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  130. package/deps/rocksdb/rocksdb/util/crc32c.cc +1 -1
  131. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  132. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +5 -0
  133. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -0
  134. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -67
  135. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -3
  136. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  137. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +59 -0
  138. package/deps/rocksdb/rocksdb.gyp +2 -1
  139. package/package.json +1 -1
  140. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  141. package/prebuilds/linux-x64/node.napi.node +0 -0
  142. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +0 -580
  143. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +0 -476
  144. package/max_rev_operator.h +0 -100
@@ -10,7 +10,6 @@
10
10
 
11
11
  #include "cache/cache_key.h"
12
12
  #include "cache/clock_cache.h"
13
- #include "cache/fast_lru_cache.h"
14
13
  #include "db/db_test_util.h"
15
14
  #include "file/sst_file_manager_impl.h"
16
15
  #include "port/port.h"
@@ -364,148 +363,6 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
364
363
  ValidateLRUList({"x", "y", "g", "z", "d", "m"}, 2, 2, 2);
365
364
  }
366
365
 
367
- // TODO: FastLRUCache and ClockCache use the same tests. We can probably remove
368
- // them from FastLRUCache after ClockCache becomes productive, and we don't plan
369
- // to use or maintain FastLRUCache any more.
370
- namespace fast_lru_cache {
371
-
372
- // TODO(guido) Replicate LRU policy tests from LRUCache here.
373
- class FastLRUCacheTest : public testing::Test {
374
- public:
375
- FastLRUCacheTest() {}
376
- ~FastLRUCacheTest() override { DeleteCache(); }
377
-
378
- void DeleteCache() {
379
- if (cache_ != nullptr) {
380
- cache_->~LRUCacheShard();
381
- port::cacheline_aligned_free(cache_);
382
- cache_ = nullptr;
383
- }
384
- }
385
-
386
- void NewCache(size_t capacity) {
387
- DeleteCache();
388
- cache_ = reinterpret_cast<LRUCacheShard*>(
389
- port::cacheline_aligned_alloc(sizeof(LRUCacheShard)));
390
- new (cache_) LRUCacheShard(capacity, 1 /*estimated_value_size*/,
391
- false /*strict_capacity_limit*/,
392
- kDontChargeCacheMetadata);
393
- }
394
-
395
- Status Insert(const std::string& key) {
396
- return cache_->Insert(key, 0 /*hash*/, nullptr /*value*/, 1 /*charge*/,
397
- nullptr /*deleter*/, nullptr /*handle*/,
398
- Cache::Priority::LOW);
399
- }
400
-
401
- Status Insert(char key, size_t len) { return Insert(std::string(len, key)); }
402
-
403
- size_t CalcEstimatedHandleChargeWrapper(
404
- size_t estimated_value_size,
405
- CacheMetadataChargePolicy metadata_charge_policy) {
406
- return LRUCacheShard::CalcEstimatedHandleCharge(estimated_value_size,
407
- metadata_charge_policy);
408
- }
409
-
410
- int CalcHashBitsWrapper(size_t capacity, size_t estimated_value_size,
411
- CacheMetadataChargePolicy metadata_charge_policy) {
412
- return LRUCacheShard::CalcHashBits(capacity, estimated_value_size,
413
- metadata_charge_policy);
414
- }
415
-
416
- // Maximum number of items that a shard can hold.
417
- double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size,
418
- CacheMetadataChargePolicy metadata_charge_policy) {
419
- size_t handle_charge = LRUCacheShard::CalcEstimatedHandleCharge(
420
- estimated_value_size, metadata_charge_policy);
421
- return capacity / (kLoadFactor * handle_charge);
422
- }
423
- bool TableSizeIsAppropriate(int hash_bits, double max_occupancy) {
424
- if (hash_bits == 0) {
425
- return max_occupancy <= 1;
426
- } else {
427
- return (1 << hash_bits >= max_occupancy) &&
428
- (1 << (hash_bits - 1) <= max_occupancy);
429
- }
430
- }
431
-
432
- private:
433
- LRUCacheShard* cache_ = nullptr;
434
- };
435
-
436
- TEST_F(FastLRUCacheTest, ValidateKeySize) {
437
- NewCache(3);
438
- EXPECT_OK(Insert('a', 16));
439
- EXPECT_NOK(Insert('b', 15));
440
- EXPECT_OK(Insert('b', 16));
441
- EXPECT_NOK(Insert('c', 17));
442
- EXPECT_NOK(Insert('d', 1000));
443
- EXPECT_NOK(Insert('e', 11));
444
- EXPECT_NOK(Insert('f', 0));
445
- }
446
-
447
- TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
448
- size_t capacity;
449
- size_t estimated_value_size;
450
- double max_occupancy;
451
- int hash_bits;
452
- CacheMetadataChargePolicy metadata_charge_policy;
453
- // Vary the cache capacity, fix the element charge.
454
- for (int i = 0; i < 2048; i++) {
455
- capacity = i;
456
- estimated_value_size = 0;
457
- metadata_charge_policy = kFullChargeCacheMetadata;
458
- max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size,
459
- metadata_charge_policy);
460
- hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
461
- metadata_charge_policy);
462
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
463
- }
464
- // Fix the cache capacity, vary the element charge.
465
- for (int i = 0; i < 1024; i++) {
466
- capacity = 1024;
467
- estimated_value_size = i;
468
- metadata_charge_policy = kFullChargeCacheMetadata;
469
- max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size,
470
- metadata_charge_policy);
471
- hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
472
- metadata_charge_policy);
473
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
474
- }
475
- // Zero-capacity cache, and only values have charge.
476
- capacity = 0;
477
- estimated_value_size = 1;
478
- metadata_charge_policy = kDontChargeCacheMetadata;
479
- hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
480
- metadata_charge_policy);
481
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
482
- // Zero-capacity cache, and only metadata has charge.
483
- capacity = 0;
484
- estimated_value_size = 0;
485
- metadata_charge_policy = kFullChargeCacheMetadata;
486
- hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
487
- metadata_charge_policy);
488
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
489
- // Small cache, large elements.
490
- capacity = 1024;
491
- estimated_value_size = 8192;
492
- metadata_charge_policy = kFullChargeCacheMetadata;
493
- hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
494
- metadata_charge_policy);
495
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
496
- // Large capacity.
497
- capacity = 31924172;
498
- estimated_value_size = 8192;
499
- metadata_charge_policy = kFullChargeCacheMetadata;
500
- max_occupancy =
501
- CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
502
- hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
503
- metadata_charge_policy);
504
- EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
505
- }
506
-
507
- } // namespace fast_lru_cache
508
-
509
366
  namespace clock_cache {
510
367
 
511
368
  class ClockCacheTest : public testing::Test {
@@ -1275,14 +1132,19 @@ TEST_F(LRUCacheSecondaryCacheTest, BasicTest) {
1275
1132
  nullptr /* memory_allocator */, kDefaultToAdaptiveMutex,
1276
1133
  kDontChargeCacheMetadata);
1277
1134
  std::shared_ptr<TestSecondaryCache> secondary_cache =
1278
- std::make_shared<TestSecondaryCache>(2048);
1135
+ std::make_shared<TestSecondaryCache>(4096);
1279
1136
  opts.secondary_cache = secondary_cache;
1280
1137
  std::shared_ptr<Cache> cache = NewLRUCache(opts);
1281
1138
  std::shared_ptr<Statistics> stats = CreateDBStatistics();
1282
1139
  CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get());
1283
1140
  CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get());
1141
+ CacheKey k3 = CacheKey::CreateUniqueForCacheLifetime(cache.get());
1284
1142
 
1285
1143
  Random rnd(301);
1144
+ // Start with warming k3
1145
+ std::string str3 = rnd.RandomString(1021);
1146
+ ASSERT_OK(secondary_cache->InsertSaved(k3.AsSlice(), str3));
1147
+
1286
1148
  std::string str1 = rnd.RandomString(1020);
1287
1149
  TestItem* item1 = new TestItem(str1.data(), str1.length());
1288
1150
  ASSERT_OK(cache->Insert(k1.AsSlice(), item1,
@@ -1299,15 +1161,27 @@ TEST_F(LRUCacheSecondaryCacheTest, BasicTest) {
1299
1161
  cache->Lookup(k2.AsSlice(), &LRUCacheSecondaryCacheTest::helper_,
1300
1162
  test_item_creator, Cache::Priority::LOW, true, stats.get());
1301
1163
  ASSERT_NE(handle, nullptr);
1164
+ ASSERT_EQ(static_cast<TestItem*>(cache->Value(handle))->Size(), str2.size());
1302
1165
  cache->Release(handle);
1166
+
1303
1167
  // This lookup should promote k1 and demote k2
1304
1168
  handle =
1305
1169
  cache->Lookup(k1.AsSlice(), &LRUCacheSecondaryCacheTest::helper_,
1306
1170
  test_item_creator, Cache::Priority::LOW, true, stats.get());
1307
1171
  ASSERT_NE(handle, nullptr);
1172
+ ASSERT_EQ(static_cast<TestItem*>(cache->Value(handle))->Size(), str1.size());
1308
1173
  cache->Release(handle);
1309
- ASSERT_EQ(secondary_cache->num_inserts(), 2u);
1310
- ASSERT_EQ(secondary_cache->num_lookups(), 1u);
1174
+
1175
+ // This lookup should promote k3 and demote k1
1176
+ handle =
1177
+ cache->Lookup(k3.AsSlice(), &LRUCacheSecondaryCacheTest::helper_,
1178
+ test_item_creator, Cache::Priority::LOW, true, stats.get());
1179
+ ASSERT_NE(handle, nullptr);
1180
+ ASSERT_EQ(static_cast<TestItem*>(cache->Value(handle))->Size(), str3.size());
1181
+ cache->Release(handle);
1182
+
1183
+ ASSERT_EQ(secondary_cache->num_inserts(), 3u);
1184
+ ASSERT_EQ(secondary_cache->num_lookups(), 2u);
1311
1185
  ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_HITS),
1312
1186
  secondary_cache->num_lookups());
1313
1187
  PerfContext perf_ctx = *get_perf_context();
@@ -1888,6 +1762,43 @@ TEST_F(DBSecondaryCacheTest, SecondaryCacheFailureTest) {
1888
1762
  Destroy(options);
1889
1763
  }
1890
1764
 
1765
+ TEST_F(DBSecondaryCacheTest, TestSecondaryWithCompressedCache) {
1766
+ if (!Snappy_Supported()) {
1767
+ ROCKSDB_GTEST_SKIP("Compressed cache test requires snappy support");
1768
+ return;
1769
+ }
1770
+ LRUCacheOptions opts(2000 /* capacity */, 0 /* num_shard_bits */,
1771
+ false /* strict_capacity_limit */,
1772
+ 0.5 /* high_pri_pool_ratio */,
1773
+ nullptr /* memory_allocator */, kDefaultToAdaptiveMutex,
1774
+ kDontChargeCacheMetadata);
1775
+ std::shared_ptr<TestSecondaryCache> secondary_cache(
1776
+ new TestSecondaryCache(2048 * 1024));
1777
+ opts.secondary_cache = secondary_cache;
1778
+ std::shared_ptr<Cache> cache = NewLRUCache(opts);
1779
+ BlockBasedTableOptions table_options;
1780
+ table_options.block_cache_compressed = cache;
1781
+ table_options.no_block_cache = true;
1782
+ table_options.block_size = 1234;
1783
+ Options options = GetDefaultOptions();
1784
+ options.compression = kSnappyCompression;
1785
+ options.create_if_missing = true;
1786
+ options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1787
+ DestroyAndReopen(options);
1788
+ Random rnd(301);
1789
+ const int N = 6;
1790
+ for (int i = 0; i < N; i++) {
1791
+ // Partly compressible
1792
+ std::string p_v = rnd.RandomString(507) + std::string(500, ' ');
1793
+ ASSERT_OK(Put(Key(i), p_v));
1794
+ }
1795
+ ASSERT_OK(Flush());
1796
+ for (int i = 0; i < 2 * N; i++) {
1797
+ std::string v = Get(Key(i % N));
1798
+ ASSERT_EQ(1007, v.size());
1799
+ }
1800
+ }
1801
+
1891
1802
  TEST_F(LRUCacheSecondaryCacheTest, BasicWaitAllTest) {
1892
1803
  LRUCacheOptions opts(1024 /* capacity */, 2 /* num_shard_bits */,
1893
1804
  false /* strict_capacity_limit */,
@@ -0,0 +1,32 @@
1
+ // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+
6
+ #include "rocksdb/secondary_cache.h"
7
+
8
+ #include "cache/cache_entry_roles.h"
9
+
10
+ namespace ROCKSDB_NAMESPACE {
11
+
12
+ namespace {
13
+
14
+ size_t SliceSize(void* obj) { return static_cast<Slice*>(obj)->size(); }
15
+
16
+ Status SliceSaveTo(void* from_obj, size_t from_offset, size_t length,
17
+ void* out) {
18
+ const Slice& slice = *static_cast<Slice*>(from_obj);
19
+ std::memcpy(out, slice.data() + from_offset, length);
20
+ return Status::OK();
21
+ }
22
+
23
+ } // namespace
24
+
25
+ Status SecondaryCache::InsertSaved(const Slice& key, const Slice& saved) {
26
+ static Cache::CacheItemHelper helper{
27
+ &SliceSize, &SliceSaveTo, GetNoopDeleterForRole<CacheEntryRole::kMisc>()};
28
+ // NOTE: depends on Insert() being synchronous, not keeping pointer `&saved`
29
+ return Insert(key, const_cast<Slice*>(&saved), &helper);
30
+ }
31
+
32
+ } // namespace ROCKSDB_NAMESPACE
@@ -123,6 +123,10 @@ class BlobCountingIterator : public InternalIterator {
123
123
  return iter_->GetProperty(prop_name, prop);
124
124
  }
125
125
 
126
+ bool IsDeleteRangeSentinelKey() const override {
127
+ return iter_->IsDeleteRangeSentinelKey();
128
+ }
129
+
126
130
  private:
127
131
  void UpdateAndCountBlobIfNeeded() {
128
132
  assert(!iter_->Valid() || iter_->status().ok());
@@ -130,6 +134,13 @@ class BlobCountingIterator : public InternalIterator {
130
134
  if (!iter_->Valid()) {
131
135
  status_ = iter_->status();
132
136
  return;
137
+ } else if (iter_->IsDeleteRangeSentinelKey()) {
138
+ // CompactionMergingIterator emits range tombstones, and range tombstone
139
+ // keys can be truncated at file boundaries. This means the range
140
+ // tombstone keys can have op_type kTypeBlobIndex.
141
+ // This could crash the ProcessInFlow() call below since
142
+ // value is empty for these keys.
143
+ return;
133
144
  }
134
145
 
135
146
  TEST_SYNC_POINT(
@@ -565,7 +565,8 @@ ColumnFamilyData::ColumnFamilyData(
565
565
  allow_2pc_(db_options.allow_2pc),
566
566
  last_memtable_id_(0),
567
567
  db_paths_registered_(false),
568
- mempurge_used_(false) {
568
+ mempurge_used_(false),
569
+ next_epoch_number_(1) {
569
570
  if (id_ != kDummyColumnFamilyDataId) {
570
571
  // TODO(cc): RegisterDbPaths can be expensive, considering moving it
571
572
  // outside of this constructor which might be called with db mutex held.
@@ -1128,12 +1129,9 @@ bool ColumnFamilyData::NeedsCompaction() const {
1128
1129
  Compaction* ColumnFamilyData::PickCompaction(
1129
1130
  const MutableCFOptions& mutable_options,
1130
1131
  const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) {
1131
- SequenceNumber earliest_mem_seqno =
1132
- std::min(mem_->GetEarliestSequenceNumber(),
1133
- imm_.current()->GetEarliestSequenceNumber(false));
1134
1132
  auto* result = compaction_picker_->PickCompaction(
1135
1133
  GetName(), mutable_options, mutable_db_options, current_->storage_info(),
1136
- log_buffer, earliest_mem_seqno);
1134
+ log_buffer);
1137
1135
  if (result != nullptr) {
1138
1136
  result->SetInputVersion(current_);
1139
1137
  }
@@ -1212,14 +1210,11 @@ Compaction* ColumnFamilyData::CompactRange(
1212
1210
  const InternalKey* begin, const InternalKey* end,
1213
1211
  InternalKey** compaction_end, bool* conflict,
1214
1212
  uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
1215
- SequenceNumber earliest_mem_seqno =
1216
- std::min(mem_->GetEarliestSequenceNumber(),
1217
- imm_.current()->GetEarliestSequenceNumber(false));
1218
1213
  auto* result = compaction_picker_->CompactRange(
1219
1214
  GetName(), mutable_cf_options, mutable_db_options,
1220
1215
  current_->storage_info(), input_level, output_level,
1221
1216
  compact_range_options, begin, end, compaction_end, conflict,
1222
- max_file_num_to_ignore, trim_ts, earliest_mem_seqno);
1217
+ max_file_num_to_ignore, trim_ts);
1223
1218
  if (result != nullptr) {
1224
1219
  result->SetInputVersion(current_);
1225
1220
  }
@@ -1523,6 +1518,13 @@ FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const {
1523
1518
  return data_dirs_[path_id].get();
1524
1519
  }
1525
1520
 
1521
+ void ColumnFamilyData::RecoverEpochNumbers() {
1522
+ assert(current_);
1523
+ auto* vstorage = current_->storage_info();
1524
+ assert(vstorage);
1525
+ vstorage->RecoverEpochNumbers(this);
1526
+ }
1527
+
1526
1528
  ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
1527
1529
  const ImmutableDBOptions* db_options,
1528
1530
  const FileOptions& file_options,
@@ -533,6 +533,24 @@ class ColumnFamilyData {
533
533
  void SetMempurgeUsed() { mempurge_used_ = true; }
534
534
  bool GetMempurgeUsed() { return mempurge_used_; }
535
535
 
536
+ // Allocate and return a new epoch number
537
+ uint64_t NewEpochNumber() { return next_epoch_number_.fetch_add(1); }
538
+
539
+ // Get the next epoch number to be assigned
540
+ uint64_t GetNextEpochNumber() const { return next_epoch_number_.load(); }
541
+
542
+ // Set the next epoch number to be assigned
543
+ void SetNextEpochNumber(uint64_t next_epoch_number) {
544
+ next_epoch_number_.store(next_epoch_number);
545
+ }
546
+
547
+ // Reset the next epoch number to be assigned
548
+ void ResetNextEpochNumber() { next_epoch_number_.store(1); }
549
+
550
+ // Recover the next epoch number of this CF and epoch number
551
+ // of its files (if missing)
552
+ void RecoverEpochNumbers();
553
+
536
554
  private:
537
555
  friend class ColumnFamilySet;
538
556
  ColumnFamilyData(uint32_t id, const std::string& name,
@@ -634,6 +652,8 @@ class ColumnFamilyData {
634
652
  // a Version associated with this CFD
635
653
  std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
636
654
  bool mempurge_used_;
655
+
656
+ std::atomic<uint64_t> next_epoch_number_;
637
657
  };
638
658
 
639
659
  // ColumnFamilySet has interesting thread-safety requirements
@@ -188,6 +188,11 @@ class ClippingIterator : public InternalIterator {
188
188
  return iter_->GetProperty(prop_name, prop);
189
189
  }
190
190
 
191
+ bool IsDeleteRangeSentinelKey() const override {
192
+ assert(valid_);
193
+ return iter_->IsDeleteRangeSentinelKey();
194
+ }
195
+
191
196
  private:
192
197
  void UpdateValid() {
193
198
  assert(!iter_->Valid() || iter_->status().ok());
@@ -20,9 +20,6 @@
20
20
 
21
21
  namespace ROCKSDB_NAMESPACE {
22
22
 
23
- const uint64_t kRangeTombstoneSentinel =
24
- PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
25
-
26
23
  int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
27
24
  const InternalKey& b) {
28
25
  auto c = user_cmp->CompareWithoutTimestamp(a.user_key(), b.user_key());
@@ -332,6 +329,7 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
332
329
  // the case that the penultimate level is empty).
333
330
  if (immutable_options_.compaction_style == kCompactionStyleUniversal) {
334
331
  exclude_level = kInvalidLevel;
332
+ penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
335
333
  std::set<uint64_t> penultimate_inputs;
336
334
  for (const auto& input_lvl : inputs_) {
337
335
  if (input_lvl.level == penultimate_level_) {
@@ -345,7 +343,8 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
345
343
  if (penultimate_inputs.find(file->fd.GetNumber()) ==
346
344
  penultimate_inputs.end()) {
347
345
  exclude_level = number_levels_ - 1;
348
- penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
346
+ penultimate_output_range_type_ =
347
+ PenultimateOutputRangeType::kNonLastRange;
349
348
  break;
350
349
  }
351
350
  }
@@ -354,35 +353,6 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
354
353
  GetBoundaryKeys(input_vstorage_, inputs_,
355
354
  &penultimate_level_smallest_user_key_,
356
355
  &penultimate_level_largest_user_key_, exclude_level);
357
-
358
- // If there's a case that the penultimate level output range is overlapping
359
- // with the existing files, disable the penultimate level output by setting
360
- // the range to empty. One example is the range delete could have overlap
361
- // boundary with the next file. (which is actually a false overlap)
362
- // TODO: Exclude such false overlap, so it won't disable the penultimate
363
- // output.
364
- std::set<uint64_t> penultimate_inputs;
365
- for (const auto& input_lvl : inputs_) {
366
- if (input_lvl.level == penultimate_level_) {
367
- for (const auto& file : input_lvl.files) {
368
- penultimate_inputs.emplace(file->fd.GetNumber());
369
- }
370
- }
371
- }
372
-
373
- auto penultimate_files = input_vstorage_->LevelFiles(penultimate_level_);
374
- for (const auto& file : penultimate_files) {
375
- if (penultimate_inputs.find(file->fd.GetNumber()) ==
376
- penultimate_inputs.end() &&
377
- OverlapPenultimateLevelOutputRange(file->smallest.user_key(),
378
- file->largest.user_key())) {
379
- // basically disable the penultimate range output. which should be rare
380
- // or a false overlap caused by range del
381
- penultimate_level_smallest_user_key_ = "";
382
- penultimate_level_largest_user_key_ = "";
383
- penultimate_output_range_type_ = PenultimateOutputRangeType::kDisabled;
384
- }
385
- }
386
356
  }
387
357
 
388
358
  Compaction::~Compaction() {
@@ -807,6 +777,16 @@ uint64_t Compaction::MinInputFileOldestAncesterTime(
807
777
  return min_oldest_ancester_time;
808
778
  }
809
779
 
780
+ uint64_t Compaction::MinInputFileEpochNumber() const {
781
+ uint64_t min_epoch_number = std::numeric_limits<uint64_t>::max();
782
+ for (const auto& inputs_per_level : inputs_) {
783
+ for (const auto& file : inputs_per_level.files) {
784
+ min_epoch_number = std::min(min_epoch_number, file->epoch_number);
785
+ }
786
+ }
787
+ return min_epoch_number;
788
+ }
789
+
810
790
  int Compaction::EvaluatePenultimateLevel(
811
791
  const VersionStorageInfo* vstorage,
812
792
  const ImmutableOptions& immutable_options, const int start_level,
@@ -18,6 +18,8 @@ namespace ROCKSDB_NAMESPACE {
18
18
  // The file contains class Compaction, as well as some helper functions
19
19
  // and data structures used by the class.
20
20
 
21
+ const uint64_t kRangeTombstoneSentinel =
22
+ PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
21
23
  // Utility for comparing sstable boundary keys. Returns -1 if either a or b is
22
24
  // null which provides the property that a==null indicates a key that is less
23
25
  // than any key and b==null indicates a key that is greater than any key. Note
@@ -378,6 +380,9 @@ class Compaction {
378
380
  // This is used to filter out some input files' ancester's time range.
379
381
  uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
380
382
  const InternalKey* end) const;
383
+ // Return the minimum epoch number among
384
+ // input files' associated with this compaction
385
+ uint64_t MinInputFileEpochNumber() const;
381
386
 
382
387
  // Called by DBImpl::NotifyOnCompactionCompleted to make sure number of
383
388
  // compaction begin and compaction completion callbacks match.
@@ -377,6 +377,7 @@ void CompactionIterator::NextFromInput() {
377
377
  value_ = input_.value();
378
378
  blob_value_.Reset();
379
379
  iter_stats_.num_input_records++;
380
+ is_range_del_ = input_.IsDeleteRangeSentinelKey();
380
381
 
381
382
  Status pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
382
383
  if (!pik_status.ok()) {
@@ -396,7 +397,10 @@ void CompactionIterator::NextFromInput() {
396
397
  break;
397
398
  }
398
399
  TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_);
399
-
400
+ if (is_range_del_) {
401
+ validity_info_.SetValid(kRangeDeletion);
402
+ break;
403
+ }
400
404
  // Update input statistics
401
405
  if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion ||
402
406
  ikey_.type == kTypeDeletionWithTimestamp) {
@@ -618,6 +622,14 @@ void CompactionIterator::NextFromInput() {
618
622
 
619
623
  ParsedInternalKey next_ikey;
620
624
  AdvanceInputIter();
625
+ while (input_.Valid() && input_.IsDeleteRangeSentinelKey() &&
626
+ ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
627
+ .ok() &&
628
+ cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) {
629
+ // skip range tombstone start keys with the same user key
630
+ // since they are not "real" point keys.
631
+ AdvanceInputIter();
632
+ }
621
633
 
622
634
  // Check whether the next key exists, is not corrupt, and is the same key
623
635
  // as the single delete.
@@ -625,6 +637,7 @@ void CompactionIterator::NextFromInput() {
625
637
  ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
626
638
  .ok() &&
627
639
  cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) {
640
+ assert(!input_.IsDeleteRangeSentinelKey());
628
641
  #ifndef NDEBUG
629
642
  const Compaction* c =
630
643
  compaction_ ? compaction_->real_compaction() : nullptr;
@@ -849,12 +862,14 @@ void CompactionIterator::NextFromInput() {
849
862
  // Note that a deletion marker of type kTypeDeletionWithTimestamp will be
850
863
  // considered to have a different user key unless the timestamp is older
851
864
  // than *full_history_ts_low_.
865
+ //
866
+ // Range tombstone start keys are skipped as they are not "real" keys.
852
867
  while (!IsPausingManualCompaction() && !IsShuttingDown() &&
853
868
  input_.Valid() &&
854
869
  (ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
855
870
  .ok()) &&
856
871
  cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key) &&
857
- (prev_snapshot == 0 ||
872
+ (prev_snapshot == 0 || input_.IsDeleteRangeSentinelKey() ||
858
873
  DefinitelyNotInSnapshot(next_ikey.sequence, prev_snapshot))) {
859
874
  AdvanceInputIter();
860
875
  }
@@ -1105,7 +1120,9 @@ void CompactionIterator::DecideOutputLevel() {
1105
1120
  TEST_SYNC_POINT_CALLBACK("CompactionIterator::PrepareOutput.context",
1106
1121
  &context);
1107
1122
  output_to_penultimate_level_ = context.output_to_penultimate_level;
1108
- #endif /* !NDEBUG */
1123
+ #else
1124
+ output_to_penultimate_level_ = false;
1125
+ #endif // NDEBUG
1109
1126
 
1110
1127
  // if the key is newer than the cutoff sequence or within the earliest
1111
1128
  // snapshot, it should output to the penultimate level.
@@ -1145,10 +1162,12 @@ void CompactionIterator::DecideOutputLevel() {
1145
1162
 
1146
1163
  void CompactionIterator::PrepareOutput() {
1147
1164
  if (Valid()) {
1148
- if (ikey_.type == kTypeValue) {
1149
- ExtractLargeValueIfNeeded();
1150
- } else if (ikey_.type == kTypeBlobIndex) {
1151
- GarbageCollectBlobIfNeeded();
1165
+ if (LIKELY(!is_range_del_)) {
1166
+ if (ikey_.type == kTypeValue) {
1167
+ ExtractLargeValueIfNeeded();
1168
+ } else if (ikey_.type == kTypeBlobIndex) {
1169
+ GarbageCollectBlobIfNeeded();
1170
+ }
1152
1171
  }
1153
1172
 
1154
1173
  if (compaction_ != nullptr && compaction_->SupportsPerKeyPlacement()) {
@@ -1171,7 +1190,7 @@ void CompactionIterator::PrepareOutput() {
1171
1190
  DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
1172
1191
  ikey_.type != kTypeMerge && current_key_committed_ &&
1173
1192
  !output_to_penultimate_level_ &&
1174
- ikey_.sequence < preserve_time_min_seqno_) {
1193
+ ikey_.sequence < preserve_time_min_seqno_ && !is_range_del_) {
1175
1194
  if (ikey_.type == kTypeDeletion ||
1176
1195
  (ikey_.type == kTypeSingleDeletion && timestamp_size_ == 0)) {
1177
1196
  ROCKS_LOG_FATAL(
@@ -63,6 +63,10 @@ class SequenceIterWrapper : public InternalIterator {
63
63
  void SeekToLast() override { assert(false); }
64
64
 
65
65
  uint64_t num_itered() const { return num_itered_; }
66
+ bool IsDeleteRangeSentinelKey() const override {
67
+ assert(Valid());
68
+ return inner_iter_->IsDeleteRangeSentinelKey();
69
+ }
66
70
 
67
71
  private:
68
72
  InternalKeyComparator icmp_;
@@ -242,7 +246,12 @@ class CompactionIterator {
242
246
  const Status& status() const { return status_; }
243
247
  const ParsedInternalKey& ikey() const { return ikey_; }
244
248
  inline bool Valid() const { return validity_info_.IsValid(); }
245
- const Slice& user_key() const { return current_user_key_; }
249
+ const Slice& user_key() const {
250
+ if (UNLIKELY(is_range_del_)) {
251
+ return ikey_.user_key;
252
+ }
253
+ return current_user_key_;
254
+ }
246
255
  const CompactionIterationStats& iter_stats() const { return iter_stats_; }
247
256
  uint64_t num_input_entry_scanned() const { return input_.num_itered(); }
248
257
  // If the current key should be placed on penultimate level, only valid if
@@ -252,6 +261,8 @@ class CompactionIterator {
252
261
  }
253
262
  Status InputStatus() const { return input_.status(); }
254
263
 
264
+ bool IsDeleteRangeSentinelKey() const { return is_range_del_; }
265
+
255
266
  private:
256
267
  // Processes the input stream to find the next output
257
268
  void NextFromInput();
@@ -385,6 +396,7 @@ class CompactionIterator {
385
396
  kKeepSD = 8,
386
397
  kKeepDel = 9,
387
398
  kNewUserKey = 10,
399
+ kRangeDeletion = 11,
388
400
  };
389
401
 
390
402
  struct ValidityInfo {
@@ -492,6 +504,10 @@ class CompactionIterator {
492
504
  // This is a best-effort facility, so memory_order_relaxed is sufficient.
493
505
  return manual_compaction_canceled_.load(std::memory_order_relaxed);
494
506
  }
507
+
508
+ // Stores whether the current compaction iterator output
509
+ // is a range tombstone start key.
510
+ bool is_range_del_{false};
495
511
  };
496
512
 
497
513
  inline bool CompactionIterator::DefinitelyInSnapshot(SequenceNumber seq,
@@ -1286,7 +1286,6 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
1286
1286
  while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
1287
1287
  // Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()
1288
1288
  // returns true.
1289
-
1290
1289
  assert(!end.has_value() || cfd->user_comparator()->Compare(
1291
1290
  c_iter->user_key(), end.value()) < 0);
1292
1291
 
@@ -1834,12 +1833,14 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
1834
1833
  }
1835
1834
 
1836
1835
  // Initialize a SubcompactionState::Output and add it to sub_compact->outputs
1836
+ uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber();
1837
1837
  {
1838
1838
  FileMetaData meta;
1839
1839
  meta.fd = FileDescriptor(file_number,
1840
1840
  sub_compact->compaction->output_path_id(), 0);
1841
1841
  meta.oldest_ancester_time = oldest_ancester_time;
1842
1842
  meta.file_creation_time = current_time;
1843
+ meta.epoch_number = epoch_number;
1843
1844
  meta.temperature = temperature;
1844
1845
  assert(!db_id_.empty());
1845
1846
  assert(!db_session_id_.empty());