@nxtedition/rocksdb 8.0.0 → 8.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BUILDING.md +2 -2
- package/binding.cc +2 -7
- package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -9
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/TARGETS +4 -2
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +8 -29
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +146 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +13 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +57 -146
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +32 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +11 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +11 -9
- package/deps/rocksdb/rocksdb/db/column_family.h +20 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +27 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +65 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -32
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +28 -47
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +28 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +170 -140
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +72 -5
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +119 -10
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +585 -264
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +46 -18
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +6 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +10 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +57 -36
- package/deps/rocksdb/rocksdb/db/db_iter.h +2 -1
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +250 -2
- package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +307 -8
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +5 -2
- package/deps/rocksdb/rocksdb/db/flush_job.cc +5 -2
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/memtable.cc +55 -9
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +76 -102
- package/deps/rocksdb/rocksdb/db/merge_helper.h +2 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
- package/deps/rocksdb/rocksdb/db/repair.cc +64 -22
- package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
- package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +90 -43
- package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +190 -67
- package/deps/rocksdb/rocksdb/db/version_edit.cc +15 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +16 -4
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +41 -11
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +27 -12
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +18 -16
- package/deps/rocksdb/rocksdb/db/version_set.cc +219 -38
- package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +45 -25
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +122 -61
- package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +0 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +12 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +6 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +1 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +0 -48
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +196 -171
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -18
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +27 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/logging/logging.h +13 -19
- package/deps/rocksdb/rocksdb/memory/arena.cc +4 -3
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +30 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
- package/deps/rocksdb/rocksdb/src.mk +2 -1
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -10
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -29
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -39
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +0 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +142 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +241 -0
- package/deps/rocksdb/rocksdb/table/format.cc +24 -20
- package/deps/rocksdb/rocksdb/table/format.h +5 -2
- package/deps/rocksdb/rocksdb/table/get_context.cc +52 -11
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +97 -115
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +82 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
- package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +0 -6
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c.cc +1 -1
- package/deps/rocksdb/rocksdb/util/status.cc +7 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -67
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -3
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +59 -0
- package/deps/rocksdb/rocksdb.gyp +2 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +0 -580
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +0 -476
- package/max_rev_operator.h +0 -100
|
@@ -10,7 +10,6 @@
|
|
|
10
10
|
|
|
11
11
|
#include "cache/cache_key.h"
|
|
12
12
|
#include "cache/clock_cache.h"
|
|
13
|
-
#include "cache/fast_lru_cache.h"
|
|
14
13
|
#include "db/db_test_util.h"
|
|
15
14
|
#include "file/sst_file_manager_impl.h"
|
|
16
15
|
#include "port/port.h"
|
|
@@ -364,148 +363,6 @@ TEST_F(LRUCacheTest, EntriesWithPriority) {
|
|
|
364
363
|
ValidateLRUList({"x", "y", "g", "z", "d", "m"}, 2, 2, 2);
|
|
365
364
|
}
|
|
366
365
|
|
|
367
|
-
// TODO: FastLRUCache and ClockCache use the same tests. We can probably remove
|
|
368
|
-
// them from FastLRUCache after ClockCache becomes productive, and we don't plan
|
|
369
|
-
// to use or maintain FastLRUCache any more.
|
|
370
|
-
namespace fast_lru_cache {
|
|
371
|
-
|
|
372
|
-
// TODO(guido) Replicate LRU policy tests from LRUCache here.
|
|
373
|
-
class FastLRUCacheTest : public testing::Test {
|
|
374
|
-
public:
|
|
375
|
-
FastLRUCacheTest() {}
|
|
376
|
-
~FastLRUCacheTest() override { DeleteCache(); }
|
|
377
|
-
|
|
378
|
-
void DeleteCache() {
|
|
379
|
-
if (cache_ != nullptr) {
|
|
380
|
-
cache_->~LRUCacheShard();
|
|
381
|
-
port::cacheline_aligned_free(cache_);
|
|
382
|
-
cache_ = nullptr;
|
|
383
|
-
}
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
void NewCache(size_t capacity) {
|
|
387
|
-
DeleteCache();
|
|
388
|
-
cache_ = reinterpret_cast<LRUCacheShard*>(
|
|
389
|
-
port::cacheline_aligned_alloc(sizeof(LRUCacheShard)));
|
|
390
|
-
new (cache_) LRUCacheShard(capacity, 1 /*estimated_value_size*/,
|
|
391
|
-
false /*strict_capacity_limit*/,
|
|
392
|
-
kDontChargeCacheMetadata);
|
|
393
|
-
}
|
|
394
|
-
|
|
395
|
-
Status Insert(const std::string& key) {
|
|
396
|
-
return cache_->Insert(key, 0 /*hash*/, nullptr /*value*/, 1 /*charge*/,
|
|
397
|
-
nullptr /*deleter*/, nullptr /*handle*/,
|
|
398
|
-
Cache::Priority::LOW);
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
Status Insert(char key, size_t len) { return Insert(std::string(len, key)); }
|
|
402
|
-
|
|
403
|
-
size_t CalcEstimatedHandleChargeWrapper(
|
|
404
|
-
size_t estimated_value_size,
|
|
405
|
-
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
406
|
-
return LRUCacheShard::CalcEstimatedHandleCharge(estimated_value_size,
|
|
407
|
-
metadata_charge_policy);
|
|
408
|
-
}
|
|
409
|
-
|
|
410
|
-
int CalcHashBitsWrapper(size_t capacity, size_t estimated_value_size,
|
|
411
|
-
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
412
|
-
return LRUCacheShard::CalcHashBits(capacity, estimated_value_size,
|
|
413
|
-
metadata_charge_policy);
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
// Maximum number of items that a shard can hold.
|
|
417
|
-
double CalcMaxOccupancy(size_t capacity, size_t estimated_value_size,
|
|
418
|
-
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
419
|
-
size_t handle_charge = LRUCacheShard::CalcEstimatedHandleCharge(
|
|
420
|
-
estimated_value_size, metadata_charge_policy);
|
|
421
|
-
return capacity / (kLoadFactor * handle_charge);
|
|
422
|
-
}
|
|
423
|
-
bool TableSizeIsAppropriate(int hash_bits, double max_occupancy) {
|
|
424
|
-
if (hash_bits == 0) {
|
|
425
|
-
return max_occupancy <= 1;
|
|
426
|
-
} else {
|
|
427
|
-
return (1 << hash_bits >= max_occupancy) &&
|
|
428
|
-
(1 << (hash_bits - 1) <= max_occupancy);
|
|
429
|
-
}
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
private:
|
|
433
|
-
LRUCacheShard* cache_ = nullptr;
|
|
434
|
-
};
|
|
435
|
-
|
|
436
|
-
TEST_F(FastLRUCacheTest, ValidateKeySize) {
|
|
437
|
-
NewCache(3);
|
|
438
|
-
EXPECT_OK(Insert('a', 16));
|
|
439
|
-
EXPECT_NOK(Insert('b', 15));
|
|
440
|
-
EXPECT_OK(Insert('b', 16));
|
|
441
|
-
EXPECT_NOK(Insert('c', 17));
|
|
442
|
-
EXPECT_NOK(Insert('d', 1000));
|
|
443
|
-
EXPECT_NOK(Insert('e', 11));
|
|
444
|
-
EXPECT_NOK(Insert('f', 0));
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
TEST_F(FastLRUCacheTest, CalcHashBitsTest) {
|
|
448
|
-
size_t capacity;
|
|
449
|
-
size_t estimated_value_size;
|
|
450
|
-
double max_occupancy;
|
|
451
|
-
int hash_bits;
|
|
452
|
-
CacheMetadataChargePolicy metadata_charge_policy;
|
|
453
|
-
// Vary the cache capacity, fix the element charge.
|
|
454
|
-
for (int i = 0; i < 2048; i++) {
|
|
455
|
-
capacity = i;
|
|
456
|
-
estimated_value_size = 0;
|
|
457
|
-
metadata_charge_policy = kFullChargeCacheMetadata;
|
|
458
|
-
max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size,
|
|
459
|
-
metadata_charge_policy);
|
|
460
|
-
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
461
|
-
metadata_charge_policy);
|
|
462
|
-
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
|
|
463
|
-
}
|
|
464
|
-
// Fix the cache capacity, vary the element charge.
|
|
465
|
-
for (int i = 0; i < 1024; i++) {
|
|
466
|
-
capacity = 1024;
|
|
467
|
-
estimated_value_size = i;
|
|
468
|
-
metadata_charge_policy = kFullChargeCacheMetadata;
|
|
469
|
-
max_occupancy = CalcMaxOccupancy(capacity, estimated_value_size,
|
|
470
|
-
metadata_charge_policy);
|
|
471
|
-
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
472
|
-
metadata_charge_policy);
|
|
473
|
-
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
|
|
474
|
-
}
|
|
475
|
-
// Zero-capacity cache, and only values have charge.
|
|
476
|
-
capacity = 0;
|
|
477
|
-
estimated_value_size = 1;
|
|
478
|
-
metadata_charge_policy = kDontChargeCacheMetadata;
|
|
479
|
-
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
480
|
-
metadata_charge_policy);
|
|
481
|
-
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
|
|
482
|
-
// Zero-capacity cache, and only metadata has charge.
|
|
483
|
-
capacity = 0;
|
|
484
|
-
estimated_value_size = 0;
|
|
485
|
-
metadata_charge_policy = kFullChargeCacheMetadata;
|
|
486
|
-
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
487
|
-
metadata_charge_policy);
|
|
488
|
-
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
|
|
489
|
-
// Small cache, large elements.
|
|
490
|
-
capacity = 1024;
|
|
491
|
-
estimated_value_size = 8192;
|
|
492
|
-
metadata_charge_policy = kFullChargeCacheMetadata;
|
|
493
|
-
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
494
|
-
metadata_charge_policy);
|
|
495
|
-
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, 0 /* max_occupancy */));
|
|
496
|
-
// Large capacity.
|
|
497
|
-
capacity = 31924172;
|
|
498
|
-
estimated_value_size = 8192;
|
|
499
|
-
metadata_charge_policy = kFullChargeCacheMetadata;
|
|
500
|
-
max_occupancy =
|
|
501
|
-
CalcMaxOccupancy(capacity, estimated_value_size, metadata_charge_policy);
|
|
502
|
-
hash_bits = CalcHashBitsWrapper(capacity, estimated_value_size,
|
|
503
|
-
metadata_charge_policy);
|
|
504
|
-
EXPECT_TRUE(TableSizeIsAppropriate(hash_bits, max_occupancy));
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
} // namespace fast_lru_cache
|
|
508
|
-
|
|
509
366
|
namespace clock_cache {
|
|
510
367
|
|
|
511
368
|
class ClockCacheTest : public testing::Test {
|
|
@@ -1275,14 +1132,19 @@ TEST_F(LRUCacheSecondaryCacheTest, BasicTest) {
|
|
|
1275
1132
|
nullptr /* memory_allocator */, kDefaultToAdaptiveMutex,
|
|
1276
1133
|
kDontChargeCacheMetadata);
|
|
1277
1134
|
std::shared_ptr<TestSecondaryCache> secondary_cache =
|
|
1278
|
-
std::make_shared<TestSecondaryCache>(
|
|
1135
|
+
std::make_shared<TestSecondaryCache>(4096);
|
|
1279
1136
|
opts.secondary_cache = secondary_cache;
|
|
1280
1137
|
std::shared_ptr<Cache> cache = NewLRUCache(opts);
|
|
1281
1138
|
std::shared_ptr<Statistics> stats = CreateDBStatistics();
|
|
1282
1139
|
CacheKey k1 = CacheKey::CreateUniqueForCacheLifetime(cache.get());
|
|
1283
1140
|
CacheKey k2 = CacheKey::CreateUniqueForCacheLifetime(cache.get());
|
|
1141
|
+
CacheKey k3 = CacheKey::CreateUniqueForCacheLifetime(cache.get());
|
|
1284
1142
|
|
|
1285
1143
|
Random rnd(301);
|
|
1144
|
+
// Start with warming k3
|
|
1145
|
+
std::string str3 = rnd.RandomString(1021);
|
|
1146
|
+
ASSERT_OK(secondary_cache->InsertSaved(k3.AsSlice(), str3));
|
|
1147
|
+
|
|
1286
1148
|
std::string str1 = rnd.RandomString(1020);
|
|
1287
1149
|
TestItem* item1 = new TestItem(str1.data(), str1.length());
|
|
1288
1150
|
ASSERT_OK(cache->Insert(k1.AsSlice(), item1,
|
|
@@ -1299,15 +1161,27 @@ TEST_F(LRUCacheSecondaryCacheTest, BasicTest) {
|
|
|
1299
1161
|
cache->Lookup(k2.AsSlice(), &LRUCacheSecondaryCacheTest::helper_,
|
|
1300
1162
|
test_item_creator, Cache::Priority::LOW, true, stats.get());
|
|
1301
1163
|
ASSERT_NE(handle, nullptr);
|
|
1164
|
+
ASSERT_EQ(static_cast<TestItem*>(cache->Value(handle))->Size(), str2.size());
|
|
1302
1165
|
cache->Release(handle);
|
|
1166
|
+
|
|
1303
1167
|
// This lookup should promote k1 and demote k2
|
|
1304
1168
|
handle =
|
|
1305
1169
|
cache->Lookup(k1.AsSlice(), &LRUCacheSecondaryCacheTest::helper_,
|
|
1306
1170
|
test_item_creator, Cache::Priority::LOW, true, stats.get());
|
|
1307
1171
|
ASSERT_NE(handle, nullptr);
|
|
1172
|
+
ASSERT_EQ(static_cast<TestItem*>(cache->Value(handle))->Size(), str1.size());
|
|
1308
1173
|
cache->Release(handle);
|
|
1309
|
-
|
|
1310
|
-
|
|
1174
|
+
|
|
1175
|
+
// This lookup should promote k3 and demote k1
|
|
1176
|
+
handle =
|
|
1177
|
+
cache->Lookup(k3.AsSlice(), &LRUCacheSecondaryCacheTest::helper_,
|
|
1178
|
+
test_item_creator, Cache::Priority::LOW, true, stats.get());
|
|
1179
|
+
ASSERT_NE(handle, nullptr);
|
|
1180
|
+
ASSERT_EQ(static_cast<TestItem*>(cache->Value(handle))->Size(), str3.size());
|
|
1181
|
+
cache->Release(handle);
|
|
1182
|
+
|
|
1183
|
+
ASSERT_EQ(secondary_cache->num_inserts(), 3u);
|
|
1184
|
+
ASSERT_EQ(secondary_cache->num_lookups(), 2u);
|
|
1311
1185
|
ASSERT_EQ(stats->getTickerCount(SECONDARY_CACHE_HITS),
|
|
1312
1186
|
secondary_cache->num_lookups());
|
|
1313
1187
|
PerfContext perf_ctx = *get_perf_context();
|
|
@@ -1888,6 +1762,43 @@ TEST_F(DBSecondaryCacheTest, SecondaryCacheFailureTest) {
|
|
|
1888
1762
|
Destroy(options);
|
|
1889
1763
|
}
|
|
1890
1764
|
|
|
1765
|
+
TEST_F(DBSecondaryCacheTest, TestSecondaryWithCompressedCache) {
|
|
1766
|
+
if (!Snappy_Supported()) {
|
|
1767
|
+
ROCKSDB_GTEST_SKIP("Compressed cache test requires snappy support");
|
|
1768
|
+
return;
|
|
1769
|
+
}
|
|
1770
|
+
LRUCacheOptions opts(2000 /* capacity */, 0 /* num_shard_bits */,
|
|
1771
|
+
false /* strict_capacity_limit */,
|
|
1772
|
+
0.5 /* high_pri_pool_ratio */,
|
|
1773
|
+
nullptr /* memory_allocator */, kDefaultToAdaptiveMutex,
|
|
1774
|
+
kDontChargeCacheMetadata);
|
|
1775
|
+
std::shared_ptr<TestSecondaryCache> secondary_cache(
|
|
1776
|
+
new TestSecondaryCache(2048 * 1024));
|
|
1777
|
+
opts.secondary_cache = secondary_cache;
|
|
1778
|
+
std::shared_ptr<Cache> cache = NewLRUCache(opts);
|
|
1779
|
+
BlockBasedTableOptions table_options;
|
|
1780
|
+
table_options.block_cache_compressed = cache;
|
|
1781
|
+
table_options.no_block_cache = true;
|
|
1782
|
+
table_options.block_size = 1234;
|
|
1783
|
+
Options options = GetDefaultOptions();
|
|
1784
|
+
options.compression = kSnappyCompression;
|
|
1785
|
+
options.create_if_missing = true;
|
|
1786
|
+
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
|
|
1787
|
+
DestroyAndReopen(options);
|
|
1788
|
+
Random rnd(301);
|
|
1789
|
+
const int N = 6;
|
|
1790
|
+
for (int i = 0; i < N; i++) {
|
|
1791
|
+
// Partly compressible
|
|
1792
|
+
std::string p_v = rnd.RandomString(507) + std::string(500, ' ');
|
|
1793
|
+
ASSERT_OK(Put(Key(i), p_v));
|
|
1794
|
+
}
|
|
1795
|
+
ASSERT_OK(Flush());
|
|
1796
|
+
for (int i = 0; i < 2 * N; i++) {
|
|
1797
|
+
std::string v = Get(Key(i % N));
|
|
1798
|
+
ASSERT_EQ(1007, v.size());
|
|
1799
|
+
}
|
|
1800
|
+
}
|
|
1801
|
+
|
|
1891
1802
|
TEST_F(LRUCacheSecondaryCacheTest, BasicWaitAllTest) {
|
|
1892
1803
|
LRUCacheOptions opts(1024 /* capacity */, 2 /* num_shard_bits */,
|
|
1893
1804
|
false /* strict_capacity_limit */,
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
#include "rocksdb/secondary_cache.h"
|
|
7
|
+
|
|
8
|
+
#include "cache/cache_entry_roles.h"
|
|
9
|
+
|
|
10
|
+
namespace ROCKSDB_NAMESPACE {
|
|
11
|
+
|
|
12
|
+
namespace {
|
|
13
|
+
|
|
14
|
+
size_t SliceSize(void* obj) { return static_cast<Slice*>(obj)->size(); }
|
|
15
|
+
|
|
16
|
+
Status SliceSaveTo(void* from_obj, size_t from_offset, size_t length,
|
|
17
|
+
void* out) {
|
|
18
|
+
const Slice& slice = *static_cast<Slice*>(from_obj);
|
|
19
|
+
std::memcpy(out, slice.data() + from_offset, length);
|
|
20
|
+
return Status::OK();
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
} // namespace
|
|
24
|
+
|
|
25
|
+
Status SecondaryCache::InsertSaved(const Slice& key, const Slice& saved) {
|
|
26
|
+
static Cache::CacheItemHelper helper{
|
|
27
|
+
&SliceSize, &SliceSaveTo, GetNoopDeleterForRole<CacheEntryRole::kMisc>()};
|
|
28
|
+
// NOTE: depends on Insert() being synchronous, not keeping pointer `&saved`
|
|
29
|
+
return Insert(key, const_cast<Slice*>(&saved), &helper);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -123,6 +123,10 @@ class BlobCountingIterator : public InternalIterator {
|
|
|
123
123
|
return iter_->GetProperty(prop_name, prop);
|
|
124
124
|
}
|
|
125
125
|
|
|
126
|
+
bool IsDeleteRangeSentinelKey() const override {
|
|
127
|
+
return iter_->IsDeleteRangeSentinelKey();
|
|
128
|
+
}
|
|
129
|
+
|
|
126
130
|
private:
|
|
127
131
|
void UpdateAndCountBlobIfNeeded() {
|
|
128
132
|
assert(!iter_->Valid() || iter_->status().ok());
|
|
@@ -130,6 +134,13 @@ class BlobCountingIterator : public InternalIterator {
|
|
|
130
134
|
if (!iter_->Valid()) {
|
|
131
135
|
status_ = iter_->status();
|
|
132
136
|
return;
|
|
137
|
+
} else if (iter_->IsDeleteRangeSentinelKey()) {
|
|
138
|
+
// CompactionMergingIterator emits range tombstones, and range tombstone
|
|
139
|
+
// keys can be truncated at file boundaries. This means the range
|
|
140
|
+
// tombstone keys can have op_type kTypeBlobIndex.
|
|
141
|
+
// This could crash the ProcessInFlow() call below since
|
|
142
|
+
// value is empty for these keys.
|
|
143
|
+
return;
|
|
133
144
|
}
|
|
134
145
|
|
|
135
146
|
TEST_SYNC_POINT(
|
|
@@ -565,7 +565,8 @@ ColumnFamilyData::ColumnFamilyData(
|
|
|
565
565
|
allow_2pc_(db_options.allow_2pc),
|
|
566
566
|
last_memtable_id_(0),
|
|
567
567
|
db_paths_registered_(false),
|
|
568
|
-
mempurge_used_(false)
|
|
568
|
+
mempurge_used_(false),
|
|
569
|
+
next_epoch_number_(1) {
|
|
569
570
|
if (id_ != kDummyColumnFamilyDataId) {
|
|
570
571
|
// TODO(cc): RegisterDbPaths can be expensive, considering moving it
|
|
571
572
|
// outside of this constructor which might be called with db mutex held.
|
|
@@ -1128,12 +1129,9 @@ bool ColumnFamilyData::NeedsCompaction() const {
|
|
|
1128
1129
|
Compaction* ColumnFamilyData::PickCompaction(
|
|
1129
1130
|
const MutableCFOptions& mutable_options,
|
|
1130
1131
|
const MutableDBOptions& mutable_db_options, LogBuffer* log_buffer) {
|
|
1131
|
-
SequenceNumber earliest_mem_seqno =
|
|
1132
|
-
std::min(mem_->GetEarliestSequenceNumber(),
|
|
1133
|
-
imm_.current()->GetEarliestSequenceNumber(false));
|
|
1134
1132
|
auto* result = compaction_picker_->PickCompaction(
|
|
1135
1133
|
GetName(), mutable_options, mutable_db_options, current_->storage_info(),
|
|
1136
|
-
log_buffer
|
|
1134
|
+
log_buffer);
|
|
1137
1135
|
if (result != nullptr) {
|
|
1138
1136
|
result->SetInputVersion(current_);
|
|
1139
1137
|
}
|
|
@@ -1212,14 +1210,11 @@ Compaction* ColumnFamilyData::CompactRange(
|
|
|
1212
1210
|
const InternalKey* begin, const InternalKey* end,
|
|
1213
1211
|
InternalKey** compaction_end, bool* conflict,
|
|
1214
1212
|
uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
|
|
1215
|
-
SequenceNumber earliest_mem_seqno =
|
|
1216
|
-
std::min(mem_->GetEarliestSequenceNumber(),
|
|
1217
|
-
imm_.current()->GetEarliestSequenceNumber(false));
|
|
1218
1213
|
auto* result = compaction_picker_->CompactRange(
|
|
1219
1214
|
GetName(), mutable_cf_options, mutable_db_options,
|
|
1220
1215
|
current_->storage_info(), input_level, output_level,
|
|
1221
1216
|
compact_range_options, begin, end, compaction_end, conflict,
|
|
1222
|
-
max_file_num_to_ignore, trim_ts
|
|
1217
|
+
max_file_num_to_ignore, trim_ts);
|
|
1223
1218
|
if (result != nullptr) {
|
|
1224
1219
|
result->SetInputVersion(current_);
|
|
1225
1220
|
}
|
|
@@ -1523,6 +1518,13 @@ FSDirectory* ColumnFamilyData::GetDataDir(size_t path_id) const {
|
|
|
1523
1518
|
return data_dirs_[path_id].get();
|
|
1524
1519
|
}
|
|
1525
1520
|
|
|
1521
|
+
void ColumnFamilyData::RecoverEpochNumbers() {
|
|
1522
|
+
assert(current_);
|
|
1523
|
+
auto* vstorage = current_->storage_info();
|
|
1524
|
+
assert(vstorage);
|
|
1525
|
+
vstorage->RecoverEpochNumbers(this);
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1526
1528
|
ColumnFamilySet::ColumnFamilySet(const std::string& dbname,
|
|
1527
1529
|
const ImmutableDBOptions* db_options,
|
|
1528
1530
|
const FileOptions& file_options,
|
|
@@ -533,6 +533,24 @@ class ColumnFamilyData {
|
|
|
533
533
|
void SetMempurgeUsed() { mempurge_used_ = true; }
|
|
534
534
|
bool GetMempurgeUsed() { return mempurge_used_; }
|
|
535
535
|
|
|
536
|
+
// Allocate and return a new epoch number
|
|
537
|
+
uint64_t NewEpochNumber() { return next_epoch_number_.fetch_add(1); }
|
|
538
|
+
|
|
539
|
+
// Get the next epoch number to be assigned
|
|
540
|
+
uint64_t GetNextEpochNumber() const { return next_epoch_number_.load(); }
|
|
541
|
+
|
|
542
|
+
// Set the next epoch number to be assigned
|
|
543
|
+
void SetNextEpochNumber(uint64_t next_epoch_number) {
|
|
544
|
+
next_epoch_number_.store(next_epoch_number);
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
// Reset the next epoch number to be assigned
|
|
548
|
+
void ResetNextEpochNumber() { next_epoch_number_.store(1); }
|
|
549
|
+
|
|
550
|
+
// Recover the next epoch number of this CF and epoch number
|
|
551
|
+
// of its files (if missing)
|
|
552
|
+
void RecoverEpochNumbers();
|
|
553
|
+
|
|
536
554
|
private:
|
|
537
555
|
friend class ColumnFamilySet;
|
|
538
556
|
ColumnFamilyData(uint32_t id, const std::string& name,
|
|
@@ -634,6 +652,8 @@ class ColumnFamilyData {
|
|
|
634
652
|
// a Version associated with this CFD
|
|
635
653
|
std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
|
|
636
654
|
bool mempurge_used_;
|
|
655
|
+
|
|
656
|
+
std::atomic<uint64_t> next_epoch_number_;
|
|
637
657
|
};
|
|
638
658
|
|
|
639
659
|
// ColumnFamilySet has interesting thread-safety requirements
|
|
@@ -188,6 +188,11 @@ class ClippingIterator : public InternalIterator {
|
|
|
188
188
|
return iter_->GetProperty(prop_name, prop);
|
|
189
189
|
}
|
|
190
190
|
|
|
191
|
+
bool IsDeleteRangeSentinelKey() const override {
|
|
192
|
+
assert(valid_);
|
|
193
|
+
return iter_->IsDeleteRangeSentinelKey();
|
|
194
|
+
}
|
|
195
|
+
|
|
191
196
|
private:
|
|
192
197
|
void UpdateValid() {
|
|
193
198
|
assert(!iter_->Valid() || iter_->status().ok());
|
|
@@ -20,9 +20,6 @@
|
|
|
20
20
|
|
|
21
21
|
namespace ROCKSDB_NAMESPACE {
|
|
22
22
|
|
|
23
|
-
const uint64_t kRangeTombstoneSentinel =
|
|
24
|
-
PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
|
|
25
|
-
|
|
26
23
|
int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
|
|
27
24
|
const InternalKey& b) {
|
|
28
25
|
auto c = user_cmp->CompareWithoutTimestamp(a.user_key(), b.user_key());
|
|
@@ -332,6 +329,7 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
332
329
|
// the case that the penultimate level is empty).
|
|
333
330
|
if (immutable_options_.compaction_style == kCompactionStyleUniversal) {
|
|
334
331
|
exclude_level = kInvalidLevel;
|
|
332
|
+
penultimate_output_range_type_ = PenultimateOutputRangeType::kFullRange;
|
|
335
333
|
std::set<uint64_t> penultimate_inputs;
|
|
336
334
|
for (const auto& input_lvl : inputs_) {
|
|
337
335
|
if (input_lvl.level == penultimate_level_) {
|
|
@@ -345,7 +343,8 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
345
343
|
if (penultimate_inputs.find(file->fd.GetNumber()) ==
|
|
346
344
|
penultimate_inputs.end()) {
|
|
347
345
|
exclude_level = number_levels_ - 1;
|
|
348
|
-
penultimate_output_range_type_ =
|
|
346
|
+
penultimate_output_range_type_ =
|
|
347
|
+
PenultimateOutputRangeType::kNonLastRange;
|
|
349
348
|
break;
|
|
350
349
|
}
|
|
351
350
|
}
|
|
@@ -354,35 +353,6 @@ void Compaction::PopulatePenultimateLevelOutputRange() {
|
|
|
354
353
|
GetBoundaryKeys(input_vstorage_, inputs_,
|
|
355
354
|
&penultimate_level_smallest_user_key_,
|
|
356
355
|
&penultimate_level_largest_user_key_, exclude_level);
|
|
357
|
-
|
|
358
|
-
// If there's a case that the penultimate level output range is overlapping
|
|
359
|
-
// with the existing files, disable the penultimate level output by setting
|
|
360
|
-
// the range to empty. One example is the range delete could have overlap
|
|
361
|
-
// boundary with the next file. (which is actually a false overlap)
|
|
362
|
-
// TODO: Exclude such false overlap, so it won't disable the penultimate
|
|
363
|
-
// output.
|
|
364
|
-
std::set<uint64_t> penultimate_inputs;
|
|
365
|
-
for (const auto& input_lvl : inputs_) {
|
|
366
|
-
if (input_lvl.level == penultimate_level_) {
|
|
367
|
-
for (const auto& file : input_lvl.files) {
|
|
368
|
-
penultimate_inputs.emplace(file->fd.GetNumber());
|
|
369
|
-
}
|
|
370
|
-
}
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
auto penultimate_files = input_vstorage_->LevelFiles(penultimate_level_);
|
|
374
|
-
for (const auto& file : penultimate_files) {
|
|
375
|
-
if (penultimate_inputs.find(file->fd.GetNumber()) ==
|
|
376
|
-
penultimate_inputs.end() &&
|
|
377
|
-
OverlapPenultimateLevelOutputRange(file->smallest.user_key(),
|
|
378
|
-
file->largest.user_key())) {
|
|
379
|
-
// basically disable the penultimate range output. which should be rare
|
|
380
|
-
// or a false overlap caused by range del
|
|
381
|
-
penultimate_level_smallest_user_key_ = "";
|
|
382
|
-
penultimate_level_largest_user_key_ = "";
|
|
383
|
-
penultimate_output_range_type_ = PenultimateOutputRangeType::kDisabled;
|
|
384
|
-
}
|
|
385
|
-
}
|
|
386
356
|
}
|
|
387
357
|
|
|
388
358
|
Compaction::~Compaction() {
|
|
@@ -807,6 +777,16 @@ uint64_t Compaction::MinInputFileOldestAncesterTime(
|
|
|
807
777
|
return min_oldest_ancester_time;
|
|
808
778
|
}
|
|
809
779
|
|
|
780
|
+
uint64_t Compaction::MinInputFileEpochNumber() const {
|
|
781
|
+
uint64_t min_epoch_number = std::numeric_limits<uint64_t>::max();
|
|
782
|
+
for (const auto& inputs_per_level : inputs_) {
|
|
783
|
+
for (const auto& file : inputs_per_level.files) {
|
|
784
|
+
min_epoch_number = std::min(min_epoch_number, file->epoch_number);
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
return min_epoch_number;
|
|
788
|
+
}
|
|
789
|
+
|
|
810
790
|
int Compaction::EvaluatePenultimateLevel(
|
|
811
791
|
const VersionStorageInfo* vstorage,
|
|
812
792
|
const ImmutableOptions& immutable_options, const int start_level,
|
|
@@ -18,6 +18,8 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
18
18
|
// The file contains class Compaction, as well as some helper functions
|
|
19
19
|
// and data structures used by the class.
|
|
20
20
|
|
|
21
|
+
const uint64_t kRangeTombstoneSentinel =
|
|
22
|
+
PackSequenceAndType(kMaxSequenceNumber, kTypeRangeDeletion);
|
|
21
23
|
// Utility for comparing sstable boundary keys. Returns -1 if either a or b is
|
|
22
24
|
// null which provides the property that a==null indicates a key that is less
|
|
23
25
|
// than any key and b==null indicates a key that is greater than any key. Note
|
|
@@ -378,6 +380,9 @@ class Compaction {
|
|
|
378
380
|
// This is used to filter out some input files' ancester's time range.
|
|
379
381
|
uint64_t MinInputFileOldestAncesterTime(const InternalKey* start,
|
|
380
382
|
const InternalKey* end) const;
|
|
383
|
+
// Return the minimum epoch number among
|
|
384
|
+
// input files' associated with this compaction
|
|
385
|
+
uint64_t MinInputFileEpochNumber() const;
|
|
381
386
|
|
|
382
387
|
// Called by DBImpl::NotifyOnCompactionCompleted to make sure number of
|
|
383
388
|
// compaction begin and compaction completion callbacks match.
|
|
@@ -377,6 +377,7 @@ void CompactionIterator::NextFromInput() {
|
|
|
377
377
|
value_ = input_.value();
|
|
378
378
|
blob_value_.Reset();
|
|
379
379
|
iter_stats_.num_input_records++;
|
|
380
|
+
is_range_del_ = input_.IsDeleteRangeSentinelKey();
|
|
380
381
|
|
|
381
382
|
Status pik_status = ParseInternalKey(key_, &ikey_, allow_data_in_errors_);
|
|
382
383
|
if (!pik_status.ok()) {
|
|
@@ -396,7 +397,10 @@ void CompactionIterator::NextFromInput() {
|
|
|
396
397
|
break;
|
|
397
398
|
}
|
|
398
399
|
TEST_SYNC_POINT_CALLBACK("CompactionIterator:ProcessKV", &ikey_);
|
|
399
|
-
|
|
400
|
+
if (is_range_del_) {
|
|
401
|
+
validity_info_.SetValid(kRangeDeletion);
|
|
402
|
+
break;
|
|
403
|
+
}
|
|
400
404
|
// Update input statistics
|
|
401
405
|
if (ikey_.type == kTypeDeletion || ikey_.type == kTypeSingleDeletion ||
|
|
402
406
|
ikey_.type == kTypeDeletionWithTimestamp) {
|
|
@@ -618,6 +622,14 @@ void CompactionIterator::NextFromInput() {
|
|
|
618
622
|
|
|
619
623
|
ParsedInternalKey next_ikey;
|
|
620
624
|
AdvanceInputIter();
|
|
625
|
+
while (input_.Valid() && input_.IsDeleteRangeSentinelKey() &&
|
|
626
|
+
ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
|
|
627
|
+
.ok() &&
|
|
628
|
+
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) {
|
|
629
|
+
// skip range tombstone start keys with the same user key
|
|
630
|
+
// since they are not "real" point keys.
|
|
631
|
+
AdvanceInputIter();
|
|
632
|
+
}
|
|
621
633
|
|
|
622
634
|
// Check whether the next key exists, is not corrupt, and is the same key
|
|
623
635
|
// as the single delete.
|
|
@@ -625,6 +637,7 @@ void CompactionIterator::NextFromInput() {
|
|
|
625
637
|
ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
|
|
626
638
|
.ok() &&
|
|
627
639
|
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key)) {
|
|
640
|
+
assert(!input_.IsDeleteRangeSentinelKey());
|
|
628
641
|
#ifndef NDEBUG
|
|
629
642
|
const Compaction* c =
|
|
630
643
|
compaction_ ? compaction_->real_compaction() : nullptr;
|
|
@@ -849,12 +862,14 @@ void CompactionIterator::NextFromInput() {
|
|
|
849
862
|
// Note that a deletion marker of type kTypeDeletionWithTimestamp will be
|
|
850
863
|
// considered to have a different user key unless the timestamp is older
|
|
851
864
|
// than *full_history_ts_low_.
|
|
865
|
+
//
|
|
866
|
+
// Range tombstone start keys are skipped as they are not "real" keys.
|
|
852
867
|
while (!IsPausingManualCompaction() && !IsShuttingDown() &&
|
|
853
868
|
input_.Valid() &&
|
|
854
869
|
(ParseInternalKey(input_.key(), &next_ikey, allow_data_in_errors_)
|
|
855
870
|
.ok()) &&
|
|
856
871
|
cmp_->EqualWithoutTimestamp(ikey_.user_key, next_ikey.user_key) &&
|
|
857
|
-
(prev_snapshot == 0 ||
|
|
872
|
+
(prev_snapshot == 0 || input_.IsDeleteRangeSentinelKey() ||
|
|
858
873
|
DefinitelyNotInSnapshot(next_ikey.sequence, prev_snapshot))) {
|
|
859
874
|
AdvanceInputIter();
|
|
860
875
|
}
|
|
@@ -1105,7 +1120,9 @@ void CompactionIterator::DecideOutputLevel() {
|
|
|
1105
1120
|
TEST_SYNC_POINT_CALLBACK("CompactionIterator::PrepareOutput.context",
|
|
1106
1121
|
&context);
|
|
1107
1122
|
output_to_penultimate_level_ = context.output_to_penultimate_level;
|
|
1108
|
-
#
|
|
1123
|
+
#else
|
|
1124
|
+
output_to_penultimate_level_ = false;
|
|
1125
|
+
#endif // NDEBUG
|
|
1109
1126
|
|
|
1110
1127
|
// if the key is newer than the cutoff sequence or within the earliest
|
|
1111
1128
|
// snapshot, it should output to the penultimate level.
|
|
@@ -1145,10 +1162,12 @@ void CompactionIterator::DecideOutputLevel() {
|
|
|
1145
1162
|
|
|
1146
1163
|
void CompactionIterator::PrepareOutput() {
|
|
1147
1164
|
if (Valid()) {
|
|
1148
|
-
if (
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1165
|
+
if (LIKELY(!is_range_del_)) {
|
|
1166
|
+
if (ikey_.type == kTypeValue) {
|
|
1167
|
+
ExtractLargeValueIfNeeded();
|
|
1168
|
+
} else if (ikey_.type == kTypeBlobIndex) {
|
|
1169
|
+
GarbageCollectBlobIfNeeded();
|
|
1170
|
+
}
|
|
1152
1171
|
}
|
|
1153
1172
|
|
|
1154
1173
|
if (compaction_ != nullptr && compaction_->SupportsPerKeyPlacement()) {
|
|
@@ -1171,7 +1190,7 @@ void CompactionIterator::PrepareOutput() {
|
|
|
1171
1190
|
DefinitelyInSnapshot(ikey_.sequence, earliest_snapshot_) &&
|
|
1172
1191
|
ikey_.type != kTypeMerge && current_key_committed_ &&
|
|
1173
1192
|
!output_to_penultimate_level_ &&
|
|
1174
|
-
ikey_.sequence < preserve_time_min_seqno_) {
|
|
1193
|
+
ikey_.sequence < preserve_time_min_seqno_ && !is_range_del_) {
|
|
1175
1194
|
if (ikey_.type == kTypeDeletion ||
|
|
1176
1195
|
(ikey_.type == kTypeSingleDeletion && timestamp_size_ == 0)) {
|
|
1177
1196
|
ROCKS_LOG_FATAL(
|
|
@@ -63,6 +63,10 @@ class SequenceIterWrapper : public InternalIterator {
|
|
|
63
63
|
void SeekToLast() override { assert(false); }
|
|
64
64
|
|
|
65
65
|
uint64_t num_itered() const { return num_itered_; }
|
|
66
|
+
bool IsDeleteRangeSentinelKey() const override {
|
|
67
|
+
assert(Valid());
|
|
68
|
+
return inner_iter_->IsDeleteRangeSentinelKey();
|
|
69
|
+
}
|
|
66
70
|
|
|
67
71
|
private:
|
|
68
72
|
InternalKeyComparator icmp_;
|
|
@@ -242,7 +246,12 @@ class CompactionIterator {
|
|
|
242
246
|
const Status& status() const { return status_; }
|
|
243
247
|
const ParsedInternalKey& ikey() const { return ikey_; }
|
|
244
248
|
inline bool Valid() const { return validity_info_.IsValid(); }
|
|
245
|
-
const Slice& user_key() const {
|
|
249
|
+
const Slice& user_key() const {
|
|
250
|
+
if (UNLIKELY(is_range_del_)) {
|
|
251
|
+
return ikey_.user_key;
|
|
252
|
+
}
|
|
253
|
+
return current_user_key_;
|
|
254
|
+
}
|
|
246
255
|
const CompactionIterationStats& iter_stats() const { return iter_stats_; }
|
|
247
256
|
uint64_t num_input_entry_scanned() const { return input_.num_itered(); }
|
|
248
257
|
// If the current key should be placed on penultimate level, only valid if
|
|
@@ -252,6 +261,8 @@ class CompactionIterator {
|
|
|
252
261
|
}
|
|
253
262
|
Status InputStatus() const { return input_.status(); }
|
|
254
263
|
|
|
264
|
+
bool IsDeleteRangeSentinelKey() const { return is_range_del_; }
|
|
265
|
+
|
|
255
266
|
private:
|
|
256
267
|
// Processes the input stream to find the next output
|
|
257
268
|
void NextFromInput();
|
|
@@ -385,6 +396,7 @@ class CompactionIterator {
|
|
|
385
396
|
kKeepSD = 8,
|
|
386
397
|
kKeepDel = 9,
|
|
387
398
|
kNewUserKey = 10,
|
|
399
|
+
kRangeDeletion = 11,
|
|
388
400
|
};
|
|
389
401
|
|
|
390
402
|
struct ValidityInfo {
|
|
@@ -492,6 +504,10 @@ class CompactionIterator {
|
|
|
492
504
|
// This is a best-effort facility, so memory_order_relaxed is sufficient.
|
|
493
505
|
return manual_compaction_canceled_.load(std::memory_order_relaxed);
|
|
494
506
|
}
|
|
507
|
+
|
|
508
|
+
// Stores whether the current compaction iterator output
|
|
509
|
+
// is a range tombstone start key.
|
|
510
|
+
bool is_range_del_{false};
|
|
495
511
|
};
|
|
496
512
|
|
|
497
513
|
inline bool CompactionIterator::DefinitelyInSnapshot(SequenceNumber seq,
|
|
@@ -1286,7 +1286,6 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1286
1286
|
while (status.ok() && !cfd->IsDropped() && c_iter->Valid()) {
|
|
1287
1287
|
// Invariant: c_iter.status() is guaranteed to be OK if c_iter->Valid()
|
|
1288
1288
|
// returns true.
|
|
1289
|
-
|
|
1290
1289
|
assert(!end.has_value() || cfd->user_comparator()->Compare(
|
|
1291
1290
|
c_iter->user_key(), end.value()) < 0);
|
|
1292
1291
|
|
|
@@ -1834,12 +1833,14 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
|
|
|
1834
1833
|
}
|
|
1835
1834
|
|
|
1836
1835
|
// Initialize a SubcompactionState::Output and add it to sub_compact->outputs
|
|
1836
|
+
uint64_t epoch_number = sub_compact->compaction->MinInputFileEpochNumber();
|
|
1837
1837
|
{
|
|
1838
1838
|
FileMetaData meta;
|
|
1839
1839
|
meta.fd = FileDescriptor(file_number,
|
|
1840
1840
|
sub_compact->compaction->output_path_id(), 0);
|
|
1841
1841
|
meta.oldest_ancester_time = oldest_ancester_time;
|
|
1842
1842
|
meta.file_creation_time = current_time;
|
|
1843
|
+
meta.epoch_number = epoch_number;
|
|
1843
1844
|
meta.temperature = temperature;
|
|
1844
1845
|
assert(!db_id_.empty());
|
|
1845
1846
|
assert(!db_session_id_.empty());
|