@nxtedition/rocksdb 12.1.3 → 12.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +12 -13
- package/binding.gyp +0 -4
- package/deps/rocksdb/rocksdb/Makefile +10 -5
- package/deps/rocksdb/rocksdb/TARGETS +9 -7
- package/deps/rocksdb/rocksdb/cache/cache.cc +15 -11
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +26 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +16 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +6 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +38 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +4 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +11 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +6 -0
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +56 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +12 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +10 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +9 -0
- package/deps/rocksdb/rocksdb/db/c.cc +9 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +12 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +6 -23
- package/deps/rocksdb/rocksdb/db/column_family.h +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +4 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +14 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +19 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +34 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +16 -31
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +7 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +95 -84
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +616 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +8 -2
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +93 -69
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +353 -89
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +4 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +116 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +67 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +42 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +50 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +79 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +36 -59
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +72 -39
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +14 -12
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +75 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +36 -22
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +23 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +28 -3
- package/deps/rocksdb/rocksdb/db/error_handler.h +2 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +165 -33
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +37 -28
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -6
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -6
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -6
- package/deps/rocksdb/rocksdb/db/job_context.h +4 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +24 -14
- package/deps/rocksdb/rocksdb/db/memtable.h +2 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +61 -33
- package/deps/rocksdb/rocksdb/db/memtable_list.h +8 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -2
- package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
- package/deps/rocksdb/rocksdb/db/version_builder.cc +14 -11
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +20 -4
- package/deps/rocksdb/rocksdb/db/version_set.cc +40 -30
- package/deps/rocksdb/rocksdb/db/version_set.h +13 -3
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -76
- package/deps/rocksdb/rocksdb/db/write_batch.cc +6 -2
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +25 -2
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +11 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.h +5 -0
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +11 -1
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +10 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +30 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +10 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +287 -83
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +68 -36
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +4 -4
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +31 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +14 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +15 -4
- package/deps/rocksdb/rocksdb/options/options_helper.h +4 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +5 -4
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +38 -45
- package/deps/rocksdb/rocksdb/port/port.h +16 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +8 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +10 -20
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -9
- package/deps/rocksdb/rocksdb/table/format.cc +32 -4
- package/deps/rocksdb/rocksdb/table/format.h +12 -1
- package/deps/rocksdb/rocksdb/table/iterator.cc +4 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +214 -161
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +4 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +2 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -22
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +3 -0
- package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +3 -3
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -65,9 +65,8 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
|
|
|
65
65
|
valid_(false),
|
|
66
66
|
current_entry_is_merged_(false),
|
|
67
67
|
is_key_seqnum_zero_(false),
|
|
68
|
-
prefix_same_as_start_(
|
|
69
|
-
|
|
70
|
-
: false),
|
|
68
|
+
prefix_same_as_start_(
|
|
69
|
+
prefix_extractor_ ? read_options.prefix_same_as_start : false),
|
|
71
70
|
pin_thru_lifetime_(read_options.pin_data),
|
|
72
71
|
expect_total_order_inner_iter_(prefix_extractor_ == nullptr ||
|
|
73
72
|
read_options.total_order_seek ||
|
|
@@ -93,6 +92,9 @@ DBIter::DBIter(Env* _env, const ReadOptions& read_options,
|
|
|
93
92
|
status_.PermitUncheckedError();
|
|
94
93
|
assert(timestamp_size_ ==
|
|
95
94
|
user_comparator_.user_comparator()->timestamp_size());
|
|
95
|
+
// prefix_seek_opt_in_only should force total_order_seek wherever the caller
|
|
96
|
+
// is duplicating the original ReadOptions
|
|
97
|
+
assert(!ioptions.prefix_seek_opt_in_only || read_options.total_order_seek);
|
|
96
98
|
}
|
|
97
99
|
|
|
98
100
|
Status DBIter::GetProperty(std::string prop_name, std::string* prop) {
|
|
@@ -538,6 +540,8 @@ bool DBIter::FindNextUserEntryInternal(bool skipping_saved_key,
|
|
|
538
540
|
} else {
|
|
539
541
|
iter_.Next();
|
|
540
542
|
}
|
|
543
|
+
// This could be a long-running operation due to tombstones, etc.
|
|
544
|
+
ROCKSDB_THREAD_YIELD_HOOK();
|
|
541
545
|
} while (iter_.Valid());
|
|
542
546
|
|
|
543
547
|
valid_ = false;
|
|
@@ -244,7 +244,7 @@ TEST_F(DBSecondaryTest, SimpleInternalCompaction) {
|
|
|
244
244
|
ASSERT_EQ(largest.user_key().ToString(), "foo");
|
|
245
245
|
ASSERT_EQ(result.output_level, 1);
|
|
246
246
|
ASSERT_EQ(result.output_path, this->secondary_path_);
|
|
247
|
-
ASSERT_EQ(result.num_output_records, 2);
|
|
247
|
+
ASSERT_EQ(result.stats.num_output_records, 2);
|
|
248
248
|
ASSERT_GT(result.bytes_written, 0);
|
|
249
249
|
ASSERT_OK(result.status);
|
|
250
250
|
}
|
|
@@ -383,12 +383,16 @@ TEST_F(DBSSTTest, DBWithSstFileManager) {
|
|
|
383
383
|
ASSERT_EQ(files_moved, 0);
|
|
384
384
|
|
|
385
385
|
Close();
|
|
386
|
+
ASSERT_EQ(sfm->GetTrackedFiles().size(), 0) << "sfm should be empty";
|
|
387
|
+
ASSERT_EQ(sfm->GetTotalSize(), 0) << "sfm should be empty";
|
|
386
388
|
Reopen(options);
|
|
387
389
|
ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
|
|
388
390
|
ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
|
|
389
391
|
|
|
390
392
|
// Verify that we track all the files again after the DB is closed and opened
|
|
391
393
|
Close();
|
|
394
|
+
ASSERT_EQ(sfm->GetTrackedFiles().size(), 0) << "sfm should be empty";
|
|
395
|
+
ASSERT_EQ(sfm->GetTotalSize(), 0) << "sfm should be empty";
|
|
392
396
|
sst_file_manager.reset(NewSstFileManager(env_));
|
|
393
397
|
options.sst_file_manager = sst_file_manager;
|
|
394
398
|
sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
|
|
@@ -439,6 +443,11 @@ TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFiles) {
|
|
|
439
443
|
|
|
440
444
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
|
441
445
|
"SstFileManagerImpl::OnMoveFile", [&](void* /*arg*/) { files_moved++; });
|
|
446
|
+
|
|
447
|
+
int64_t untracked_files = 0;
|
|
448
|
+
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
|
449
|
+
"SstFileManagerImpl::OnUntrackFile",
|
|
450
|
+
[&](void* /*arg*/) { ++untracked_files; });
|
|
442
451
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
|
|
443
452
|
|
|
444
453
|
Options options = CurrentOptions();
|
|
@@ -485,6 +494,10 @@ TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFiles) {
|
|
|
485
494
|
}
|
|
486
495
|
ASSERT_EQ(sfm->GetTotalSize(), total_files_size);
|
|
487
496
|
Close();
|
|
497
|
+
ASSERT_EQ(untracked_files, files_in_db.size());
|
|
498
|
+
untracked_files = 0;
|
|
499
|
+
ASSERT_EQ(sfm->GetTrackedFiles().size(), 0) << "sfm should be empty";
|
|
500
|
+
ASSERT_EQ(sfm->GetTotalSize(), 0) << "sfm should be empty";
|
|
488
501
|
|
|
489
502
|
Reopen(options);
|
|
490
503
|
ASSERT_EQ(sfm->GetTrackedFiles(), files_in_db);
|
|
@@ -492,6 +505,10 @@ TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFiles) {
|
|
|
492
505
|
|
|
493
506
|
// Verify that we track all the files again after the DB is closed and opened.
|
|
494
507
|
Close();
|
|
508
|
+
ASSERT_EQ(untracked_files, files_in_db.size());
|
|
509
|
+
untracked_files = 0;
|
|
510
|
+
ASSERT_EQ(sfm->GetTrackedFiles().size(), 0) << "sfm should be empty";
|
|
511
|
+
ASSERT_EQ(sfm->GetTotalSize(), 0) << "sfm should be empty";
|
|
495
512
|
|
|
496
513
|
sst_file_manager.reset(NewSstFileManager(env_));
|
|
497
514
|
options.sst_file_manager = sst_file_manager;
|
|
@@ -507,6 +524,10 @@ TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFiles) {
|
|
|
507
524
|
ASSERT_EQ(files_deleted, 0);
|
|
508
525
|
ASSERT_EQ(files_scheduled_to_delete, 0);
|
|
509
526
|
Close();
|
|
527
|
+
ASSERT_EQ(untracked_files, files_in_db.size());
|
|
528
|
+
untracked_files = 0;
|
|
529
|
+
ASSERT_EQ(sfm->GetTrackedFiles().size(), 0) << "sfm should be empty";
|
|
530
|
+
ASSERT_EQ(sfm->GetTotalSize(), 0) << "sfm should be empty";
|
|
510
531
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
|
511
532
|
"SstFileManagerImpl::ScheduleUnaccountedFileDeletion", [&](void* arg) {
|
|
512
533
|
assert(arg);
|
|
@@ -666,6 +687,9 @@ TEST_F(DBSSTTest, DBWithSstFileManagerForBlobFilesWithGC) {
|
|
|
666
687
|
}
|
|
667
688
|
|
|
668
689
|
Close();
|
|
690
|
+
ASSERT_EQ(sfm->GetTrackedFiles().size(), 0) << "sfm should be empty";
|
|
691
|
+
ASSERT_EQ(sfm->GetTotalSize(), 0) << "sfm should be empty";
|
|
692
|
+
|
|
669
693
|
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
|
|
670
694
|
"SstFileManagerImpl::ScheduleUnaccountedFileDeletion", [&](void* arg) {
|
|
671
695
|
assert(arg);
|
|
@@ -5597,32 +5597,45 @@ TEST_F(DBTest2, PrefixBloomFilteredOut) {
|
|
|
5597
5597
|
bbto.filter_policy.reset(NewBloomFilterPolicy(10, false));
|
|
5598
5598
|
bbto.whole_key_filtering = false;
|
|
5599
5599
|
options.table_factory.reset(NewBlockBasedTableFactory(bbto));
|
|
5600
|
-
DestroyAndReopen(options);
|
|
5601
5600
|
|
|
5602
|
-
//
|
|
5603
|
-
|
|
5604
|
-
|
|
5605
|
-
|
|
5606
|
-
ASSERT_OK(Flush());
|
|
5607
|
-
ASSERT_OK(Put("ddd0", ""));
|
|
5608
|
-
ASSERT_OK(Flush());
|
|
5609
|
-
CompactRangeOptions cro;
|
|
5610
|
-
cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip;
|
|
5611
|
-
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
|
|
5601
|
+
// This test is also the primary test for prefix_seek_opt_in_only
|
|
5602
|
+
for (bool opt_in : {false, true}) {
|
|
5603
|
+
options.prefix_seek_opt_in_only = opt_in;
|
|
5604
|
+
DestroyAndReopen(options);
|
|
5612
5605
|
|
|
5613
|
-
|
|
5614
|
-
|
|
5606
|
+
// Construct two L1 files with keys:
|
|
5607
|
+
// f1:[aaa1 ccc1] f2:[ddd0]
|
|
5608
|
+
ASSERT_OK(Put("aaa1", ""));
|
|
5609
|
+
ASSERT_OK(Put("ccc1", ""));
|
|
5610
|
+
ASSERT_OK(Flush());
|
|
5611
|
+
ASSERT_OK(Put("ddd0", ""));
|
|
5612
|
+
ASSERT_OK(Flush());
|
|
5613
|
+
CompactRangeOptions cro;
|
|
5614
|
+
cro.bottommost_level_compaction = BottommostLevelCompaction::kSkip;
|
|
5615
|
+
ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
|
|
5615
5616
|
|
|
5616
|
-
|
|
5617
|
-
|
|
5618
|
-
|
|
5619
|
-
|
|
5620
|
-
|
|
5621
|
-
iter->Seek("bbb1");
|
|
5622
|
-
ASSERT_OK(iter->status());
|
|
5623
|
-
ASSERT_FALSE(iter->Valid());
|
|
5617
|
+
ReadOptions ropts;
|
|
5618
|
+
for (bool same : {false, true}) {
|
|
5619
|
+
ropts.prefix_same_as_start = same;
|
|
5620
|
+
std::unique_ptr<Iterator> iter(db_->NewIterator(ropts));
|
|
5621
|
+
ASSERT_OK(iter->status());
|
|
5624
5622
|
|
|
5625
|
-
|
|
5623
|
+
iter->Seek("bbb1");
|
|
5624
|
+
ASSERT_OK(iter->status());
|
|
5625
|
+
if (opt_in && !same) {
|
|
5626
|
+
// Unbounded total order seek
|
|
5627
|
+
ASSERT_TRUE(iter->Valid());
|
|
5628
|
+
ASSERT_EQ(iter->key(), "ccc1");
|
|
5629
|
+
} else {
|
|
5630
|
+
// The seek is filtered out by f1's Bloom filter. When same == false, this is just
|
|
5631
|
+
// one valid position following the contract. Positioning to ccc1 or ddd0
|
|
5632
|
+
// is also valid. This is just to validate the behavior of the current
|
|
5633
|
+
// implementation. If underlying implementation changes, the test might
|
|
5634
|
+
// fail here.
|
|
5635
|
+
ASSERT_FALSE(iter->Valid());
|
|
5636
|
+
}
|
|
5637
|
+
}
|
|
5638
|
+
}
|
|
5626
5639
|
}
|
|
5627
5640
|
|
|
5628
5641
|
TEST_F(DBTest2, RowCacheSnapshot) {
|
|
@@ -5987,6 +6000,7 @@ TEST_F(DBTest2, ChangePrefixExtractor) {
|
|
|
5987
6000
|
// create a DB with block prefix index
|
|
5988
6001
|
BlockBasedTableOptions table_options;
|
|
5989
6002
|
Options options = CurrentOptions();
|
|
6003
|
+
options.prefix_seek_opt_in_only = false; // Use legacy prefix seek
|
|
5990
6004
|
|
|
5991
6005
|
// Sometimes filter is checked based on upper bound. Assert counters
|
|
5992
6006
|
// for that case. Otherwise, only check data correctness.
|
|
@@ -2931,6 +2931,29 @@ TEST_F(DBWALTest, RecoveryFlushSwitchWALOnEmptyMemtable) {
|
|
|
2931
2931
|
ASSERT_EQ("new_v", Get("k"));
|
|
2932
2932
|
Destroy(options);
|
|
2933
2933
|
}
|
|
2934
|
+
|
|
2935
|
+
// With manual_wal_flush enabled, a write error injected while flushing the WAL
// must be reported by FlushWAL() as an IOError, must be recorded as a
// fatal-severity background error, and must leave no recovery in progress.
TEST_F(DBWALTest, WALWriteErrorNoRecovery) {
  Options options = CurrentOptions();
  auto fault_fs = std::make_shared<FaultInjectionTestFS>(FileSystem::Default());
  // options.env only borrows this pointer, so the Env has to stay alive until
  // the final Destroy(options) below.
  std::unique_ptr<Env> fault_fs_env(NewCompositeEnv(fault_fs));
  options.env = fault_fs_env.get();
  options.manual_wal_flush = true;
  DestroyAndReopen(options);

  // Arm write-error injection for this thread.
  // NOTE(review): one_in == 1 presumably makes every write fail -- confirm
  // against FaultInjectionTestFS.
  fault_fs->SetThreadLocalErrorContext(
      FaultInjectionIOType::kWrite, 7 /* seed*/, 1 /* one_in */,
      true /* retryable */, false /* has_data_loss*/);
  fault_fs->EnableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);

  // The Put itself is expected to succeed; the failure is only observed once
  // the WAL is flushed explicitly.
  ASSERT_OK(Put("k", "v"));
  const Status flush_status = db_->FlushWAL(false);
  ASSERT_TRUE(flush_status.IsIOError());

  const Status bg_error = dbfull()->TEST_GetBGError();
  ASSERT_EQ(bg_error.severity(), Status::Severity::kFatalError);
  ASSERT_FALSE(dbfull()->TEST_IsRecoveryInProgress());

  // Stop injecting errors before cleaning up the DB.
  fault_fs->DisableThreadLocalErrorInjection(FaultInjectionIOType::kWrite);
  Destroy(options);
}
|
|
2934
2957
|
} // namespace ROCKSDB_NAMESPACE
|
|
2935
2958
|
|
|
2936
2959
|
int main(int argc, char** argv) {
|
|
@@ -832,6 +832,7 @@ TEST_P(DBBasicTestWithTimestampTableOptions, GetAndMultiGet) {
|
|
|
832
832
|
|
|
833
833
|
TEST_P(DBBasicTestWithTimestampTableOptions, SeekWithPrefixLessThanKey) {
|
|
834
834
|
Options options = CurrentOptions();
|
|
835
|
+
options.prefix_seek_opt_in_only = false; // Use legacy prefix seek
|
|
835
836
|
options.env = env_;
|
|
836
837
|
options.create_if_missing = true;
|
|
837
838
|
options.prefix_extractor.reset(NewFixedPrefixTransform(3));
|
|
@@ -1009,6 +1010,7 @@ TEST_F(DBBasicTestWithTimestamp, ChangeIterationDirection) {
|
|
|
1009
1010
|
TestComparator test_cmp(kTimestampSize);
|
|
1010
1011
|
options.comparator = &test_cmp;
|
|
1011
1012
|
options.prefix_extractor.reset(NewFixedPrefixTransform(1));
|
|
1013
|
+
options.prefix_seek_opt_in_only = false; // Use legacy prefix seek
|
|
1012
1014
|
options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
|
|
1013
1015
|
DestroyAndReopen(options);
|
|
1014
1016
|
const std::vector<std::string> timestamps = {Timestamp(1, 1), Timestamp(0, 2),
|
|
@@ -381,7 +381,7 @@ void ErrorHandler::HandleKnownErrors(const Status& bg_err,
|
|
|
381
381
|
// BackgroundErrorReason reason) will be called to handle other error cases
|
|
382
382
|
// such as delegating to SstFileManager to handle no space error.
|
|
383
383
|
void ErrorHandler::SetBGError(const Status& bg_status,
|
|
384
|
-
BackgroundErrorReason reason) {
|
|
384
|
+
BackgroundErrorReason reason, bool wal_related) {
|
|
385
385
|
db_mutex_->AssertHeld();
|
|
386
386
|
Status tmp_status = bg_status;
|
|
387
387
|
IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));
|
|
@@ -389,8 +389,8 @@ void ErrorHandler::SetBGError(const Status& bg_status,
|
|
|
389
389
|
if (bg_io_err.ok()) {
|
|
390
390
|
return;
|
|
391
391
|
}
|
|
392
|
-
ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
|
|
393
|
-
bg_io_err.ToString().c_str());
|
|
392
|
+
ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s, reason %d",
|
|
393
|
+
bg_io_err.ToString().c_str(), static_cast<int>(reason));
|
|
394
394
|
|
|
395
395
|
RecordStats({ERROR_HANDLER_BG_ERROR_COUNT, ERROR_HANDLER_BG_IO_ERROR_COUNT},
|
|
396
396
|
{} /* int_histograms */);
|
|
@@ -412,6 +412,31 @@ void ErrorHandler::SetBGError(const Status& bg_status,
|
|
|
412
412
|
recover_context_ = context;
|
|
413
413
|
return;
|
|
414
414
|
}
|
|
415
|
+
if (wal_related) {
|
|
416
|
+
assert(reason == BackgroundErrorReason::kWriteCallback ||
|
|
417
|
+
reason == BackgroundErrorReason::kMemTable ||
|
|
418
|
+
reason == BackgroundErrorReason::kFlush);
|
|
419
|
+
}
|
|
420
|
+
if (db_options_.manual_wal_flush && wal_related && bg_io_err.IsIOError()) {
|
|
421
|
+
// With manual_wal_flush, a WAL write failure can drop buffered WAL writes.
|
|
422
|
+
// Memtables and WAL then become inconsistent. A successful memtable flush
|
|
423
|
+
// on one CF can cause CFs to be inconsistent upon restart. Before we fix
|
|
424
|
+
// the bug in auto recovery from WAL write failures that can flush one CF
|
|
425
|
+
// at a time, we set the error severity to fatal to disallow auto recovery.
|
|
426
|
+
// TODO: remove parameter `wal_related` once we can automatically recover
|
|
427
|
+
// from WAL write failures.
|
|
428
|
+
bool auto_recovery = false;
|
|
429
|
+
Status bg_err(new_bg_io_err, Status::Severity::kFatalError);
|
|
430
|
+
CheckAndSetRecoveryAndBGError(bg_err);
|
|
431
|
+
ROCKS_LOG_WARN(db_options_.info_log,
|
|
432
|
+
"ErrorHandler: A potentially WAL error happened, set "
|
|
433
|
+
"background IO error as fatal error\n");
|
|
434
|
+
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
|
|
435
|
+
&bg_err, db_mutex_, &auto_recovery);
|
|
436
|
+
recover_context_ = context;
|
|
437
|
+
return;
|
|
438
|
+
}
|
|
439
|
+
|
|
415
440
|
if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
|
|
416
441
|
(bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
|
|
417
442
|
bg_io_err.GetRetryable())) {
|
|
@@ -56,7 +56,8 @@ class ErrorHandler {
|
|
|
56
56
|
Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
|
|
57
57
|
Status::Code code, Status::SubCode subcode);
|
|
58
58
|
|
|
59
|
-
void SetBGError(const Status& bg_err, BackgroundErrorReason reason
|
|
59
|
+
void SetBGError(const Status& bg_err, BackgroundErrorReason reason,
|
|
60
|
+
bool wal_related = false);
|
|
60
61
|
|
|
61
62
|
Status GetBGError() const { return bg_error_; }
|
|
62
63
|
|
|
@@ -124,6 +124,7 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished(
|
|
|
124
124
|
<< "comparator" << table_properties.comparator_name
|
|
125
125
|
<< "user_defined_timestamps_persisted"
|
|
126
126
|
<< table_properties.user_defined_timestamps_persisted
|
|
127
|
+
<< "key_largest_seqno" << table_properties.key_largest_seqno
|
|
127
128
|
<< "merge_operator" << table_properties.merge_operator_name
|
|
128
129
|
<< "prefix_extractor_name"
|
|
129
130
|
<< table_properties.prefix_extractor_name << "property_collectors"
|
|
@@ -152,6 +152,85 @@ Status UpdateManifestForFilesState(
|
|
|
152
152
|
// EXPERIMENTAL new filtering features
|
|
153
153
|
|
|
154
154
|
namespace {
|
|
155
|
+
template <size_t N>
|
|
156
|
+
class SemiStaticCappedKeySegmentsExtractor : public KeySegmentsExtractor {
|
|
157
|
+
public:
|
|
158
|
+
SemiStaticCappedKeySegmentsExtractor(const uint32_t* byte_widths) {
|
|
159
|
+
id_ = kName();
|
|
160
|
+
uint32_t prev_end = 0;
|
|
161
|
+
if constexpr (N > 0) { // Suppress a compiler warning
|
|
162
|
+
for (size_t i = 0; i < N; ++i) {
|
|
163
|
+
prev_end = prev_end + byte_widths[i];
|
|
164
|
+
ideal_ends_[i] = prev_end;
|
|
165
|
+
id_ += std::to_string(byte_widths[i]) + "b";
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
static const char* kName() { return "CappedKeySegmentsExtractor"; }
|
|
171
|
+
|
|
172
|
+
const char* Name() const override { return kName(); }
|
|
173
|
+
|
|
174
|
+
std::string GetId() const override { return id_; }
|
|
175
|
+
|
|
176
|
+
void Extract(const Slice& key_or_bound, KeyKind /*kind*/,
|
|
177
|
+
Result* result) const override {
|
|
178
|
+
// Optimistic assignment
|
|
179
|
+
result->segment_ends.assign(ideal_ends_.begin(), ideal_ends_.end());
|
|
180
|
+
if constexpr (N > 0) { // Suppress a compiler warning
|
|
181
|
+
uint32_t key_size = static_cast<uint32_t>(key_or_bound.size());
|
|
182
|
+
if (key_size < ideal_ends_.back()) {
|
|
183
|
+
// Need to fix up (should be rare)
|
|
184
|
+
for (size_t i = 0; i < N; ++i) {
|
|
185
|
+
result->segment_ends[i] = std::min(key_size, result->segment_ends[i]);
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
private:
|
|
192
|
+
std::array<uint32_t, N> ideal_ends_;
|
|
193
|
+
std::string id_;
|
|
194
|
+
};
|
|
195
|
+
|
|
196
|
+
class DynamicCappedKeySegmentsExtractor : public KeySegmentsExtractor {
|
|
197
|
+
public:
|
|
198
|
+
DynamicCappedKeySegmentsExtractor(const std::vector<uint32_t>& byte_widths) {
|
|
199
|
+
id_ = kName();
|
|
200
|
+
uint32_t prev_end = 0;
|
|
201
|
+
for (size_t i = 0; i < byte_widths.size(); ++i) {
|
|
202
|
+
prev_end = prev_end + byte_widths[i];
|
|
203
|
+
ideal_ends_[i] = prev_end;
|
|
204
|
+
id_ += std::to_string(byte_widths[i]) + "b";
|
|
205
|
+
}
|
|
206
|
+
final_ideal_end_ = prev_end;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
static const char* kName() { return "CappedKeySegmentsExtractor"; }
|
|
210
|
+
|
|
211
|
+
const char* Name() const override { return kName(); }
|
|
212
|
+
|
|
213
|
+
std::string GetId() const override { return id_; }
|
|
214
|
+
|
|
215
|
+
void Extract(const Slice& key_or_bound, KeyKind /*kind*/,
|
|
216
|
+
Result* result) const override {
|
|
217
|
+
// Optimistic assignment
|
|
218
|
+
result->segment_ends = ideal_ends_;
|
|
219
|
+
uint32_t key_size = static_cast<uint32_t>(key_or_bound.size());
|
|
220
|
+
if (key_size < final_ideal_end_) {
|
|
221
|
+
// Need to fix up (should be rare)
|
|
222
|
+
for (size_t i = 0; i < ideal_ends_.size(); ++i) {
|
|
223
|
+
result->segment_ends[i] = std::min(key_size, result->segment_ends[i]);
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
private:
|
|
229
|
+
std::vector<uint32_t> ideal_ends_;
|
|
230
|
+
uint32_t final_ideal_end_;
|
|
231
|
+
std::string id_;
|
|
232
|
+
};
|
|
233
|
+
|
|
155
234
|
void GetFilterInput(FilterInput select, const Slice& key,
|
|
156
235
|
const KeySegmentsExtractor::Result& extracted,
|
|
157
236
|
Slice* out_input, Slice* out_leadup) {
|
|
@@ -211,12 +290,6 @@ void GetFilterInput(FilterInput select, const Slice& key,
|
|
|
211
290
|
assert(false);
|
|
212
291
|
return Slice();
|
|
213
292
|
}
|
|
214
|
-
|
|
215
|
-
Slice operator()(SelectValue) {
|
|
216
|
-
// TODO
|
|
217
|
-
assert(false);
|
|
218
|
-
return Slice();
|
|
219
|
-
}
|
|
220
293
|
};
|
|
221
294
|
|
|
222
295
|
Slice input = std::visit(FilterInputGetter(key, extracted), select);
|
|
@@ -256,9 +329,6 @@ const char* DeserializeFilterInput(const char* p, const char* limit,
|
|
|
256
329
|
case 3:
|
|
257
330
|
*out = SelectColumnName{};
|
|
258
331
|
return p;
|
|
259
|
-
case 4:
|
|
260
|
-
*out = SelectValue{};
|
|
261
|
-
return p;
|
|
262
332
|
default:
|
|
263
333
|
// Reserved for future use
|
|
264
334
|
return nullptr;
|
|
@@ -315,7 +385,6 @@ void SerializeFilterInput(std::string* out, const FilterInput& select) {
|
|
|
315
385
|
void operator()(SelectLegacyKeyPrefix) { out->push_back(1); }
|
|
316
386
|
void operator()(SelectUserTimestamp) { out->push_back(2); }
|
|
317
387
|
void operator()(SelectColumnName) { out->push_back(3); }
|
|
318
|
-
void operator()(SelectValue) { out->push_back(4); }
|
|
319
388
|
void operator()(SelectKeySegment select) {
|
|
320
389
|
// TODO: expand supported cases
|
|
321
390
|
assert(select.segment_index < 16);
|
|
@@ -372,6 +441,7 @@ enum BuiltinSstQueryFilters : char {
|
|
|
372
441
|
// and filtered independently because it might be a special case that is
|
|
373
442
|
// not representative of the minimum in a spread of values.
|
|
374
443
|
kBytewiseMinMaxFilter = 0x10,
|
|
444
|
+
kRevBytewiseMinMaxFilter = 0x11,
|
|
375
445
|
};
|
|
376
446
|
|
|
377
447
|
class SstQueryFilterBuilder {
|
|
@@ -459,7 +529,10 @@ class CategoryScopeFilterWrapperBuilder : public SstQueryFilterBuilder {
|
|
|
459
529
|
|
|
460
530
|
class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
461
531
|
public:
|
|
462
|
-
|
|
532
|
+
explicit BytewiseMinMaxSstQueryFilterConfig(
|
|
533
|
+
const FilterInput& input,
|
|
534
|
+
const KeySegmentsExtractor::KeyCategorySet& categories, bool reverse)
|
|
535
|
+
: SstQueryFilterConfigImpl(input, categories), reverse_(reverse) {}
|
|
463
536
|
|
|
464
537
|
std::unique_ptr<SstQueryFilterBuilder> NewBuilder(
|
|
465
538
|
bool sanity_checks) const override {
|
|
@@ -477,11 +550,13 @@ class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
|
477
550
|
const KeySegmentsExtractor::Result& lower_bound_extracted,
|
|
478
551
|
const Slice& upper_bound_excl,
|
|
479
552
|
const KeySegmentsExtractor::Result& upper_bound_extracted) {
|
|
480
|
-
assert(!filter.empty() && filter[0] == kBytewiseMinMaxFilter
|
|
553
|
+
assert(!filter.empty() && (filter[0] == kBytewiseMinMaxFilter ||
|
|
554
|
+
filter[0] == kRevBytewiseMinMaxFilter));
|
|
481
555
|
if (filter.size() <= 4) {
|
|
482
556
|
// Missing some data
|
|
483
557
|
return true;
|
|
484
558
|
}
|
|
559
|
+
bool reverse = (filter[0] == kRevBytewiseMinMaxFilter);
|
|
485
560
|
bool empty_included = (filter[1] & kEmptySeenFlag) != 0;
|
|
486
561
|
const char* p = filter.data() + 2;
|
|
487
562
|
const char* limit = filter.data() + filter.size();
|
|
@@ -528,8 +603,13 @@ class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
|
528
603
|
|
|
529
604
|
// May match if both the upper bound and lower bound indicate there could
|
|
530
605
|
// be overlap
|
|
531
|
-
|
|
532
|
-
|
|
606
|
+
if (reverse) {
|
|
607
|
+
return upper_bound_input.compare(smallest) <= 0 &&
|
|
608
|
+
lower_bound_input.compare(largest) >= 0;
|
|
609
|
+
} else {
|
|
610
|
+
return upper_bound_input.compare(smallest) >= 0 &&
|
|
611
|
+
lower_bound_input.compare(largest) <= 0;
|
|
612
|
+
}
|
|
533
613
|
}
|
|
534
614
|
|
|
535
615
|
protected:
|
|
@@ -551,19 +631,11 @@ class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
|
551
631
|
&prev_leadup);
|
|
552
632
|
|
|
553
633
|
int compare = prev_leadup.compare(leadup);
|
|
554
|
-
if (compare
|
|
555
|
-
status = Status::Corruption(
|
|
556
|
-
"Ordering invariant violated from 0x" +
|
|
557
|
-
prev_key->ToString(/*hex=*/true) + " with prefix 0x" +
|
|
558
|
-
prev_leadup.ToString(/*hex=*/true) + " to 0x" +
|
|
559
|
-
key.ToString(/*hex=*/true) + " with prefix 0x" +
|
|
560
|
-
leadup.ToString(/*hex=*/true));
|
|
561
|
-
return;
|
|
562
|
-
} else if (compare == 0) {
|
|
634
|
+
if (compare == 0) {
|
|
563
635
|
// On the same prefix leading up to the segment, the segments must
|
|
564
636
|
// not be out of order.
|
|
565
637
|
compare = prev_input.compare(input);
|
|
566
|
-
if (compare > 0) {
|
|
638
|
+
if (parent.reverse_ ? compare < 0 : compare > 0) {
|
|
567
639
|
status = Status::Corruption(
|
|
568
640
|
"Ordering invariant violated from 0x" +
|
|
569
641
|
prev_key->ToString(/*hex=*/true) + " with segment 0x" +
|
|
@@ -573,6 +645,9 @@ class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
|
573
645
|
return;
|
|
574
646
|
}
|
|
575
647
|
}
|
|
648
|
+
// NOTE: it is not strictly required that the leadup be ordered, just
|
|
649
|
+
// satisfy the "common segment prefix property" which would be
|
|
650
|
+
// expensive to check
|
|
576
651
|
}
|
|
577
652
|
|
|
578
653
|
// Now actually update state for the filter inputs
|
|
@@ -598,7 +673,8 @@ class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
|
598
673
|
return 0;
|
|
599
674
|
}
|
|
600
675
|
return 2 + GetFilterInputSerializedLength(parent.input_) +
|
|
601
|
-
VarintLength(
|
|
676
|
+
VarintLength(parent.reverse_ ? largest.size() : smallest.size()) +
|
|
677
|
+
smallest.size() + largest.size();
|
|
602
678
|
}
|
|
603
679
|
|
|
604
680
|
void Finish(std::string& append_to) override {
|
|
@@ -610,23 +686,27 @@ class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
|
610
686
|
}
|
|
611
687
|
size_t old_append_to_size = append_to.size();
|
|
612
688
|
append_to.reserve(old_append_to_size + encoded_length);
|
|
613
|
-
append_to.push_back(
|
|
689
|
+
append_to.push_back(parent.reverse_ ? kRevBytewiseMinMaxFilter
|
|
690
|
+
: kBytewiseMinMaxFilter);
|
|
614
691
|
|
|
615
692
|
append_to.push_back(empty_seen ? kEmptySeenFlag : 0);
|
|
616
693
|
|
|
617
694
|
SerializeFilterInput(&append_to, parent.input_);
|
|
618
695
|
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
append_to.append(
|
|
696
|
+
auto& minv = parent.reverse_ ? largest : smallest;
|
|
697
|
+
auto& maxv = parent.reverse_ ? smallest : largest;
|
|
698
|
+
PutVarint32(&append_to, static_cast<uint32_t>(minv.size()));
|
|
699
|
+
append_to.append(minv);
|
|
700
|
+
// The end of `maxv` is given by the end of the filter
|
|
701
|
+
append_to.append(maxv);
|
|
623
702
|
assert(append_to.size() == old_append_to_size + encoded_length);
|
|
624
703
|
}
|
|
625
704
|
|
|
626
705
|
const BytewiseMinMaxSstQueryFilterConfig& parent;
|
|
627
706
|
const bool sanity_checks;
|
|
628
707
|
// Smallest and largest segment seen, excluding the empty segment which
|
|
629
|
-
// is tracked separately
|
|
708
|
+
// is tracked separately. "Reverse" from parent is only applied at
|
|
709
|
+
// serialization time, for efficiency.
|
|
630
710
|
std::string smallest;
|
|
631
711
|
std::string largest;
|
|
632
712
|
bool empty_seen = false;
|
|
@@ -635,6 +715,8 @@ class BytewiseMinMaxSstQueryFilterConfig : public SstQueryFilterConfigImpl {
|
|
|
635
715
|
Status status;
|
|
636
716
|
};
|
|
637
717
|
|
|
718
|
+
bool reverse_;
|
|
719
|
+
|
|
638
720
|
private:
|
|
639
721
|
static constexpr char kEmptySeenFlag = 0x1;
|
|
640
722
|
};
|
|
@@ -1036,6 +1118,7 @@ class SstQueryFilterConfigsManagerImpl : public SstQueryFilterConfigsManager {
|
|
|
1036
1118
|
may_match = MayMatch_CategoryScopeFilterWrapper(filter, *state);
|
|
1037
1119
|
break;
|
|
1038
1120
|
case kBytewiseMinMaxFilter:
|
|
1121
|
+
case kRevBytewiseMinMaxFilter:
|
|
1039
1122
|
if (state == nullptr) {
|
|
1040
1123
|
// TODO? Report problem
|
|
1041
1124
|
// No filtering
|
|
@@ -1189,14 +1272,63 @@ const std::string SstQueryFilterConfigsManagerImpl::kTablePropertyName =
|
|
|
1189
1272
|
"rocksdb.sqfc";
|
|
1190
1273
|
} // namespace
|
|
1191
1274
|
|
|
1275
|
+
std::shared_ptr<const KeySegmentsExtractor>
|
|
1276
|
+
MakeSharedCappedKeySegmentsExtractor(const std::vector<size_t>& byte_widths) {
|
|
1277
|
+
std::vector<uint32_t> byte_widths_checked;
|
|
1278
|
+
byte_widths_checked.resize(byte_widths.size());
|
|
1279
|
+
size_t final_end = 0;
|
|
1280
|
+
for (size_t i = 0; i < byte_widths.size(); ++i) {
|
|
1281
|
+
final_end += byte_widths[i];
|
|
1282
|
+
if (byte_widths[i] > UINT32_MAX / 2 || final_end > UINT32_MAX) {
|
|
1283
|
+
// Better to crash than to proceed unsafely
|
|
1284
|
+
return nullptr;
|
|
1285
|
+
}
|
|
1286
|
+
byte_widths_checked[i] = static_cast<uint32_t>(byte_widths[i]);
|
|
1287
|
+
}
|
|
1288
|
+
|
|
1289
|
+
switch (byte_widths_checked.size()) {
|
|
1290
|
+
case 0:
|
|
1291
|
+
return std::make_shared<SemiStaticCappedKeySegmentsExtractor<0>>(
|
|
1292
|
+
byte_widths_checked.data());
|
|
1293
|
+
case 1:
|
|
1294
|
+
return std::make_shared<SemiStaticCappedKeySegmentsExtractor<1>>(
|
|
1295
|
+
byte_widths_checked.data());
|
|
1296
|
+
case 2:
|
|
1297
|
+
return std::make_shared<SemiStaticCappedKeySegmentsExtractor<2>>(
|
|
1298
|
+
byte_widths_checked.data());
|
|
1299
|
+
case 3:
|
|
1300
|
+
return std::make_shared<SemiStaticCappedKeySegmentsExtractor<3>>(
|
|
1301
|
+
byte_widths_checked.data());
|
|
1302
|
+
case 4:
|
|
1303
|
+
return std::make_shared<SemiStaticCappedKeySegmentsExtractor<4>>(
|
|
1304
|
+
byte_widths_checked.data());
|
|
1305
|
+
case 5:
|
|
1306
|
+
return std::make_shared<SemiStaticCappedKeySegmentsExtractor<5>>(
|
|
1307
|
+
byte_widths_checked.data());
|
|
1308
|
+
case 6:
|
|
1309
|
+
return std::make_shared<SemiStaticCappedKeySegmentsExtractor<6>>(
|
|
1310
|
+
byte_widths_checked.data());
|
|
1311
|
+
default:
|
|
1312
|
+
return std::make_shared<DynamicCappedKeySegmentsExtractor>(
|
|
1313
|
+
byte_widths_checked);
|
|
1314
|
+
}
|
|
1315
|
+
}
|
|
1316
|
+
|
|
1192
1317
|
bool SstQueryFilterConfigs::IsEmptyNotFound() const {
|
|
1193
1318
|
return this == &kEmptyNotFoundSQFC;
|
|
1194
1319
|
}
|
|
1195
1320
|
|
|
1196
1321
|
std::shared_ptr<SstQueryFilterConfig> MakeSharedBytewiseMinMaxSQFC(
|
|
1197
1322
|
FilterInput input, KeySegmentsExtractor::KeyCategorySet categories) {
|
|
1198
|
-
return std::make_shared<BytewiseMinMaxSstQueryFilterConfig>(
|
|
1199
|
-
|
|
1323
|
+
return std::make_shared<BytewiseMinMaxSstQueryFilterConfig>(
|
|
1324
|
+
input, categories,
|
|
1325
|
+
/*reverse=*/false);
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
std::shared_ptr<SstQueryFilterConfig> MakeSharedReverseBytewiseMinMaxSQFC(
|
|
1329
|
+
FilterInput input, KeySegmentsExtractor::KeyCategorySet categories) {
|
|
1330
|
+
return std::make_shared<BytewiseMinMaxSstQueryFilterConfig>(input, categories,
|
|
1331
|
+
/*reverse=*/true);
|
|
1200
1332
|
}
|
|
1201
1333
|
|
|
1202
1334
|
Status SstQueryFilterConfigsManager::MakeShared(
|
|
@@ -113,8 +113,7 @@ Status ExternalSstFileIngestionJob::Prepare(
|
|
|
113
113
|
const std::string path_outside_db = f.external_file_path;
|
|
114
114
|
const std::string path_inside_db = TableFileName(
|
|
115
115
|
cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId());
|
|
116
|
-
if (ingestion_options_.move_files) {
|
|
117
|
-
assert(!ingestion_options_.allow_db_generated_files);
|
|
116
|
+
if (ingestion_options_.move_files || ingestion_options_.link_files) {
|
|
118
117
|
status =
|
|
119
118
|
fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr);
|
|
120
119
|
if (status.ok()) {
|
|
@@ -914,9 +913,18 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
914
913
|
} else if (!iter->status().ok()) {
|
|
915
914
|
return iter->status();
|
|
916
915
|
}
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
916
|
+
SequenceNumber largest_seqno =
|
|
917
|
+
table_reader.get()->GetTableProperties()->key_largest_seqno;
|
|
918
|
+
// UINT64_MAX means unknown and the file is generated before table property
|
|
919
|
+
// `key_largest_seqno` is introduced.
|
|
920
|
+
if (largest_seqno != UINT64_MAX && largest_seqno > 0) {
|
|
921
|
+
return Status::Corruption(
|
|
922
|
+
"External file has non zero largest sequence number " +
|
|
923
|
+
std::to_string(largest_seqno));
|
|
924
|
+
}
|
|
925
|
+
if (ingestion_options_.allow_db_generated_files &&
|
|
926
|
+
largest_seqno == UINT64_MAX) {
|
|
927
|
+
// Need to verify that all keys have seqno zero.
|
|
920
928
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
921
929
|
Status pik_status =
|
|
922
930
|
ParseInternalKey(iter->key(), &key, allow_data_in_errors);
|