@nxtedition/rocksdb 8.0.2 → 8.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +17 -17
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +55 -3
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +6 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +12 -13
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +14 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +6 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0

package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc

@@ -532,23 +532,18 @@ Status CompactionOutputs::AddRangeDels(
       // Pretend the smallest key has the same user key as lower_bound
       // (the max key in the previous table or subcompaction) in order for
       // files to appear key-space partitioned.
-      //
-      // When lower_bound is chosen by a subcompaction, we know that
-      // subcompactions over smaller keys cannot contain any keys at
-      // lower_bound. We also know that smaller subcompactions exist,
-      // because otherwise the subcompaction woud be unbounded on the left.
-      // As a result, we know that no other files on the output level will
-      // contain actual keys at lower_bound (an output file may have a
-      // largest key of lower_bound@kMaxSequenceNumber, but this only
-      // indicates a large range tombstone was truncated). Therefore, it is
-      // safe to use the tombstone's sequence number, to ensure that keys at
-      // lower_bound at lower levels are covered by truncated tombstones.
-      //
-      // If lower_bound was chosen by the smallest data key in the file,
-      // choose lowest seqnum so this file's smallest internal key comes
-      // after the previous file's largest. The fake seqnum is OK because
-      // the read path's file-picking code only considers user key.
       if (lower_bound_from_sub_compact) {
+        // When lower_bound is chosen by a subcompaction
+        // (lower_bound_from_sub_compact), we know that subcompactions over
+        // smaller keys cannot contain any keys at lower_bound. We also know
+        // that smaller subcompactions exist, because otherwise the
+        // subcompaction woud be unbounded on the left. As a result, we know
+        // that no other files on the output level will contain actual keys at
+        // lower_bound (an output file may have a largest key of
+        // lower_bound@kMaxSequenceNumber, but this only indicates a large range
+        // tombstone was truncated). Therefore, it is safe to use the
+        // tombstone's sequence number, to ensure that keys at lower_bound at
+        // lower levels are covered by truncated tombstones.
         if (ts_sz) {
           assert(tombstone.ts_.size() == ts_sz);
           smallest_candidate = InternalKey(*lower_bound, tombstone.seq_,

@@ -558,6 +553,7 @@ Status CompactionOutputs::AddRangeDels(
               InternalKey(*lower_bound, tombstone.seq_, kTypeRangeDeletion);
         }
       } else if (lower_bound_from_range_tombstone) {
+        // When lower_bound is chosen from a range tombtone start key:
         // Range tombstone keys can be truncated at file boundaries of the files
         // that contain them.
         //

@@ -591,6 +587,10 @@ Status CompactionOutputs::AddRangeDels(
           smallest_candidate = range_tombstone_lower_bound_;
         }
       } else {
+        // If lower_bound was chosen by the smallest data key in the file,
+        // choose lowest seqnum so this file's smallest internal key comes
+        // after the previous file's largest. The fake seqnum is OK because
+        // the read path's file-picking code only considers user key.
         smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
       }
     }

@@ -640,7 +640,7 @@ Status CompactionOutputs::AddRangeDels(
     // it cannot have a seqnum of 0 (unless the smallest data key in a file
     // has a seqnum of 0). Otherwise, the truncated tombstone may expose
    // deleted keys at lower levels.
-    assert(smallest_ikey_seqnum == 0 ||
+    assert(smallest_ikey_seqnum == 0 || lower_bound_from_range_tombstone ||
           ExtractInternalKeyFooter(meta.smallest.Encode()) !=
           PackSequenceAndType(0, kTypeRangeDeletion));
   }
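
For context on the assert in the last hunk: a RocksDB internal key ends in an 8-byte footer that packs the sequence number and value type, which is what ExtractInternalKeyFooter() and PackSequenceAndType() compare. A minimal sketch of that packing, assuming the standard layout with the seqnum in the high 56 bits (illustrative, not the RocksDB source):

    #include <cassert>
    #include <cstdint>

    // Value type tag carried in the low byte of the footer; 0xF is the tag
    // RocksDB uses for range deletions (assumed here for illustration).
    enum ValueType : uint8_t { kTypeRangeDeletion = 0xF };

    // Pack a 56-bit sequence number and a type into one uint64_t footer.
    uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
      return (seq << 8) | t;
    }

    int main() {
      // The assert in the diff checks that a file's smallest key is not
      // lower_bound@seqnum=0 as a range deletion, i.e. not this footer:
      assert(PackSequenceAndType(0, kTypeRangeDeletion) == 0xF);
      return 0;
    }
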
package/deps/rocksdb/rocksdb/db/db_compaction_test.cc

@@ -1026,6 +1026,70 @@ TEST_F(DBCompactionTest, CompactionSstPartitioner) {
   ASSERT_EQ("B", Get("bbbb1"));
 }
 
+TEST_F(DBCompactionTest, CompactionSstPartitionWithManualCompaction) {
+  Options options = CurrentOptions();
+  options.compaction_style = kCompactionStyleLevel;
+  options.level0_file_num_compaction_trigger = 3;
+
+  DestroyAndReopen(options);
+
+  // create first file and flush to l0
+  ASSERT_OK(Put("000015", "A"));
+  ASSERT_OK(Put("000025", "B"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+
+  // create second file and flush to l0
+  ASSERT_OK(Put("000015", "A2"));
+  ASSERT_OK(Put("000025", "B2"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+
+  // CONTROL 1: compact without partitioner
+  CompactRangeOptions compact_options;
+  compact_options.bottommost_level_compaction =
+      BottommostLevelCompaction::kForceOptimized;
+  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+  // Check (compacted but no partitioning yet)
+  std::vector<LiveFileMetaData> files;
+  dbfull()->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(1, files.size());
+
+  // Install partitioner
+  std::shared_ptr<SstPartitionerFactory> factory(
+      NewSstPartitionerFixedPrefixFactory(5));
+  options.sst_partitioner_factory = factory;
+  Reopen(options);
+
+  // CONTROL 2: request compaction on range with no partition boundary and no
+  // overlap with actual entries
+  Slice from("000017");
+  Slice to("000019");
+  ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));
+
+  // Check (no partitioning yet)
+  files.clear();
+  dbfull()->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(1, files.size());
+  ASSERT_EQ("A2", Get("000015"));
+  ASSERT_EQ("B2", Get("000025"));
+
+  // TEST: request compaction overlapping with partition boundary but no
+  // actual entries
+  // NOTE: `to` is INCLUSIVE
+  from = Slice("000019");
+  to = Slice("000020");
+  ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));
+
+  // Check (must be partitioned)
+  files.clear();
+  dbfull()->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(2, files.size());
+  ASSERT_EQ("A2", Get("000015"));
+  ASSERT_EQ("B2", Get("000025"));
+}
+
 TEST_F(DBCompactionTest, CompactionSstPartitionerNonTrivial) {
   Options options = CurrentOptions();
   options.compaction_style = kCompactionStyleLevel;
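
The new test drives the fixed-prefix SST partitioner through manual compaction. As a standalone illustration of the same option, here is a sketch of a regular application enabling it; the database path and keys are hypothetical:

    #include <rocksdb/db.h>
    #include <rocksdb/options.h>
    #include <rocksdb/sst_partitioner.h>

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      // Cut output files whenever the first 5 key bytes change, so keys
      // "00001..." and "00002..." can never share an SST file after
      // compaction.
      options.sst_partitioner_factory =
          rocksdb::NewSstPartitionerFixedPrefixFactory(5);

      rocksdb::DB* db = nullptr;
      rocksdb::Status s =
          rocksdb::DB::Open(options, "/tmp/partitioned_db", &db);
      if (s.ok()) {
        db->Put(rocksdb::WriteOptions(), "000015", "A");
        db->Put(rocksdb::WriteOptions(), "000025", "B");
        delete db;
      }
      return s.ok() ? 0 : 1;
    }
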
package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc

@@ -1087,6 +1087,22 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
   {
     SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
     Version* current_version = super_version->current;
+
+    // Might need to query the partitioner
+    SstPartitionerFactory* partitioner_factory =
+        current_version->cfd()->ioptions()->sst_partitioner_factory.get();
+    std::unique_ptr<SstPartitioner> partitioner;
+    if (partitioner_factory && begin != nullptr && end != nullptr) {
+      SstPartitioner::Context context;
+      context.is_full_compaction = false;
+      context.is_manual_compaction = true;
+      context.output_level = /*unknown*/ -1;
+      // Small lies about compaction range
+      context.smallest_user_key = *begin;
+      context.largest_user_key = *end;
+      partitioner = partitioner_factory->CreatePartitioner(context);
+    }
+
     ReadOptions ro;
     ro.total_order_seek = true;
     bool overlap;

@@ -1094,14 +1110,50 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
          level < current_version->storage_info()->num_non_empty_levels();
          level++) {
       overlap = true;
+
+      // Whether to look at specific keys within files for overlap with
+      // compaction range, other than largest and smallest keys of the file
+      // known in Version metadata.
+      bool check_overlap_within_file = false;
       if (begin != nullptr && end != nullptr) {
+        // Typically checking overlap within files in this case
+        check_overlap_within_file = true;
+        // WART: Not known why we don't check within file in one-sided bound
+        // cases
+        if (partitioner) {
+          // Especially if the partitioner is new, the manual compaction
+          // might be used to enforce the partitioning. Checking overlap
+          // within files might miss cases where compaction is needed to
+          // partition the files, as in this example:
+          // * File has two keys "001" and "111"
+          // * Compaction range is ["011", "101")
+          // * Partition boundary at "100"
+          // In cases like this, file-level overlap with the compaction
+          // range is sufficient to force any partitioning that is needed
+          // within the compaction range.
+          //
+          // But if there's no partitioning boundary within the compaction
+          // range, we can be sure there's no need to fix partitioning
+          // within that range, thus safe to check overlap within file.
+          //
+          // Use a hypothetical trivial move query to check for partition
+          // boundary in range. (NOTE: in defiance of all conventions,
+          // `begin` and `end` here are both INCLUSIVE bounds, which makes
+          // this analogy to CanDoTrivialMove() accurate even when `end` is
+          // the first key in a partition.)
+          if (!partitioner->CanDoTrivialMove(*begin, *end)) {
+            check_overlap_within_file = false;
+          }
+        }
+      }
+      if (check_overlap_within_file) {
         Status status = current_version->OverlapWithLevelIterator(
             ro, file_options_, *begin, *end, level, &overlap);
         if (!status.ok()) {
-          overlap = current_version->storage_info()->OverlapInLevel(
-              level, begin, end);
+          check_overlap_within_file = false;
         }
-      } else {
+      }
+      if (!check_overlap_within_file) {
         overlap = current_version->storage_info()->OverlapInLevel(level,
                                                                   begin, end);
       }
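
The gating above only trusts a within-file overlap check when no partition boundary lies inside [begin, end]. A sketch of what CanDoTrivialMove() amounts to for the fixed-prefix partitioner (assumed behavior for illustration, not the RocksDB implementation): a range can be trivially moved iff both endpoints fall in the same partition, i.e. share the same key prefix.

    #include <algorithm>
    #include <cstddef>
    #include <rocksdb/slice.h>

    bool SamePartitionFixedPrefix(const rocksdb::Slice& smallest,
                                  const rocksdb::Slice& largest,
                                  size_t prefix_len) {
      // Compare only the first prefix_len bytes of each endpoint; if they
      // match, no partition boundary separates the two keys.
      rocksdb::Slice a(smallest.data(), std::min(prefix_len, smallest.size()));
      rocksdb::Slice b(largest.data(), std::min(prefix_len, largest.size()));
      return a.compare(b) == 0;
    }
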
package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc

@@ -354,7 +354,7 @@ TEST_F(RangeTombstoneFragmenterTest,
 
   FragmentedRangeTombstoneList fragment_list(
       std::move(range_del_iter), bytewise_icmp, true /* for_compaction */,
-      {
+      {9, 20} /* snapshots */);
   FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
                                         kMaxSequenceNumber /* upper_bound */);
   VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h

@@ -12,13 +12,15 @@
 #include "db_stress_tool/db_stress_common.h"
 
 namespace ROCKSDB_NAMESPACE {
-class DbStressEnvWrapper : public EnvWrapper {
+class DbStressFSWrapper : public FileSystemWrapper {
  public:
-  explicit DbStressEnvWrapper(Env* t) : EnvWrapper(t) {}
-  static const char* kClassName() { return "DbStressEnv"; }
+  explicit DbStressFSWrapper(const std::shared_ptr<FileSystem>& t)
+      : FileSystemWrapper(t) {}
+  static const char* kClassName() { return "DbStressFS"; }
   const char* Name() const override { return kClassName(); }
 
-  Status DeleteFile(const std::string& f) override {
+  IOStatus DeleteFile(const std::string& f, const IOOptions& opts,
+                      IODebugContext* dbg) override {
     // We determine whether it is a manifest file by searching a strong,
     // so that there will be false positive if the directory path contains the
     // keyword but it is unlikely.

@@ -28,11 +30,11 @@ class DbStressEnvWrapper : public EnvWrapper {
         f.find("checkpoint") != std::string::npos ||
         f.find(".backup") != std::string::npos ||
         f.find(".restore") != std::string::npos) {
-      return target()->DeleteFile(f);
+      return target()->DeleteFile(f, opts, dbg);
     }
     // Rename the file instead of deletion to keep the history, and
     // at the same time it is not visible to RocksDB.
-    return target()->RenameFile(f, f + "_renamed_");
+    return target()->RenameFile(f, f + "_renamed_", opts, dbg);
   }
 
   // If true, all manifest files will not be delted in DeleteFile().
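
This hunk migrates the stress tool's wrapper from the legacy EnvWrapper to FileSystemWrapper, whose overrides forward IOOptions and IODebugContext to the wrapped FileSystem. A minimal self-contained sketch of the same delegation pattern (an illustrative subclass, not part of RocksDB):

    #include <cstdio>
    #include <memory>
    #include <string>
    #include <rocksdb/file_system.h>

    class LoggingFS : public rocksdb::FileSystemWrapper {
     public:
      explicit LoggingFS(const std::shared_ptr<rocksdb::FileSystem>& t)
          : FileSystemWrapper(t) {}
      static const char* kClassName() { return "LoggingFS"; }
      const char* Name() const override { return kClassName(); }

      rocksdb::IOStatus DeleteFile(const std::string& f,
                                   const rocksdb::IOOptions& opts,
                                   rocksdb::IODebugContext* dbg) override {
        std::fprintf(stderr, "DeleteFile: %s\n", f.c_str());
        // Forward opts/dbg to the wrapped FileSystem, which the old
        // Env-based wrapper could not do.
        return target()->DeleteFile(f, opts, dbg);
      }
    };
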
package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc

@@ -1046,9 +1046,11 @@ void StressTest::OperateDb(ThreadState* thread) {
         TestIterateAgainstExpected(thread, read_opts, rand_column_families,
                                    rand_keys);
       } else {
-        int num_seeks = static_cast<int>(
-            std::min(static_cast<uint64_t>(thread->rand.Uniform(4)),
-                     FLAGS_ops_per_thread - i - 1));
+        int num_seeks = static_cast<int>(std::min(
+            std::max(static_cast<uint64_t>(thread->rand.Uniform(4)),
+                     static_cast<uint64_t>(1)),
+            std::max(static_cast<uint64_t>(FLAGS_ops_per_thread - i - 1),
+                     static_cast<uint64_t>(1))));
         rand_keys = GenerateNKeys(thread, num_seeks, i);
         i += num_seeks - 1;
         TestIterate(thread, read_opts, rand_column_families, rand_keys);

@@ -3025,7 +3027,7 @@ bool InitializeOptionsFromFile(Options& options) {
             FLAGS_options_file.c_str(), s.ToString().c_str());
     exit(1);
   }
-  db_options.env = new DbStressEnvWrapper(db_stress_env);
+  db_options.env = new CompositeEnvWrapper(db_stress_env);
   options = Options(db_options, cf_descriptors[0].options);
   return true;
 }
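
The min/max chain above keeps num_seeks in [1, remaining ops] while never letting the upper bound fall below 1, so the stress loop cannot pick zero seeks. An equivalent formulation with std::clamp (hypothetical helper, for illustration only):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    uint64_t NumSeeks(uint64_t uniform_draw, uint64_t remaining_ops) {
      // clamp(v, 1, max(remaining_ops, 1)) == min(max(v, 1), max(remaining_ops, 1))
      return std::clamp<uint64_t>(uniform_draw, 1,
                                  std::max<uint64_t>(remaining_ops, 1));
    }

    int main() {
      assert(NumSeeks(0, 10) == 1);  // never zero seeks
      assert(NumSeeks(3, 1) == 1);   // never exceeds remaining ops
      assert(NumSeeks(3, 10) == 3);
      return 0;
    }
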
package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc

@@ -29,8 +29,8 @@
 namespace ROCKSDB_NAMESPACE {
 namespace {
 static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
-static std::shared_ptr<ROCKSDB_NAMESPACE::
-static std::shared_ptr<ROCKSDB_NAMESPACE::
+static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_wrapper_guard;
+static std::shared_ptr<ROCKSDB_NAMESPACE::CompositeEnvWrapper>
     dbsl_env_wrapper_guard;
 static std::shared_ptr<CompositeEnvWrapper> fault_env_guard;
 }  // namespace

@@ -77,7 +77,7 @@ int db_stress_tool(int argc, char** argv) {
             s.ToString().c_str());
     exit(1);
   }
-  dbsl_env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
+  dbsl_env_wrapper_guard = std::make_shared<CompositeEnvWrapper>(raw_env);
   db_stress_listener_env = dbsl_env_wrapper_guard.get();
 
   if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||

@@ -96,17 +96,16 @@ int db_stress_tool(int argc, char** argv) {
     raw_env = fault_env_guard.get();
   }
 
-  env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
-
-
-
-  //
-  //
-  //
-
-  // CompositeEnvWrapper of env and fault_fs.
-  db_stress_env = raw_env;
+  env_wrapper_guard = std::make_shared<CompositeEnvWrapper>(
+      raw_env, std::make_shared<DbStressFSWrapper>(raw_env->GetFileSystem()));
+  if (!env_opts) {
+    // If using the default Env (Posix), wrap DbStressEnvWrapper with the
+    // legacy EnvWrapper. This is a temporary fix for the ReadAsync interface
+    // not being properly supported with Posix and db_stress. The EnvWrapper
+    // has a default implementation of ReadAsync that redirects to Read.
+    env_wrapper_guard = std::make_shared<EnvWrapper>(env_wrapper_guard);
   }
+  db_stress_env = env_wrapper_guard.get();
 
   FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str());
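
The hunk above composes a custom FileSystem into an Env for the rest of the tool. RocksDB's public NewCompositeEnv helper expresses the same pattern; a sketch assuming it lives in rocksdb/env.h and using the default FileSystem in place of db_stress's internal DbStressFSWrapper:

    #include <memory>
    #include <rocksdb/env.h>
    #include <rocksdb/file_system.h>

    std::unique_ptr<rocksdb::Env> MakeWrappedEnv(
        const std::shared_ptr<rocksdb::FileSystem>& fs) {
      // The returned Env routes file I/O through `fs` and inherits threads,
      // clock, etc. from the default Env.
      return rocksdb::NewCompositeEnv(fs);
    }

    int main() {
      auto env = MakeWrappedEnv(rocksdb::FileSystem::Default());
      return env != nullptr ? 0 : 1;
    }
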
package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc

@@ -700,6 +700,11 @@ class NonBatchedOpsStressTest : public StressTest {
     uint64_t count = 0;
     Status s;
 
+    if (fault_fs_guard) {
+      fault_fs_guard->EnableErrorInjection();
+      SharedState::ignore_read_error = false;
+    }
+
     for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix);
          iter->Next()) {
       ++count;

@@ -733,13 +738,20 @@ class NonBatchedOpsStressTest : public StressTest {
       s = iter->status();
     }
 
-    if (!s.ok()) {
+    uint64_t error_count = 0;
+    if (fault_fs_guard) {
+      error_count = fault_fs_guard->GetAndResetErrorCount();
+    }
+    if (!s.ok() && (!fault_fs_guard || (fault_fs_guard && !error_count))) {
       fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
       thread->stats.AddErrors(1);
 
       return s;
     }
 
+    if (fault_fs_guard) {
+      fault_fs_guard->DisableErrorInjection();
+    }
     thread->stats.AddPrefixes(1, count);
 
     return Status::OK();
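
A toy model (not RocksDB code) of the bracketing pattern added above: enable error injection around the scan, then treat a failure as a real bug only if the injector did not fire.

    #include <cstdint>
    #include <cstdio>

    struct FakeFaultInjector {
      uint64_t error_count = 0;
      bool enabled = false;
      void EnableErrorInjection() { enabled = true; }
      void DisableErrorInjection() { enabled = false; }
      uint64_t GetAndResetErrorCount() {
        uint64_t c = error_count;
        error_count = 0;
        return c;
      }
    };

    int main() {
      FakeFaultInjector fi;
      fi.EnableErrorInjection();
      bool op_ok = false;  // pretend the scan failed...
      fi.error_count = 1;  // ...because we injected an error
      uint64_t injected = fi.GetAndResetErrorCount();
      fi.DisableErrorInjection();
      // Only an uninjected failure counts as a test error.
      if (!op_ok && injected == 0) {
        std::fprintf(stderr, "unexpected scan failure\n");
        return 1;
      }
      return 0;
    }
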
package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc

@@ -247,10 +247,14 @@ void FilePrefetchBuffer::AbortAllIOs() {
   // Release io_handles.
   if (bufs_[curr_].io_handle_ != nullptr && bufs_[curr_].del_fn_ != nullptr) {
     DestroyAndClearIOHandle(curr_);
+  } else {
+    bufs_[curr_].async_read_in_progress_ = false;
   }
 
   if (bufs_[second].io_handle_ != nullptr && bufs_[second].del_fn_ != nullptr) {
     DestroyAndClearIOHandle(second);
+  } else {
+    bufs_[second].async_read_in_progress_ = false;
   }
 }
 

@@ -325,7 +329,16 @@ Status FilePrefetchBuffer::HandleOverlappingData(
     uint64_t& tmp_offset, size_t& tmp_length) {
   Status s;
   size_t alignment = reader->file()->GetRequiredBufferAlignment();
-  uint32_t second = curr_ ^ 1;
+  uint32_t second;
+
+  // Check if the first buffer has the required offset and the async read is
+  // still in progress. This should only happen if a prefetch was initiated
+  // by Seek, but the next access is at another offset.
+  if (bufs_[curr_].async_read_in_progress_ &&
+      IsOffsetInBufferWithAsyncProgress(offset, curr_)) {
+    PollAndUpdateBuffersIfNeeded(offset);
+  }
+  second = curr_ ^ 1;
 
   // If data is overlapping over two buffers, copy the data from curr_ and
   // call ReadAsync on curr_.
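
FilePrefetchBuffer keeps two buffers and addresses them as curr_ and curr_ ^ 1; XOR-ing with 1 flips between indexes 0 and 1, which is why the refactor above can defer computing `second` until after the poll. A trivial standalone illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t curr = 0;
      assert((curr ^ 1) == 1);  // the "other" buffer of 0 is 1
      curr ^= 1;                // swap roles of the two buffers
      assert((curr ^ 1) == 0);  // and the other buffer of 1 is 0
      return 0;
    }
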
package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h

@@ -39,9 +39,13 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
   if (rep_->uncompression_dict_reader && block_type == BlockType::kData) {
     CachableEntry<UncompressionDict> uncompression_dict;
     const bool no_io = (ro.read_tier == kBlockCacheTier);
+    // For async scans, don't use the prefetch buffer since an async prefetch
+    // might already be under way and this would invalidate it. Also, the
+    // uncompression dict is typically at the end of the file and would
+    // most likely break the sequentiality of the access pattern.
     s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
-        prefetch_buffer, no_io, ro.verify_checksums,
-        lookup_context, &uncompression_dict);
+        ro.async_io ? nullptr : prefetch_buffer, no_io, ro.verify_checksums,
+        get_context, lookup_context, &uncompression_dict);
     if (!s.ok()) {
       iter->Invalidate(s);
       return iter;
package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc

@@ -26,6 +26,7 @@
 #include "test_util/sync_point.h"
 #include "util/coding.h"
 #include "util/crc32c.h"
+#include "util/mutexlock.h"
 #include "util/random.h"
 #include "util/string_util.h"
 #include "util/xxhash.h"

@@ -412,6 +413,35 @@ IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n,
   return s;
 }
 
+IOStatus TestFSRandomAccessFile::ReadAsync(
+    FSReadRequest& req, const IOOptions& opts,
+    std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+    void** io_handle, IOHandleDeleter* del_fn, IODebugContext* /*dbg*/) {
+  IOStatus ret;
+  IOStatus s;
+  FSReadRequest res;
+  if (!fs_->IsFilesystemActive()) {
+    ret = fs_->GetError();
+  } else {
+    ret = fs_->InjectThreadSpecificReadError(
+        FaultInjectionTestFS::ErrorOperation::kRead, &res.result,
+        use_direct_io(), req.scratch, /*need_count_increase=*/true,
+        /*fault_injected=*/nullptr);
+  }
+  if (ret.ok()) {
+    if (fs_->ShouldInjectRandomReadError()) {
+      ret = IOStatus::IOError("Injected read error");
+    } else {
+      s = target_->ReadAsync(req, opts, cb, cb_arg, io_handle, del_fn, nullptr);
+    }
+  }
+  if (!ret.ok()) {
+    res.status = ret;
+    cb(res, cb_arg);
+  }
+  return s;
+}
+
 IOStatus TestFSRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs,
                                            const IOOptions& options,
                                            IODebugContext* dbg) {

@@ -803,6 +833,15 @@ IOStatus FaultInjectionTestFS::LinkFile(const std::string& s,
   return io_s;
 }
 
+IOStatus FaultInjectionTestFS::Poll(std::vector<void*>& io_handles,
+                                    size_t min_completions) {
+  return target()->Poll(io_handles, min_completions);
+}
+
+IOStatus FaultInjectionTestFS::AbortIO(std::vector<void*>& io_handles) {
+  return target()->AbortIO(io_handles);
+}
+
 void FaultInjectionTestFS::WritableFileClosed(const FSFileState& state) {
   MutexLock l(&mutex_);
   if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) {
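
A toy model (not the RocksDB types) of the contract the injected ReadAsync above follows: when an error is injected, the callback is invoked synchronously with a failed request, just as a completed async read would be reported later.

    #include <cstdio>
    #include <functional>
    #include <string>

    struct FakeReadRequest {
      std::string status;  // empty means OK
    };

    void ReadAsyncSketch(bool inject_error,
                         const std::function<void(const FakeReadRequest&)>& cb) {
      if (inject_error) {
        FakeReadRequest res;
        res.status = "Injected read error";
        cb(res);  // complete immediately with the injected failure
        return;
      }
      // Otherwise: hand the request to the real filesystem, which invokes
      // cb when the read finishes.
    }

    int main() {
      ReadAsyncSketch(true, [](const FakeReadRequest& r) {
        std::printf("callback saw: %s\n", r.status.c_str());
      });
      return 0;
    }
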
package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h

@@ -141,6 +141,10 @@ class TestFSRandomAccessFile : public FSRandomAccessFile {
   IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
                 Slice* result, char* scratch,
                 IODebugContext* dbg) const override;
+  IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
+                     std::function<void(const FSReadRequest&, void*)> cb,
+                     void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
+                     IODebugContext* dbg) override;
   IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
                      const IOOptions& options, IODebugContext* dbg) override;
   size_t GetRequiredBufferAlignment() const override {

@@ -266,6 +270,11 @@ class FaultInjectionTestFS : public FileSystemWrapper {
     return io_s;
   }
 
+  virtual IOStatus Poll(std::vector<void*>& io_handles,
+                        size_t min_completions) override;
+
+  virtual IOStatus AbortIO(std::vector<void*>& io_handles) override;
+
   void WritableFileClosed(const FSFileState& state);
 
   void WritableFileSynced(const FSFileState& state);
package/package.json (changed, +1 -1)

package/prebuilds/darwin-arm64/node.napi.node (binary file)

package/prebuilds/linux-x64/node.napi.node (binary file)