@nxtedition/rocksdb 7.0.23 → 7.0.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +3 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +5 -0
- package/deps/rocksdb/rocksdb/Makefile +6 -2
- package/deps/rocksdb/rocksdb/TARGETS +14 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +4 -1
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +20 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +44 -31
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +491 -722
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +468 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +51 -52
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +28 -16
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +12 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +1 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +170 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +63 -36
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +4 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +57 -38
- package/deps/rocksdb/rocksdb/db/blob/blob_read_request.h +58 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +164 -74
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +42 -29
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +419 -62
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +208 -8
- package/deps/rocksdb/rocksdb/db/c.cc +68 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +95 -2
- package/deps/rocksdb/rocksdb/db/column_family.cc +12 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +92 -15
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +76 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +52 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +30 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +126 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +203 -1584
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +93 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +87 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +314 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +328 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +7 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +174 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +474 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +825 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +46 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +42 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +223 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +255 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +1253 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +32 -8
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -8
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +376 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +103 -78
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +4 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +21 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +19 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +91 -14
- package/deps/rocksdb/rocksdb/db/db_iter.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +33 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +79 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +5 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +185 -0
- package/deps/rocksdb/rocksdb/db/dbformat.cc +1 -4
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -8
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +71 -29
- package/deps/rocksdb/rocksdb/db/internal_stats.h +160 -5
- package/deps/rocksdb/rocksdb/db/log_reader.cc +29 -3
- package/deps/rocksdb/rocksdb/db/log_reader.h +12 -3
- package/deps/rocksdb/rocksdb/db/repair_test.cc +1 -3
- package/deps/rocksdb/rocksdb/db/version_edit.cc +6 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +93 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +4 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +42 -35
- package/deps/rocksdb/rocksdb/db/write_batch.cc +10 -2
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +10 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +3 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +140 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +12 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +46 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +27 -7
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/env/env_posix.cc +14 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +130 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +7 -1
- package/deps/rocksdb/rocksdb/env/io_posix.cc +18 -50
- package/deps/rocksdb/rocksdb/env/io_posix.h +53 -6
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +8 -10
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +3 -7
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +239 -259
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +84 -19
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +24 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +31 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +11 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +37 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/threadpool.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +13 -13
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -2
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +38 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +7 -1
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +17 -0
- package/deps/rocksdb/rocksdb/port/win/env_win.h +8 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +6 -3
- package/deps/rocksdb/rocksdb/src.mk +5 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +15 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +5 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -2
- package/deps/rocksdb/rocksdb/table/get_context.cc +1 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +24 -4
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/util/compression.h +2 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +18 -1
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +67 -4
- package/deps/rocksdb/rocksdb/util/threadpool_imp.h +8 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +15 -12
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb.gyp +5 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -30,10 +30,100 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
30
30
|
// SYNC_POINT is not supported in released Windows mode.
|
|
31
31
|
#if !defined(ROCKSDB_LITE)
|
|
32
32
|
|
|
33
|
+
class CompactionStatsCollector : public EventListener {
|
|
34
|
+
public:
|
|
35
|
+
CompactionStatsCollector()
|
|
36
|
+
: compaction_completed_(
|
|
37
|
+
static_cast<int>(CompactionReason::kNumOfReasons)) {
|
|
38
|
+
for (auto& v : compaction_completed_) {
|
|
39
|
+
v.store(0);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
~CompactionStatsCollector() override {}
|
|
44
|
+
|
|
45
|
+
void OnCompactionCompleted(DB* /* db */,
|
|
46
|
+
const CompactionJobInfo& info) override {
|
|
47
|
+
int k = static_cast<int>(info.compaction_reason);
|
|
48
|
+
int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
|
|
49
|
+
assert(k >= 0 && k < num_of_reasons);
|
|
50
|
+
compaction_completed_[k]++;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
void OnExternalFileIngested(
|
|
54
|
+
DB* /* db */, const ExternalFileIngestionInfo& /* info */) override {
|
|
55
|
+
int k = static_cast<int>(CompactionReason::kExternalSstIngestion);
|
|
56
|
+
compaction_completed_[k]++;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
void OnFlushCompleted(DB* /* db */, const FlushJobInfo& /* info */) override {
|
|
60
|
+
int k = static_cast<int>(CompactionReason::kFlush);
|
|
61
|
+
compaction_completed_[k]++;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
int NumberOfCompactions(CompactionReason reason) const {
|
|
65
|
+
int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
|
|
66
|
+
int k = static_cast<int>(reason);
|
|
67
|
+
assert(k >= 0 && k < num_of_reasons);
|
|
68
|
+
return compaction_completed_.at(k).load();
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
private:
|
|
72
|
+
std::vector<std::atomic<int>> compaction_completed_;
|
|
73
|
+
};
|
|
74
|
+
|
|
33
75
|
class DBCompactionTest : public DBTestBase {
|
|
34
76
|
public:
|
|
35
77
|
DBCompactionTest()
|
|
36
78
|
: DBTestBase("db_compaction_test", /*env_do_fsync=*/true) {}
|
|
79
|
+
|
|
80
|
+
protected:
|
|
81
|
+
#ifndef ROCKSDB_LITE
|
|
82
|
+
uint64_t GetSstSizeHelper(Temperature temperature) {
|
|
83
|
+
std::string prop;
|
|
84
|
+
EXPECT_TRUE(dbfull()->GetProperty(
|
|
85
|
+
DB::Properties::kLiveSstFilesSizeAtTemperature +
|
|
86
|
+
std::to_string(static_cast<uint8_t>(temperature)),
|
|
87
|
+
&prop));
|
|
88
|
+
return static_cast<uint64_t>(std::atoi(prop.c_str()));
|
|
89
|
+
}
|
|
90
|
+
#endif // ROCKSDB_LITE
|
|
91
|
+
|
|
92
|
+
/*
|
|
93
|
+
* Verifies compaction stats of cfd are valid.
|
|
94
|
+
*
|
|
95
|
+
* For each level of cfd, its compaction stats are valid if
|
|
96
|
+
* 1) sum(stat.counts) == stat.count, and
|
|
97
|
+
* 2) stat.counts[i] == collector.NumberOfCompactions(i)
|
|
98
|
+
*/
|
|
99
|
+
void VerifyCompactionStats(ColumnFamilyData& cfd,
|
|
100
|
+
const CompactionStatsCollector& collector) {
|
|
101
|
+
#ifndef NDEBUG
|
|
102
|
+
InternalStats* internal_stats_ptr = cfd.internal_stats();
|
|
103
|
+
ASSERT_NE(internal_stats_ptr, nullptr);
|
|
104
|
+
const std::vector<InternalStats::CompactionStats>& comp_stats =
|
|
105
|
+
internal_stats_ptr->TEST_GetCompactionStats();
|
|
106
|
+
const int num_of_reasons =
|
|
107
|
+
static_cast<int>(CompactionReason::kNumOfReasons);
|
|
108
|
+
std::vector<int> counts(num_of_reasons, 0);
|
|
109
|
+
// Count the number of compactions caused by each CompactionReason across
|
|
110
|
+
// all levels.
|
|
111
|
+
for (const auto& stat : comp_stats) {
|
|
112
|
+
int sum = 0;
|
|
113
|
+
for (int i = 0; i < num_of_reasons; i++) {
|
|
114
|
+
counts[i] += stat.counts[i];
|
|
115
|
+
sum += stat.counts[i];
|
|
116
|
+
}
|
|
117
|
+
ASSERT_EQ(sum, stat.count);
|
|
118
|
+
}
|
|
119
|
+
// Verify InternalStats bookkeeping matches that of
|
|
120
|
+
// CompactionStatsCollector, assuming that all compactions complete.
|
|
121
|
+
for (int i = 0; i < num_of_reasons; i++) {
|
|
122
|
+
ASSERT_EQ(collector.NumberOfCompactions(static_cast<CompactionReason>(i)),
|
|
123
|
+
counts[i]);
|
|
124
|
+
}
|
|
125
|
+
#endif /* NDEBUG */
|
|
126
|
+
}
|
|
37
127
|
};
|
|
38
128
|
|
|
39
129
|
class DBCompactionTestWithParam
|
|
@@ -110,47 +200,6 @@ class FlushedFileCollector : public EventListener {
|
|
|
110
200
|
std::mutex mutex_;
|
|
111
201
|
};
|
|
112
202
|
|
|
113
|
-
class CompactionStatsCollector : public EventListener {
|
|
114
|
-
public:
|
|
115
|
-
CompactionStatsCollector()
|
|
116
|
-
: compaction_completed_(static_cast<int>(CompactionReason::kNumOfReasons)) {
|
|
117
|
-
for (auto& v : compaction_completed_) {
|
|
118
|
-
v.store(0);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
|
|
122
|
-
~CompactionStatsCollector() override {}
|
|
123
|
-
|
|
124
|
-
void OnCompactionCompleted(DB* /* db */,
|
|
125
|
-
const CompactionJobInfo& info) override {
|
|
126
|
-
int k = static_cast<int>(info.compaction_reason);
|
|
127
|
-
int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
|
|
128
|
-
assert(k >= 0 && k < num_of_reasons);
|
|
129
|
-
compaction_completed_[k]++;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
void OnExternalFileIngested(
|
|
133
|
-
DB* /* db */, const ExternalFileIngestionInfo& /* info */) override {
|
|
134
|
-
int k = static_cast<int>(CompactionReason::kExternalSstIngestion);
|
|
135
|
-
compaction_completed_[k]++;
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
void OnFlushCompleted(DB* /* db */, const FlushJobInfo& /* info */) override {
|
|
139
|
-
int k = static_cast<int>(CompactionReason::kFlush);
|
|
140
|
-
compaction_completed_[k]++;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
int NumberOfCompactions(CompactionReason reason) const {
|
|
144
|
-
int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
|
|
145
|
-
int k = static_cast<int>(reason);
|
|
146
|
-
assert(k >= 0 && k < num_of_reasons);
|
|
147
|
-
return compaction_completed_.at(k).load();
|
|
148
|
-
}
|
|
149
|
-
|
|
150
|
-
private:
|
|
151
|
-
std::vector<std::atomic<int>> compaction_completed_;
|
|
152
|
-
};
|
|
153
|
-
|
|
154
203
|
class SstStatsCollector : public EventListener {
|
|
155
204
|
public:
|
|
156
205
|
SstStatsCollector() : num_ssts_creation_started_(0) {}
|
|
@@ -247,40 +296,6 @@ void VerifyCompactionResult(
|
|
|
247
296
|
#endif
|
|
248
297
|
}
|
|
249
298
|
|
|
250
|
-
/*
|
|
251
|
-
* Verifies compaction stats of cfd are valid.
|
|
252
|
-
*
|
|
253
|
-
* For each level of cfd, its compaction stats are valid if
|
|
254
|
-
* 1) sum(stat.counts) == stat.count, and
|
|
255
|
-
* 2) stat.counts[i] == collector.NumberOfCompactions(i)
|
|
256
|
-
*/
|
|
257
|
-
void VerifyCompactionStats(ColumnFamilyData& cfd,
|
|
258
|
-
const CompactionStatsCollector& collector) {
|
|
259
|
-
#ifndef NDEBUG
|
|
260
|
-
InternalStats* internal_stats_ptr = cfd.internal_stats();
|
|
261
|
-
ASSERT_NE(internal_stats_ptr, nullptr);
|
|
262
|
-
const std::vector<InternalStats::CompactionStats>& comp_stats =
|
|
263
|
-
internal_stats_ptr->TEST_GetCompactionStats();
|
|
264
|
-
const int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
|
|
265
|
-
std::vector<int> counts(num_of_reasons, 0);
|
|
266
|
-
// Count the number of compactions caused by each CompactionReason across
|
|
267
|
-
// all levels.
|
|
268
|
-
for (const auto& stat : comp_stats) {
|
|
269
|
-
int sum = 0;
|
|
270
|
-
for (int i = 0; i < num_of_reasons; i++) {
|
|
271
|
-
counts[i] += stat.counts[i];
|
|
272
|
-
sum += stat.counts[i];
|
|
273
|
-
}
|
|
274
|
-
ASSERT_EQ(sum, stat.count);
|
|
275
|
-
}
|
|
276
|
-
// Verify InternalStats bookkeeping matches that of CompactionStatsCollector,
|
|
277
|
-
// assuming that all compactions complete.
|
|
278
|
-
for (int i = 0; i < num_of_reasons; i++) {
|
|
279
|
-
ASSERT_EQ(collector.NumberOfCompactions(static_cast<CompactionReason>(i)), counts[i]);
|
|
280
|
-
}
|
|
281
|
-
#endif /* NDEBUG */
|
|
282
|
-
}
|
|
283
|
-
|
|
284
299
|
const SstFileMetaData* PickFileRandomly(
|
|
285
300
|
const ColumnFamilyMetaData& cf_meta,
|
|
286
301
|
Random* rand,
|
|
@@ -1093,16 +1108,20 @@ TEST_F(DBCompactionTest, ManualCompactionUnknownOutputSize) {
|
|
|
1093
1108
|
// create two files in l1 that we can compact
|
|
1094
1109
|
for (int i = 0; i < 2; ++i) {
|
|
1095
1110
|
for (int j = 0; j < options.level0_file_num_compaction_trigger; j++) {
|
|
1096
|
-
// make l0 files' ranges overlap to avoid trivial move
|
|
1097
1111
|
ASSERT_OK(Put(std::to_string(2 * i), std::string(1, 'A')));
|
|
1098
1112
|
ASSERT_OK(Put(std::to_string(2 * i + 1), std::string(1, 'A')));
|
|
1099
1113
|
ASSERT_OK(Flush());
|
|
1100
1114
|
ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
|
|
1101
1115
|
}
|
|
1102
1116
|
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
|
1103
|
-
ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
|
|
1104
|
-
ASSERT_EQ(NumTableFilesAtLevel(1, 0), i + 1);
|
|
1105
1117
|
}
|
|
1118
|
+
ASSERT_OK(
|
|
1119
|
+
dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}}));
|
|
1120
|
+
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
|
1121
|
+
ASSERT_EQ(NumTableFilesAtLevel(0, 0), 0);
|
|
1122
|
+
ASSERT_EQ(NumTableFilesAtLevel(1, 0), 2);
|
|
1123
|
+
ASSERT_OK(
|
|
1124
|
+
dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "3"}}));
|
|
1106
1125
|
|
|
1107
1126
|
ColumnFamilyMetaData cf_meta;
|
|
1108
1127
|
dbfull()->GetColumnFamilyMetaData(dbfull()->DefaultColumnFamily(), &cf_meta);
|
|
@@ -4366,7 +4385,13 @@ TEST_F(DBCompactionTest, LevelTtlBooster) {
|
|
|
4366
4385
|
ASSERT_OK(Flush());
|
|
4367
4386
|
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
|
4368
4387
|
}
|
|
4388
|
+
// Force files to be compacted to L1
|
|
4389
|
+
ASSERT_OK(
|
|
4390
|
+
dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "1"}}));
|
|
4391
|
+
ASSERT_OK(dbfull()->TEST_WaitForCompact());
|
|
4369
4392
|
ASSERT_EQ("0,1,2", FilesPerLevel());
|
|
4393
|
+
ASSERT_OK(
|
|
4394
|
+
dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"}}));
|
|
4370
4395
|
|
|
4371
4396
|
ASSERT_GT(SizeAtLevel(1), kNumKeysPerFile * 4 * kValueSize);
|
|
4372
4397
|
}
|
|
@@ -246,8 +246,6 @@ DBImpl::DBImpl(const DBOptions& options, const std::string& dbname,
|
|
|
246
246
|
// !batch_per_txn_ implies seq_per_batch_ because it is only unset for
|
|
247
247
|
// WriteUnprepared, which should use seq_per_batch_.
|
|
248
248
|
assert(batch_per_txn_ || seq_per_batch_);
|
|
249
|
-
// TODO: Check for an error here
|
|
250
|
-
env_->GetAbsolutePath(dbname, &db_absolute_path_).PermitUncheckedError();
|
|
251
249
|
|
|
252
250
|
// Reserve ten files or so for other uses and give the rest to TableCache.
|
|
253
251
|
// Give a large number for setting of "infinite" open files.
|
|
@@ -1444,12 +1442,12 @@ Status DBImpl::MarkLogsSynced(uint64_t up_to, bool synced_dir) {
|
|
|
1444
1442
|
for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;) {
|
|
1445
1443
|
auto& wal = *it;
|
|
1446
1444
|
assert(wal.IsSyncing());
|
|
1447
|
-
if (immutable_db_options_.track_and_verify_wals_in_manifest &&
|
|
1448
|
-
wal.GetPreSyncSize() > 0) {
|
|
1449
|
-
synced_wals.AddWal(wal.number, WalMetadata(wal.GetPreSyncSize()));
|
|
1450
|
-
}
|
|
1451
1445
|
|
|
1452
1446
|
if (logs_.size() > 1) {
|
|
1447
|
+
if (immutable_db_options_.track_and_verify_wals_in_manifest &&
|
|
1448
|
+
wal.GetPreSyncSize() > 0) {
|
|
1449
|
+
synced_wals.AddWal(wal.number, WalMetadata(wal.GetPreSyncSize()));
|
|
1450
|
+
}
|
|
1453
1451
|
logs_to_free_.push_back(wal.ReleaseWriter());
|
|
1454
1452
|
// To modify logs_ both mutex_ and log_write_mutex_ must be held
|
|
1455
1453
|
InstrumentedMutexLock l(&log_write_mutex_);
|
|
@@ -1999,7 +1999,6 @@ class DBImpl : public DB {
|
|
|
1999
1999
|
void MemTableInsertStatusCheck(const Status& memtable_insert_status);
|
|
2000
2000
|
|
|
2001
2001
|
#ifndef ROCKSDB_LITE
|
|
2002
|
-
|
|
2003
2002
|
Status CompactFilesImpl(const CompactionOptions& compact_options,
|
|
2004
2003
|
ColumnFamilyData* cfd, Version* version,
|
|
2005
2004
|
const std::vector<std::string>& input_file_names,
|
|
@@ -2011,7 +2010,6 @@ class DBImpl : public DB {
|
|
|
2011
2010
|
// Wait for current IngestExternalFile() calls to finish.
|
|
2012
2011
|
// REQUIRES: mutex_ held
|
|
2013
2012
|
void WaitForIngestFile();
|
|
2014
|
-
|
|
2015
2013
|
#else
|
|
2016
2014
|
// IngestExternalFile is not supported in ROCKSDB_LITE so this function
|
|
2017
2015
|
// will be no-op
|
|
@@ -2498,12 +2496,6 @@ class DBImpl : public DB {
|
|
|
2498
2496
|
// log is fully commited.
|
|
2499
2497
|
bool unable_to_release_oldest_log_;
|
|
2500
2498
|
|
|
2501
|
-
static const int KEEP_LOG_FILE_NUM = 1000;
|
|
2502
|
-
// MSVC version 1800 still does not have constexpr for ::max()
|
|
2503
|
-
static const uint64_t kNoTimeOut = std::numeric_limits<uint64_t>::max();
|
|
2504
|
-
|
|
2505
|
-
std::string db_absolute_path_;
|
|
2506
|
-
|
|
2507
2499
|
// Number of running IngestExternalFile() or CreateColumnFamilyWithImport()
|
|
2508
2500
|
// calls.
|
|
2509
2501
|
// REQUIRES: mutex held
|
|
@@ -1108,9 +1108,11 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
|
|
|
1108
1108
|
|
|
1109
1109
|
TEST_SYNC_POINT_CALLBACK("DBImpl::RecoverLogFiles:BeforeReadWal",
|
|
1110
1110
|
/*arg=*/nullptr);
|
|
1111
|
+
uint64_t record_checksum;
|
|
1111
1112
|
while (!stop_replay_by_wal_filter &&
|
|
1112
1113
|
reader.ReadRecord(&record, &scratch,
|
|
1113
|
-
immutable_db_options_.wal_recovery_mode
|
|
1114
|
+
immutable_db_options_.wal_recovery_mode,
|
|
1115
|
+
&record_checksum) &&
|
|
1114
1116
|
status.ok()) {
|
|
1115
1117
|
if (record.size() < WriteBatchInternal::kHeader) {
|
|
1116
1118
|
reporter.Corruption(record.size(),
|
|
@@ -1126,8 +1128,13 @@ Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
|
|
|
1126
1128
|
if (!status.ok()) {
|
|
1127
1129
|
return status;
|
|
1128
1130
|
}
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
1132
|
+
"DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch", &batch);
|
|
1133
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
1134
|
+
"DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:checksum",
|
|
1135
|
+
&record_checksum);
|
|
1136
|
+
status = WriteBatchInternal::UpdateProtectionInfo(
|
|
1137
|
+
&batch, 8 /* bytes_per_key */, &record_checksum);
|
|
1131
1138
|
if (!status.ok()) {
|
|
1132
1139
|
return status;
|
|
1133
1140
|
}
|
|
@@ -682,12 +682,6 @@ Status DB::OpenAsSecondary(
|
|
|
682
682
|
const std::vector<ColumnFamilyDescriptor>& column_families,
|
|
683
683
|
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr) {
|
|
684
684
|
*dbptr = nullptr;
|
|
685
|
-
if (db_options.max_open_files != -1) {
|
|
686
|
-
// TODO (yanqin) maybe support max_open_files != -1 by creating hard links
|
|
687
|
-
// on SST files so that db secondary can still have access to old SSTs
|
|
688
|
-
// while primary instance may delete original.
|
|
689
|
-
return Status::InvalidArgument("require max_open_files to be -1");
|
|
690
|
-
}
|
|
691
685
|
|
|
692
686
|
DBOptions tmp_opts(db_options);
|
|
693
687
|
Status s;
|
|
@@ -699,6 +693,27 @@ Status DB::OpenAsSecondary(
|
|
|
699
693
|
}
|
|
700
694
|
}
|
|
701
695
|
|
|
696
|
+
assert(tmp_opts.info_log != nullptr);
|
|
697
|
+
if (db_options.max_open_files != -1) {
|
|
698
|
+
std::ostringstream oss;
|
|
699
|
+
oss << "The primary instance may delete all types of files after they "
|
|
700
|
+
"become obsolete. The application can coordinate the primary and "
|
|
701
|
+
"secondary so that primary does not delete/rename files that are "
|
|
702
|
+
"currently being used by the secondary. Alternatively, a custom "
|
|
703
|
+
"Env/FS can be provided such that files become inaccessible only "
|
|
704
|
+
"after all primary and secondaries indicate that they are obsolete "
|
|
705
|
+
"and deleted. If the above two are not possible, you can open the "
|
|
706
|
+
"secondary instance with `max_open_files==-1` so that secondary "
|
|
707
|
+
"will eagerly keep all table files open. Even if a file is deleted, "
|
|
708
|
+
"its content can still be accessed via a prior open file "
|
|
709
|
+
"descriptor. This is a hacky workaround for only table files. If "
|
|
710
|
+
"none of the above is done, then point lookup or "
|
|
711
|
+
"range scan via the secondary instance can result in IOError: file "
|
|
712
|
+
"not found. This can be resolved by retrying "
|
|
713
|
+
"TryCatchUpWithPrimary().";
|
|
714
|
+
ROCKS_LOG_WARN(tmp_opts.info_log, "%s", oss.str().c_str());
|
|
715
|
+
}
|
|
716
|
+
|
|
702
717
|
handles->clear();
|
|
703
718
|
DBImplSecondary* impl = new DBImplSecondary(tmp_opts, dbname, secondary_path);
|
|
704
719
|
impl->versions_.reset(new ReactiveVersionSet(
|
|
@@ -84,8 +84,17 @@ class DBImplSecondary : public DBImpl {
|
|
|
84
84
|
bool error_if_data_exists_in_wals, uint64_t* = nullptr,
|
|
85
85
|
RecoveryContext* recovery_ctx = nullptr) override;
|
|
86
86
|
|
|
87
|
-
// Implementations of the DB interface
|
|
87
|
+
// Implementations of the DB interface.
|
|
88
88
|
using DB::Get;
|
|
89
|
+
// Can return IOError due to files being deleted by the primary. To avoid
|
|
90
|
+
// IOError in this case, application can coordinate between primary and
|
|
91
|
+
// secondaries so that primary will not delete files that are currently being
|
|
92
|
+
// used by the secondaries. The application can also provide a custom FS/Env
|
|
93
|
+
// implementation so that files will remain present until all primary and
|
|
94
|
+
// secondaries indicate that they can be deleted. As a partial hacky
|
|
95
|
+
// workaround, the secondaries can be opened with `max_open_files=-1` so that
|
|
96
|
+
// it eagerly keeps all table files open and is able to access the contents of
|
|
97
|
+
// deleted files via prior open fd.
|
|
89
98
|
Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
|
|
90
99
|
const Slice& key, PinnableSlice* value) override;
|
|
91
100
|
|
|
@@ -98,6 +107,15 @@ class DBImplSecondary : public DBImpl {
|
|
|
98
107
|
std::string* timestamp);
|
|
99
108
|
|
|
100
109
|
using DBImpl::NewIterator;
|
|
110
|
+
// Operations on the created iterators can return IOError due to files being
|
|
111
|
+
// deleted by the primary. To avoid IOError in this case, application can
|
|
112
|
+
// coordinate between primary and secondaries so that primary will not delete
|
|
113
|
+
// files that are currently being used by the secondaries. The application can
|
|
114
|
+
// also provide a custom FS/Env implementation so that files will remain
|
|
115
|
+
// present until all primary and secondaries indicate that they can be
|
|
116
|
+
// deleted. As a partial hacky workaround, the secondaries can be opened with
|
|
117
|
+
// `max_open_files=-1` so that it eagerly keeps all table files open and is
|
|
118
|
+
// able to access the contents of deleted files via prior open fd.
|
|
101
119
|
Iterator* NewIterator(const ReadOptions&,
|
|
102
120
|
ColumnFamilyHandle* column_family) override;
|
|
103
121
|
|
|
@@ -812,6 +812,10 @@ bool DBIter::FindValueForCurrentKey() {
|
|
|
812
812
|
ValueType last_not_merge_type = kTypeDeletion;
|
|
813
813
|
ValueType last_key_entry_type = kTypeDeletion;
|
|
814
814
|
|
|
815
|
+
// If false, it indicates that we have not seen any valid entry, even though
|
|
816
|
+
// last_key_entry_type is initialized to kTypeDeletion.
|
|
817
|
+
bool valid_entry_seen = false;
|
|
818
|
+
|
|
815
819
|
// Temporarily pin blocks that hold (merge operands / the value)
|
|
816
820
|
ReleaseTempPinnedData();
|
|
817
821
|
TempPinData();
|
|
@@ -822,20 +826,33 @@ bool DBIter::FindValueForCurrentKey() {
|
|
|
822
826
|
return false;
|
|
823
827
|
}
|
|
824
828
|
|
|
829
|
+
if (!user_comparator_.EqualWithoutTimestamp(ikey.user_key,
|
|
830
|
+
saved_key_.GetUserKey())) {
|
|
831
|
+
// Found a smaller user key, thus we are done with current user key.
|
|
832
|
+
break;
|
|
833
|
+
}
|
|
834
|
+
|
|
825
835
|
assert(ikey.user_key.size() >= timestamp_size_);
|
|
826
836
|
Slice ts;
|
|
827
837
|
if (timestamp_size_ > 0) {
|
|
828
838
|
ts = Slice(ikey.user_key.data() + ikey.user_key.size() - timestamp_size_,
|
|
829
839
|
timestamp_size_);
|
|
830
840
|
}
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
841
|
+
|
|
842
|
+
bool visible = IsVisible(ikey.sequence, ts);
|
|
843
|
+
if (!visible &&
|
|
844
|
+
(timestamp_lb_ == nullptr ||
|
|
845
|
+
user_comparator_.CompareTimestamp(ts, *timestamp_ub_) > 0)) {
|
|
846
|
+
// Found an invisible version of the current user key, and it must have
|
|
847
|
+
// a higher sequence number or timestamp. Therefore, we are done with the
|
|
848
|
+
// current user key.
|
|
834
849
|
break;
|
|
835
850
|
}
|
|
851
|
+
|
|
836
852
|
if (!ts.empty()) {
|
|
837
853
|
saved_timestamp_.assign(ts.data(), ts.size());
|
|
838
854
|
}
|
|
855
|
+
|
|
839
856
|
if (TooManyInternalKeysSkipped()) {
|
|
840
857
|
return false;
|
|
841
858
|
}
|
|
@@ -852,6 +869,15 @@ bool DBIter::FindValueForCurrentKey() {
|
|
|
852
869
|
return false;
|
|
853
870
|
}
|
|
854
871
|
|
|
872
|
+
if (timestamp_lb_ != nullptr) {
|
|
873
|
+
// Only needed when timestamp_lb_ is not null
|
|
874
|
+
[[maybe_unused]] const bool ret = ParseKey(&ikey_);
|
|
875
|
+
saved_ikey_.assign(iter_.key().data(), iter_.key().size());
|
|
876
|
+
// Since the preceding ParseKey(&ikey) succeeds, so must this.
|
|
877
|
+
assert(ret);
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
valid_entry_seen = true;
|
|
855
881
|
last_key_entry_type = ikey.type;
|
|
856
882
|
switch (last_key_entry_type) {
|
|
857
883
|
case kTypeValue:
|
|
@@ -908,6 +934,14 @@ bool DBIter::FindValueForCurrentKey() {
|
|
|
908
934
|
PERF_COUNTER_ADD(internal_key_skipped_count, 1);
|
|
909
935
|
iter_.Prev();
|
|
910
936
|
++num_skipped;
|
|
937
|
+
|
|
938
|
+
if (visible && timestamp_lb_ != nullptr) {
|
|
939
|
+
// If timestamp_lb_ is not nullptr, we do not have to look further for
|
|
940
|
+
// another internal key. We can return this current internal key. Yet we
|
|
941
|
+
// still keep the invariant that iter_ is positioned before the returned
|
|
942
|
+
// key.
|
|
943
|
+
break;
|
|
944
|
+
}
|
|
911
945
|
}
|
|
912
946
|
|
|
913
947
|
if (!iter_.status().ok()) {
|
|
@@ -915,6 +949,20 @@ bool DBIter::FindValueForCurrentKey() {
|
|
|
915
949
|
return false;
|
|
916
950
|
}
|
|
917
951
|
|
|
952
|
+
if (!valid_entry_seen) {
|
|
953
|
+
// Since we haven't seen any valid entry, last_key_entry_type remains
|
|
954
|
+
// unchanged and the same as its initial value.
|
|
955
|
+
assert(last_key_entry_type == kTypeDeletion);
|
|
956
|
+
assert(last_not_merge_type == kTypeDeletion);
|
|
957
|
+
valid_ = false;
|
|
958
|
+
return true;
|
|
959
|
+
}
|
|
960
|
+
|
|
961
|
+
if (timestamp_lb_ != nullptr) {
|
|
962
|
+
assert(last_key_entry_type == ikey_.type ||
|
|
963
|
+
last_key_entry_type == kTypeRangeDeletion);
|
|
964
|
+
}
|
|
965
|
+
|
|
918
966
|
Status s;
|
|
919
967
|
s.PermitUncheckedError();
|
|
920
968
|
is_blob_ = false;
|
|
@@ -923,7 +971,12 @@ bool DBIter::FindValueForCurrentKey() {
|
|
|
923
971
|
case kTypeDeletionWithTimestamp:
|
|
924
972
|
case kTypeSingleDeletion:
|
|
925
973
|
case kTypeRangeDeletion:
|
|
926
|
-
|
|
974
|
+
if (timestamp_lb_ == nullptr) {
|
|
975
|
+
valid_ = false;
|
|
976
|
+
} else {
|
|
977
|
+
saved_key_.SetInternalKey(saved_ikey_);
|
|
978
|
+
valid_ = true;
|
|
979
|
+
}
|
|
927
980
|
return true;
|
|
928
981
|
case kTypeMerge:
|
|
929
982
|
current_entry_is_merged_ = true;
|
|
@@ -970,6 +1023,9 @@ bool DBIter::FindValueForCurrentKey() {
|
|
|
970
1023
|
break;
|
|
971
1024
|
case kTypeValue:
|
|
972
1025
|
// do nothing - we already have the value in pinned_value_
|
|
1026
|
+
if (timestamp_lb_ != nullptr) {
|
|
1027
|
+
saved_key_.SetInternalKey(saved_ikey_);
|
|
1028
|
+
}
|
|
973
1029
|
break;
|
|
974
1030
|
case kTypeBlobIndex:
|
|
975
1031
|
if (!SetBlobValueIfNeeded(saved_key_.GetUserKey(), pinned_value_)) {
|
|
@@ -1015,7 +1071,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
|
|
1015
1071
|
&last_key,
|
|
1016
1072
|
ParsedInternalKey(saved_key_.GetUserKey(), sequence_,
|
|
1017
1073
|
kValueTypeForSeek),
|
|
1018
|
-
*timestamp_ub_);
|
|
1074
|
+
timestamp_lb_ == nullptr ? *timestamp_ub_ : *timestamp_lb_);
|
|
1019
1075
|
}
|
|
1020
1076
|
iter_.Seek(last_key);
|
|
1021
1077
|
RecordTick(statistics_, NUMBER_OF_RESEEKS_IN_ITERATION);
|
|
@@ -1060,7 +1116,12 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
|
|
1060
1116
|
range_del_agg_.ShouldDelete(
|
|
1061
1117
|
ikey, RangeDelPositioningMode::kBackwardTraversal) ||
|
|
1062
1118
|
kTypeDeletionWithTimestamp == ikey.type) {
|
|
1063
|
-
|
|
1119
|
+
if (timestamp_lb_ == nullptr) {
|
|
1120
|
+
valid_ = false;
|
|
1121
|
+
} else {
|
|
1122
|
+
valid_ = true;
|
|
1123
|
+
saved_key_.SetInternalKey(ikey);
|
|
1124
|
+
}
|
|
1064
1125
|
return true;
|
|
1065
1126
|
}
|
|
1066
1127
|
if (!iter_.PrepareValue()) {
|
|
@@ -1085,6 +1146,10 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
|
|
|
1085
1146
|
}
|
|
1086
1147
|
}
|
|
1087
1148
|
|
|
1149
|
+
if (timestamp_lb_ != nullptr) {
|
|
1150
|
+
saved_key_.SetInternalKey(ikey);
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1088
1153
|
valid_ = true;
|
|
1089
1154
|
return true;
|
|
1090
1155
|
}
|
|
@@ -1214,8 +1279,7 @@ bool DBIter::FindUserKeyBeforeSavedKey() {
|
|
|
1214
1279
|
return false;
|
|
1215
1280
|
}
|
|
1216
1281
|
|
|
1217
|
-
if (
|
|
1218
|
-
saved_key_.GetUserKey()) < 0) {
|
|
1282
|
+
if (CompareKeyForSkip(ikey.user_key, saved_key_.GetUserKey()) < 0) {
|
|
1219
1283
|
return true;
|
|
1220
1284
|
}
|
|
1221
1285
|
|
|
@@ -1328,7 +1392,9 @@ void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) {
|
|
|
1328
1392
|
if (timestamp_size_ > 0) {
|
|
1329
1393
|
const std::string kTsMin(timestamp_size_, '\0');
|
|
1330
1394
|
Slice ts = kTsMin;
|
|
1331
|
-
saved_key_.UpdateInternalKey(
|
|
1395
|
+
saved_key_.UpdateInternalKey(
|
|
1396
|
+
/*seq=*/0, kValueTypeForSeekForPrev,
|
|
1397
|
+
timestamp_lb_ == nullptr ? &ts : timestamp_lb_);
|
|
1332
1398
|
}
|
|
1333
1399
|
|
|
1334
1400
|
if (iterate_upper_bound_ != nullptr &&
|
|
@@ -1341,8 +1407,9 @@ void DBIter::SetSavedKeyToSeekForPrevTarget(const Slice& target) {
|
|
|
1341
1407
|
if (timestamp_size_ > 0) {
|
|
1342
1408
|
const std::string kTsMax(timestamp_size_, '\xff');
|
|
1343
1409
|
Slice ts = kTsMax;
|
|
1344
|
-
saved_key_.UpdateInternalKey(
|
|
1345
|
-
|
|
1410
|
+
saved_key_.UpdateInternalKey(
|
|
1411
|
+
kMaxSequenceNumber, kValueTypeForSeekForPrev,
|
|
1412
|
+
timestamp_lb_ != nullptr ? timestamp_lb_ : &ts);
|
|
1346
1413
|
}
|
|
1347
1414
|
}
|
|
1348
1415
|
}
|
|
@@ -1543,11 +1610,21 @@ void DBIter::SeekToLast() {
|
|
|
1543
1610
|
if (iterate_upper_bound_ != nullptr) {
|
|
1544
1611
|
// Seek to last key strictly less than ReadOptions.iterate_upper_bound.
|
|
1545
1612
|
SeekForPrev(*iterate_upper_bound_);
|
|
1546
|
-
|
|
1547
|
-
|
|
1548
|
-
|
|
1613
|
+
const bool is_ikey = (timestamp_size_ > 0 && timestamp_lb_ != nullptr);
|
|
1614
|
+
Slice k = Valid() ? key() : Slice();
|
|
1615
|
+
if (is_ikey && Valid()) {
|
|
1616
|
+
k.remove_suffix(kNumInternalBytes + timestamp_size_);
|
|
1617
|
+
}
|
|
1618
|
+
while (Valid() && 0 == user_comparator_.CompareWithoutTimestamp(
|
|
1619
|
+
*iterate_upper_bound_, /*a_has_ts=*/false, k,
|
|
1620
|
+
/*b_has_ts=*/false)) {
|
|
1549
1621
|
ReleaseTempPinnedData();
|
|
1550
1622
|
PrevInternal(nullptr);
|
|
1623
|
+
|
|
1624
|
+
k = key();
|
|
1625
|
+
if (is_ikey) {
|
|
1626
|
+
k.remove_suffix(kNumInternalBytes + timestamp_size_);
|
|
1627
|
+
}
|
|
1551
1628
|
}
|
|
1552
1629
|
return;
|
|
1553
1630
|
}
|
|
@@ -224,9 +224,11 @@ class DBIter final : public Iterator {
|
|
|
224
224
|
bool ReverseToBackward();
|
|
225
225
|
// Set saved_key_ to the seek key to target, with proper sequence number set.
|
|
226
226
|
// It might get adjusted if the seek key is smaller than iterator lower bound.
|
|
227
|
+
// target does not have timestamp.
|
|
227
228
|
void SetSavedKeyToSeekTarget(const Slice& target);
|
|
228
229
|
// Set saved_key_ to the seek key to target, with proper sequence number set.
|
|
229
230
|
// It might get adjusted if the seek key is larger than iterator upper bound.
|
|
231
|
+
// target does not have timestamp.
|
|
230
232
|
void SetSavedKeyToSeekForPrevTarget(const Slice& target);
|
|
231
233
|
bool FindValueForCurrentKey();
|
|
232
234
|
bool FindValueForCurrentKeyUsingSeek();
|
|
@@ -377,6 +379,9 @@ class DBIter final : public Iterator {
|
|
|
377
379
|
const Slice* const timestamp_lb_;
|
|
378
380
|
const size_t timestamp_size_;
|
|
379
381
|
std::string saved_timestamp_;
|
|
382
|
+
|
|
383
|
+
// Used only if timestamp_lb_ is not nullptr.
|
|
384
|
+
std::string saved_ikey_;
|
|
380
385
|
};
|
|
381
386
|
|
|
382
387
|
// Return a new iterator that converts internal keys (yielded by
|
|
@@ -627,6 +627,39 @@ INSTANTIATE_TEST_CASE_P(
|
|
|
627
627
|
|
|
628
628
|
// TODO: add test for transactions
|
|
629
629
|
// TODO: add test for corrupted write batch with WAL disabled
|
|
630
|
+
|
|
631
|
+
class DbKVChecksumWALToWriteBatchTest : public DBTestBase {
|
|
632
|
+
public:
|
|
633
|
+
DbKVChecksumWALToWriteBatchTest()
|
|
634
|
+
: DBTestBase("db_kv_checksum_test", /*env_do_fsync=*/false) {}
|
|
635
|
+
};
|
|
636
|
+
|
|
637
|
+
TEST_F(DbKVChecksumWALToWriteBatchTest, WriteBatchChecksumHandoff) {
|
|
638
|
+
Options options = CurrentOptions();
|
|
639
|
+
Reopen(options);
|
|
640
|
+
ASSERT_OK(db_->Put(WriteOptions(), "key", "val"));
|
|
641
|
+
std::string content = "";
|
|
642
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
643
|
+
"DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:batch",
|
|
644
|
+
[&](void* batch_ptr) {
|
|
645
|
+
WriteBatch* batch = reinterpret_cast<WriteBatch*>(batch_ptr);
|
|
646
|
+
content.assign(batch->Data().data(), batch->GetDataSize());
|
|
647
|
+
Slice batch_content = batch->Data();
|
|
648
|
+
// Corrupt first bit
|
|
649
|
+
CorruptWriteBatch(&batch_content, 0, 1);
|
|
650
|
+
});
|
|
651
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
652
|
+
"DBImpl::RecoverLogFiles:BeforeUpdateProtectionInfo:checksum",
|
|
653
|
+
[&](void* checksum_ptr) {
|
|
654
|
+
// Verify that checksum is produced on the batch content
|
|
655
|
+
uint64_t checksum = *reinterpret_cast<uint64_t*>(checksum_ptr);
|
|
656
|
+
ASSERT_EQ(checksum, XXH3_64bits(content.data(), content.size()));
|
|
657
|
+
});
|
|
658
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
659
|
+
ASSERT_TRUE(TryReopen(options).IsCorruption());
|
|
660
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
661
|
+
};
|
|
662
|
+
|
|
630
663
|
} // namespace ROCKSDB_NAMESPACE
|
|
631
664
|
|
|
632
665
|
int main(int argc, char** argv) {
|