@nxtedition/rocksdb 12.1.3 → 12.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +12 -13
- package/binding.gyp +0 -4
- package/deps/rocksdb/rocksdb/Makefile +10 -5
- package/deps/rocksdb/rocksdb/TARGETS +9 -7
- package/deps/rocksdb/rocksdb/cache/cache.cc +15 -11
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +26 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +16 -0
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +6 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +38 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +4 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +11 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +6 -0
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +56 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +12 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +10 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +9 -0
- package/deps/rocksdb/rocksdb/db/c.cc +9 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +12 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +6 -23
- package/deps/rocksdb/rocksdb/db/column_family.h +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +4 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +14 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +19 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +34 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +16 -31
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +7 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +95 -84
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +616 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +8 -2
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +93 -69
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +353 -89
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +4 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +116 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +67 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +42 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +50 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +79 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +36 -59
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +72 -39
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +14 -12
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +75 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +36 -22
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +23 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +28 -3
- package/deps/rocksdb/rocksdb/db/error_handler.h +2 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +165 -33
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +37 -28
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -6
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -6
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +14 -6
- package/deps/rocksdb/rocksdb/db/job_context.h +4 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +24 -14
- package/deps/rocksdb/rocksdb/db/memtable.h +2 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +61 -33
- package/deps/rocksdb/rocksdb/db/memtable_list.h +8 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -2
- package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
- package/deps/rocksdb/rocksdb/db/version_builder.cc +14 -11
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +20 -4
- package/deps/rocksdb/rocksdb/db/version_set.cc +40 -30
- package/deps/rocksdb/rocksdb/db/version_set.h +13 -3
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -76
- package/deps/rocksdb/rocksdb/db/write_batch.cc +6 -2
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +2 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +25 -2
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +11 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.h +5 -0
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +11 -1
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +10 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +30 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +10 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +287 -83
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +68 -36
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +4 -4
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +31 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +14 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +15 -4
- package/deps/rocksdb/rocksdb/options/options_helper.h +4 -0
- package/deps/rocksdb/rocksdb/options/options_parser.cc +5 -4
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +38 -45
- package/deps/rocksdb/rocksdb/port/port.h +16 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +8 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +10 -20
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -9
- package/deps/rocksdb/rocksdb/table/format.cc +32 -4
- package/deps/rocksdb/rocksdb/table/format.h +12 -1
- package/deps/rocksdb/rocksdb/table/iterator.cc +4 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +214 -161
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +4 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +4 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +2 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +11 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -22
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +3 -0
- package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +3 -3
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -4075,6 +4075,15 @@ void rocksdb_options_set_write_dbid_to_manifest(
|
|
|
4075
4075
|
opt->rep.write_dbid_to_manifest = write_dbid_to_manifest;
|
|
4076
4076
|
}
|
|
4077
4077
|
|
|
4078
|
+
unsigned char rocksdb_options_get_write_identity_file(rocksdb_options_t* opt) {
|
|
4079
|
+
return opt->rep.write_identity_file;
|
|
4080
|
+
}
|
|
4081
|
+
|
|
4082
|
+
void rocksdb_options_set_write_identity_file(
|
|
4083
|
+
rocksdb_options_t* opt, unsigned char write_identity_file) {
|
|
4084
|
+
opt->rep.write_identity_file = write_identity_file;
|
|
4085
|
+
}
|
|
4086
|
+
|
|
4078
4087
|
unsigned char rocksdb_options_get_track_and_verify_wals_in_manifest(
|
|
4079
4088
|
rocksdb_options_t* opt) {
|
|
4080
4089
|
return opt->rep.track_and_verify_wals_in_manifest;
|
|
@@ -772,6 +772,8 @@ int main(int argc, char** argv) {
|
|
|
772
772
|
rocksdb_options_set_write_buffer_size(options, 100000);
|
|
773
773
|
rocksdb_options_set_paranoid_checks(options, 1);
|
|
774
774
|
rocksdb_options_set_max_open_files(options, 10);
|
|
775
|
+
/* Compatibility with how test was written */
|
|
776
|
+
rocksdb_options_set_write_dbid_to_manifest(options, 0);
|
|
775
777
|
|
|
776
778
|
table_options = rocksdb_block_based_options_create();
|
|
777
779
|
rocksdb_block_based_options_set_block_cache(table_options, cache);
|
|
@@ -962,15 +964,24 @@ int main(int argc, char** argv) {
|
|
|
962
964
|
rocksdb_options_t* options_dbid_in_manifest = rocksdb_options_create();
|
|
963
965
|
rocksdb_options_set_create_if_missing(options_dbid_in_manifest, 1);
|
|
964
966
|
|
|
967
|
+
rocksdb_options_set_write_dbid_to_manifest(options_dbid_in_manifest, false);
|
|
965
968
|
unsigned char write_to_manifest =
|
|
966
969
|
rocksdb_options_get_write_dbid_to_manifest(options_dbid_in_manifest);
|
|
967
970
|
CheckCondition(!write_to_manifest);
|
|
968
971
|
rocksdb_options_set_write_dbid_to_manifest(options_dbid_in_manifest, true);
|
|
969
|
-
CheckCondition(!write_to_manifest);
|
|
970
972
|
write_to_manifest =
|
|
971
973
|
rocksdb_options_get_write_dbid_to_manifest(options_dbid_in_manifest);
|
|
972
974
|
CheckCondition(write_to_manifest);
|
|
973
975
|
|
|
976
|
+
rocksdb_options_set_write_identity_file(options_dbid_in_manifest, true);
|
|
977
|
+
unsigned char write_identity_file =
|
|
978
|
+
rocksdb_options_get_write_identity_file(options_dbid_in_manifest);
|
|
979
|
+
CheckCondition(write_identity_file);
|
|
980
|
+
rocksdb_options_set_write_identity_file(options_dbid_in_manifest, false);
|
|
981
|
+
write_identity_file =
|
|
982
|
+
rocksdb_options_get_write_identity_file(options_dbid_in_manifest);
|
|
983
|
+
CheckCondition(!write_identity_file);
|
|
984
|
+
|
|
974
985
|
db = rocksdb_open(options_dbid_in_manifest, dbbackupname, &err);
|
|
975
986
|
CheckNoError(err);
|
|
976
987
|
|
|
@@ -1201,8 +1201,10 @@ Status ColumnFamilyData::RangesOverlapWithMemtables(
|
|
|
1201
1201
|
read_opts.total_order_seek = true;
|
|
1202
1202
|
MergeIteratorBuilder merge_iter_builder(&internal_comparator_, &arena);
|
|
1203
1203
|
merge_iter_builder.AddIterator(super_version->mem->NewIterator(
|
|
1204
|
-
read_opts, /*seqno_to_time_mapping=*/nullptr, &arena
|
|
1204
|
+
read_opts, /*seqno_to_time_mapping=*/nullptr, &arena,
|
|
1205
|
+
/*prefix_extractor=*/nullptr));
|
|
1205
1206
|
super_version->imm->AddIterators(read_opts, /*seqno_to_time_mapping=*/nullptr,
|
|
1207
|
+
/*prefix_extractor=*/nullptr,
|
|
1206
1208
|
&merge_iter_builder,
|
|
1207
1209
|
false /* add_range_tombstone_iter */);
|
|
1208
1210
|
ScopedArenaPtr<InternalIterator> memtable_iter(merge_iter_builder.Finish());
|
|
@@ -1565,28 +1567,6 @@ Status ColumnFamilyData::SetOptions(
|
|
|
1565
1567
|
return s;
|
|
1566
1568
|
}
|
|
1567
1569
|
|
|
1568
|
-
// REQUIRES: DB mutex held
|
|
1569
|
-
Env::WriteLifeTimeHint ColumnFamilyData::CalculateSSTWriteHint(int level) {
|
|
1570
|
-
if (initial_cf_options_.compaction_style != kCompactionStyleLevel) {
|
|
1571
|
-
return Env::WLTH_NOT_SET;
|
|
1572
|
-
}
|
|
1573
|
-
if (level == 0) {
|
|
1574
|
-
return Env::WLTH_MEDIUM;
|
|
1575
|
-
}
|
|
1576
|
-
int base_level = current_->storage_info()->base_level();
|
|
1577
|
-
|
|
1578
|
-
// L1: medium, L2: long, ...
|
|
1579
|
-
if (level - base_level >= 2) {
|
|
1580
|
-
return Env::WLTH_EXTREME;
|
|
1581
|
-
} else if (level < base_level) {
|
|
1582
|
-
// There is no restriction which prevents level passed in to be smaller
|
|
1583
|
-
// than base_level.
|
|
1584
|
-
return Env::WLTH_MEDIUM;
|
|
1585
|
-
}
|
|
1586
|
-
return static_cast<Env::WriteLifeTimeHint>(
|
|
1587
|
-
level - base_level + static_cast<int>(Env::WLTH_MEDIUM));
|
|
1588
|
-
}
|
|
1589
|
-
|
|
1590
1570
|
Status ColumnFamilyData::AddDirectories(
|
|
1591
1571
|
std::map<std::string, std::shared_ptr<FSDirectory>>* created_dirs) {
|
|
1592
1572
|
Status s;
|
|
@@ -1652,6 +1632,9 @@ bool ColumnFamilyData::ShouldPostponeFlushToRetainUDT(
|
|
|
1652
1632
|
}
|
|
1653
1633
|
for (const Slice& table_newest_udt :
|
|
1654
1634
|
imm()->GetTablesNewestUDT(max_memtable_id)) {
|
|
1635
|
+
if (table_newest_udt.empty()) {
|
|
1636
|
+
continue;
|
|
1637
|
+
}
|
|
1655
1638
|
assert(table_newest_udt.size() == full_history_ts_low.size());
|
|
1656
1639
|
// Checking the newest UDT contained in MemTable with ascending ID up to
|
|
1657
1640
|
// `max_memtable_id`. Return immediately on finding the first MemTable that
|
|
@@ -401,6 +401,7 @@ class ColumnFamilyData {
|
|
|
401
401
|
SequenceNumber earliest_seq);
|
|
402
402
|
|
|
403
403
|
TableCache* table_cache() const { return table_cache_.get(); }
|
|
404
|
+
BlobFileCache* blob_file_cache() const { return blob_file_cache_.get(); }
|
|
404
405
|
BlobSource* blob_source() const { return blob_source_.get(); }
|
|
405
406
|
|
|
406
407
|
// See documentation in compaction_picker.h
|
|
@@ -511,8 +512,6 @@ class ColumnFamilyData {
|
|
|
511
512
|
return initial_cf_options_;
|
|
512
513
|
}
|
|
513
514
|
|
|
514
|
-
Env::WriteLifeTimeHint CalculateSSTWriteHint(int level);
|
|
515
|
-
|
|
516
515
|
// created_dirs remembers directory created, so that we don't need to call
|
|
517
516
|
// the same data creation operation again.
|
|
518
517
|
Status AddDirectories(
|
|
@@ -686,12 +686,11 @@ bool Compaction::KeyRangeNotExistsBeyondOutputLevel(
|
|
|
686
686
|
};
|
|
687
687
|
|
|
688
688
|
// Mark (or clear) each file that is being compacted
|
|
689
|
-
void Compaction::MarkFilesBeingCompacted(bool
|
|
689
|
+
void Compaction::MarkFilesBeingCompacted(bool being_compacted) const {
|
|
690
690
|
for (size_t i = 0; i < num_input_levels(); i++) {
|
|
691
691
|
for (size_t j = 0; j < inputs_[i].size(); j++) {
|
|
692
|
-
assert(
|
|
693
|
-
|
|
694
|
-
inputs_[i][j]->being_compacted = mark_as_compacted;
|
|
692
|
+
assert(being_compacted != inputs_[i][j]->being_compacted);
|
|
693
|
+
inputs_[i][j]->being_compacted = being_compacted;
|
|
695
694
|
}
|
|
696
695
|
}
|
|
697
696
|
}
|
|
@@ -735,7 +734,7 @@ uint64_t Compaction::CalculateTotalInputSize() const {
|
|
|
735
734
|
return size;
|
|
736
735
|
}
|
|
737
736
|
|
|
738
|
-
void Compaction::ReleaseCompactionFiles(Status status) {
|
|
737
|
+
void Compaction::ReleaseCompactionFiles(const Status& status) {
|
|
739
738
|
MarkFilesBeingCompacted(false);
|
|
740
739
|
cfd_->compaction_picker()->ReleaseCompactionFiles(this, status);
|
|
741
740
|
}
|
|
@@ -230,7 +230,7 @@ class Compaction {
|
|
|
230
230
|
// Delete this compaction from the list of running compactions.
|
|
231
231
|
//
|
|
232
232
|
// Requirement: DB mutex held
|
|
233
|
-
void ReleaseCompactionFiles(Status status);
|
|
233
|
+
void ReleaseCompactionFiles(const Status& status);
|
|
234
234
|
|
|
235
235
|
// Returns the summary of the compaction in "output" with maximum "len"
|
|
236
236
|
// in bytes. The caller is responsible for the memory management of
|
|
@@ -435,13 +435,13 @@ class Compaction {
|
|
|
435
435
|
const int start_level,
|
|
436
436
|
const int output_level);
|
|
437
437
|
|
|
438
|
+
// mark (or clear) all files that are being compacted
|
|
439
|
+
void MarkFilesBeingCompacted(bool being_compacted) const;
|
|
440
|
+
|
|
438
441
|
private:
|
|
439
442
|
|
|
440
443
|
Status InitInputTableProperties();
|
|
441
444
|
|
|
442
|
-
// mark (or clear) all files that are being compacted
|
|
443
|
-
void MarkFilesBeingCompacted(bool mark_as_compacted);
|
|
444
|
-
|
|
445
445
|
// get the smallest and largest key present in files to be compacted
|
|
446
446
|
static void GetBoundaryKeys(VersionStorageInfo* vstorage,
|
|
447
447
|
const std::vector<CompactionInputFiles>& inputs,
|
|
@@ -251,12 +251,13 @@ void CompactionJob::Prepare() {
|
|
|
251
251
|
|
|
252
252
|
// Generate file_levels_ for compaction before making Iterator
|
|
253
253
|
auto* c = compact_->compaction;
|
|
254
|
-
ColumnFamilyData* cfd = c->column_family_data();
|
|
254
|
+
[[maybe_unused]] ColumnFamilyData* cfd = c->column_family_data();
|
|
255
255
|
assert(cfd != nullptr);
|
|
256
|
-
|
|
257
|
-
|
|
256
|
+
const VersionStorageInfo* storage_info = c->input_version()->storage_info();
|
|
257
|
+
assert(storage_info);
|
|
258
|
+
assert(storage_info->NumLevelFiles(compact_->compaction->level()) > 0);
|
|
258
259
|
|
|
259
|
-
write_hint_ =
|
|
260
|
+
write_hint_ = storage_info->CalculateSSTWriteHint(c->output_level());
|
|
260
261
|
bottommost_level_ = c->bottommost_level();
|
|
261
262
|
|
|
262
263
|
if (c->ShouldFormSubcompactions()) {
|
|
@@ -297,8 +298,8 @@ void CompactionJob::Prepare() {
|
|
|
297
298
|
for (const auto& each_level : *c->inputs()) {
|
|
298
299
|
for (const auto& fmd : each_level.files) {
|
|
299
300
|
std::shared_ptr<const TableProperties> tp;
|
|
300
|
-
Status s =
|
|
301
|
-
|
|
301
|
+
Status s = c->input_version()->GetTableProperties(read_options, &tp,
|
|
302
|
+
fmd, nullptr);
|
|
302
303
|
if (s.ok()) {
|
|
303
304
|
s = seqno_to_time_mapping_.DecodeFrom(tp->seqno_to_time_mapping);
|
|
304
305
|
}
|
|
@@ -805,6 +806,12 @@ Status CompactionJob::Run() {
|
|
|
805
806
|
}
|
|
806
807
|
}
|
|
807
808
|
|
|
809
|
+
// Before the compaction starts, is_remote_compaction was set to true if
|
|
810
|
+
// compaction_service is set. We now know whether each sub_compaction was
|
|
811
|
+
// done remotely or not. Reset is_remote_compaction back to false and allow
|
|
812
|
+
// AggregateCompactionStats() to set the right value.
|
|
813
|
+
compaction_job_stats_->is_remote_compaction = false;
|
|
814
|
+
|
|
808
815
|
// Finish up all bookkeeping to unify the subcompaction results.
|
|
809
816
|
compact_->AggregateCompactionStats(compaction_stats_, *compaction_job_stats_);
|
|
810
817
|
uint64_t num_input_range_del = 0;
|
|
@@ -1083,6 +1090,7 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1083
1090
|
}
|
|
1084
1091
|
// fallback to local compaction
|
|
1085
1092
|
assert(comp_status == CompactionServiceJobStatus::kUseLocal);
|
|
1093
|
+
sub_compact->compaction_job_stats.is_remote_compaction = false;
|
|
1086
1094
|
}
|
|
1087
1095
|
|
|
1088
1096
|
uint64_t prev_cpu_micros = db_options_.clock->CPUMicros();
|
|
@@ -209,6 +209,8 @@ class CompactionJob {
|
|
|
209
209
|
// Returns true iff compaction_stats_.stats.num_input_records and
|
|
210
210
|
// num_input_range_del are calculated successfully.
|
|
211
211
|
bool UpdateCompactionStats(uint64_t* num_input_range_del = nullptr);
|
|
212
|
+
virtual void UpdateCompactionJobStats(
|
|
213
|
+
const InternalStats::CompactionStats& stats) const;
|
|
212
214
|
void LogCompaction();
|
|
213
215
|
virtual void RecordCompactionIOStats();
|
|
214
216
|
void CleanupCompaction();
|
|
@@ -279,8 +281,7 @@ class CompactionJob {
|
|
|
279
281
|
bool* compaction_released);
|
|
280
282
|
Status OpenCompactionOutputFile(SubcompactionState* sub_compact,
|
|
281
283
|
CompactionOutputs& outputs);
|
|
282
|
-
|
|
283
|
-
const InternalStats::CompactionStats& stats) const;
|
|
284
|
+
|
|
284
285
|
void RecordDroppedKeys(const CompactionIterationStats& c_iter_stats,
|
|
285
286
|
CompactionJobStats* compaction_job_stats = nullptr);
|
|
286
287
|
|
|
@@ -377,9 +378,7 @@ class CompactionJob {
|
|
|
377
378
|
// doesn't contain the LSM tree information, which is passed though MANIFEST
|
|
378
379
|
// file.
|
|
379
380
|
struct CompactionServiceInput {
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
DBOptions db_options;
|
|
381
|
+
std::string cf_name;
|
|
383
382
|
|
|
384
383
|
std::vector<SequenceNumber> snapshots;
|
|
385
384
|
|
|
@@ -398,13 +397,12 @@ struct CompactionServiceInput {
|
|
|
398
397
|
bool has_end = false;
|
|
399
398
|
std::string end;
|
|
400
399
|
|
|
400
|
+
uint64_t options_file_number;
|
|
401
|
+
|
|
401
402
|
// serialization interface to read and write the object
|
|
402
403
|
static Status Read(const std::string& data_str, CompactionServiceInput* obj);
|
|
403
404
|
Status Write(std::string* output);
|
|
404
405
|
|
|
405
|
-
// Initialize a dummy ColumnFamilyDescriptor
|
|
406
|
-
CompactionServiceInput() : column_family("", ColumnFamilyOptions()) {}
|
|
407
|
-
|
|
408
406
|
#ifndef NDEBUG
|
|
409
407
|
bool TEST_Equals(CompactionServiceInput* other);
|
|
410
408
|
bool TEST_Equals(CompactionServiceInput* other, std::string* mismatch);
|
|
@@ -418,19 +416,22 @@ struct CompactionServiceOutputFile {
|
|
|
418
416
|
SequenceNumber largest_seqno;
|
|
419
417
|
std::string smallest_internal_key;
|
|
420
418
|
std::string largest_internal_key;
|
|
421
|
-
uint64_t oldest_ancester_time;
|
|
422
|
-
uint64_t file_creation_time;
|
|
423
|
-
uint64_t epoch_number;
|
|
419
|
+
uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
|
|
420
|
+
uint64_t file_creation_time = kUnknownFileCreationTime;
|
|
421
|
+
uint64_t epoch_number = kUnknownEpochNumber;
|
|
422
|
+
std::string file_checksum = kUnknownFileChecksum;
|
|
423
|
+
std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
|
|
424
424
|
uint64_t paranoid_hash;
|
|
425
425
|
bool marked_for_compaction;
|
|
426
|
-
UniqueId64x2 unique_id;
|
|
426
|
+
UniqueId64x2 unique_id{};
|
|
427
427
|
|
|
428
428
|
CompactionServiceOutputFile() = default;
|
|
429
429
|
CompactionServiceOutputFile(
|
|
430
430
|
const std::string& name, SequenceNumber smallest, SequenceNumber largest,
|
|
431
431
|
std::string _smallest_internal_key, std::string _largest_internal_key,
|
|
432
432
|
uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
|
|
433
|
-
uint64_t _epoch_number,
|
|
433
|
+
uint64_t _epoch_number, const std::string& _file_checksum,
|
|
434
|
+
const std::string& _file_checksum_func_name, uint64_t _paranoid_hash,
|
|
434
435
|
bool _marked_for_compaction, UniqueId64x2 _unique_id)
|
|
435
436
|
: file_name(name),
|
|
436
437
|
smallest_seqno(smallest),
|
|
@@ -440,6 +441,8 @@ struct CompactionServiceOutputFile {
|
|
|
440
441
|
oldest_ancester_time(_oldest_ancester_time),
|
|
441
442
|
file_creation_time(_file_creation_time),
|
|
442
443
|
epoch_number(_epoch_number),
|
|
444
|
+
file_checksum(_file_checksum),
|
|
445
|
+
file_checksum_func_name(_file_checksum_func_name),
|
|
443
446
|
paranoid_hash(_paranoid_hash),
|
|
444
447
|
marked_for_compaction(_marked_for_compaction),
|
|
445
448
|
unique_id(std::move(_unique_id)) {}
|
|
@@ -456,9 +459,6 @@ struct CompactionServiceResult {
|
|
|
456
459
|
// location of the output files
|
|
457
460
|
std::string output_path;
|
|
458
461
|
|
|
459
|
-
// some statistics about the compaction
|
|
460
|
-
uint64_t num_output_records = 0;
|
|
461
|
-
uint64_t total_bytes = 0;
|
|
462
462
|
uint64_t bytes_read = 0;
|
|
463
463
|
uint64_t bytes_written = 0;
|
|
464
464
|
CompactionJobStats stats;
|
|
@@ -504,6 +504,9 @@ class CompactionServiceCompactionJob : private CompactionJob {
|
|
|
504
504
|
protected:
|
|
505
505
|
void RecordCompactionIOStats() override;
|
|
506
506
|
|
|
507
|
+
void UpdateCompactionJobStats(
|
|
508
|
+
const InternalStats::CompactionStats& stats) const override;
|
|
509
|
+
|
|
507
510
|
private:
|
|
508
511
|
// Get table file name in output_path
|
|
509
512
|
std::string GetTableFileName(uint64_t file_number) override;
|
|
@@ -50,7 +50,8 @@ void VerifyInitializationOfCompactionJobStats(
|
|
|
50
50
|
ASSERT_EQ(compaction_job_stats.num_output_records, 0U);
|
|
51
51
|
ASSERT_EQ(compaction_job_stats.num_output_files, 0U);
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
ASSERT_TRUE(compaction_job_stats.is_manual_compaction);
|
|
54
|
+
ASSERT_FALSE(compaction_job_stats.is_remote_compaction);
|
|
54
55
|
|
|
55
56
|
ASSERT_EQ(compaction_job_stats.total_input_bytes, 0U);
|
|
56
57
|
ASSERT_EQ(compaction_job_stats.total_output_bytes, 0U);
|
|
@@ -545,15 +546,14 @@ class CompactionJobTestBase : public testing::Test {
|
|
|
545
546
|
ASSERT_OK(s);
|
|
546
547
|
db_options_.info_log = info_log;
|
|
547
548
|
|
|
548
|
-
versions_.reset(
|
|
549
|
-
dbname_, &db_options_, env_options_, table_cache_.get(),
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
549
|
+
versions_.reset(
|
|
550
|
+
new VersionSet(dbname_, &db_options_, env_options_, table_cache_.get(),
|
|
551
|
+
&write_buffer_manager_, &write_controller_,
|
|
552
|
+
/*block_cache_tracer=*/nullptr, /*io_tracer=*/nullptr,
|
|
553
|
+
test::kUnitTestDbId, /*db_session_id=*/"",
|
|
554
|
+
/*daily_offpeak_time_utc=*/"",
|
|
555
|
+
/*error_handler=*/nullptr, /*read_only=*/false));
|
|
554
556
|
compaction_job_stats_.Reset();
|
|
555
|
-
ASSERT_OK(
|
|
556
|
-
SetIdentityFile(WriteOptions(), env_, dbname_, Temperature::kUnknown));
|
|
557
557
|
|
|
558
558
|
VersionEdit new_db;
|
|
559
559
|
new_db.SetLogNumber(0);
|
|
@@ -1569,17 +1569,7 @@ TEST_F(CompactionJobTest, InputSerialization) {
|
|
|
1569
1569
|
const int kStrMaxLen = 1000;
|
|
1570
1570
|
Random rnd(static_cast<uint32_t>(time(nullptr)));
|
|
1571
1571
|
Random64 rnd64(time(nullptr));
|
|
1572
|
-
input.
|
|
1573
|
-
input.column_family.options.comparator = ReverseBytewiseComparator();
|
|
1574
|
-
input.column_family.options.max_bytes_for_level_base =
|
|
1575
|
-
rnd64.Uniform(UINT64_MAX);
|
|
1576
|
-
input.column_family.options.disable_auto_compactions = rnd.OneIn(2);
|
|
1577
|
-
input.column_family.options.compression = kZSTD;
|
|
1578
|
-
input.column_family.options.compression_opts.level = 4;
|
|
1579
|
-
input.db_options.max_background_flushes = 10;
|
|
1580
|
-
input.db_options.paranoid_checks = rnd.OneIn(2);
|
|
1581
|
-
input.db_options.statistics = CreateDBStatistics();
|
|
1582
|
-
input.db_options.env = env_;
|
|
1572
|
+
input.cf_name = rnd.RandomString(rnd.Uniform(kStrMaxLen));
|
|
1583
1573
|
while (!rnd.OneIn(10)) {
|
|
1584
1574
|
input.snapshots.emplace_back(rnd64.Uniform(UINT64_MAX));
|
|
1585
1575
|
}
|
|
@@ -1607,10 +1597,10 @@ TEST_F(CompactionJobTest, InputSerialization) {
|
|
|
1607
1597
|
ASSERT_TRUE(deserialized1.TEST_Equals(&input));
|
|
1608
1598
|
|
|
1609
1599
|
// Test mismatch
|
|
1610
|
-
deserialized1.
|
|
1600
|
+
deserialized1.output_level += 10;
|
|
1611
1601
|
std::string mismatch;
|
|
1612
1602
|
ASSERT_FALSE(deserialized1.TEST_Equals(&input, &mismatch));
|
|
1613
|
-
ASSERT_EQ(mismatch, "
|
|
1603
|
+
ASSERT_EQ(mismatch, "output_level");
|
|
1614
1604
|
|
|
1615
1605
|
// Test unknown field
|
|
1616
1606
|
CompactionServiceInput deserialized2;
|
|
@@ -1666,20 +1656,30 @@ TEST_F(CompactionJobTest, ResultSerialization) {
|
|
|
1666
1656
|
};
|
|
1667
1657
|
result.status =
|
|
1668
1658
|
status_list.at(rnd.Uniform(static_cast<int>(status_list.size())));
|
|
1659
|
+
|
|
1660
|
+
std::string file_checksum = rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen));
|
|
1661
|
+
std::string file_checksum_func_name = "MyAwesomeChecksumGenerator";
|
|
1669
1662
|
while (!rnd.OneIn(10)) {
|
|
1670
1663
|
UniqueId64x2 id{rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX)};
|
|
1671
1664
|
result.output_files.emplace_back(
|
|
1672
|
-
rnd.RandomString(rnd.Uniform(kStrMaxLen))
|
|
1673
|
-
rnd64.Uniform(UINT64_MAX)
|
|
1674
|
-
|
|
1675
|
-
rnd.RandomBinaryString(
|
|
1676
|
-
|
|
1677
|
-
|
|
1665
|
+
rnd.RandomString(rnd.Uniform(kStrMaxLen)) /* file_name */,
|
|
1666
|
+
rnd64.Uniform(UINT64_MAX) /* smallest_seqno */,
|
|
1667
|
+
rnd64.Uniform(UINT64_MAX) /* largest_seqno */,
|
|
1668
|
+
rnd.RandomBinaryString(
|
|
1669
|
+
rnd.Uniform(kStrMaxLen)) /* smallest_internal_key */,
|
|
1670
|
+
rnd.RandomBinaryString(
|
|
1671
|
+
rnd.Uniform(kStrMaxLen)) /* largest_internal_key */,
|
|
1672
|
+
rnd64.Uniform(UINT64_MAX) /* oldest_ancester_time */,
|
|
1673
|
+
rnd64.Uniform(UINT64_MAX) /* file_creation_time */,
|
|
1674
|
+
rnd64.Uniform(UINT64_MAX) /* epoch_number */,
|
|
1675
|
+
file_checksum /* file_checksum */,
|
|
1676
|
+
file_checksum_func_name /* file_checksum_func_name */,
|
|
1677
|
+
rnd64.Uniform(UINT64_MAX) /* paranoid_hash */,
|
|
1678
|
+
rnd.OneIn(2) /* marked_for_compaction */, id);
|
|
1678
1679
|
}
|
|
1679
1680
|
result.output_level = rnd.Uniform(10);
|
|
1680
1681
|
result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen));
|
|
1681
|
-
result.num_output_records = rnd64.Uniform(UINT64_MAX);
|
|
1682
|
-
result.total_bytes = rnd64.Uniform(UINT64_MAX);
|
|
1682
|
+
result.stats.num_output_records = rnd64.Uniform(UINT64_MAX);
|
|
1683
1683
|
result.bytes_read = 123;
|
|
1684
1684
|
result.bytes_written = rnd64.Uniform(UINT64_MAX);
|
|
1685
1685
|
result.stats.elapsed_micros = rnd64.Uniform(UINT64_MAX);
|
|
@@ -1710,6 +1710,10 @@ TEST_F(CompactionJobTest, ResultSerialization) {
|
|
|
1710
1710
|
ASSERT_FALSE(deserialized_tmp.TEST_Equals(&result, &mismatch));
|
|
1711
1711
|
ASSERT_EQ(mismatch, "output_files.unique_id");
|
|
1712
1712
|
deserialized_tmp.status.PermitUncheckedError();
|
|
1713
|
+
|
|
1714
|
+
ASSERT_EQ(deserialized_tmp.output_files[0].file_checksum, file_checksum);
|
|
1715
|
+
ASSERT_EQ(deserialized_tmp.output_files[0].file_checksum_func_name,
|
|
1716
|
+
file_checksum_func_name);
|
|
1713
1717
|
}
|
|
1714
1718
|
|
|
1715
1719
|
// Test unknown field
|
|
@@ -62,8 +62,9 @@ class CompactionOutputs {
|
|
|
62
62
|
}
|
|
63
63
|
|
|
64
64
|
// TODO: Remove it when remote compaction support tiered compaction
|
|
65
|
-
void
|
|
65
|
+
void AddBytesWritten(uint64_t bytes) { stats_.bytes_written += bytes; }
|
|
66
66
|
void SetNumOutputRecords(uint64_t num) { stats_.num_output_records = num; }
|
|
67
|
+
void SetNumOutputFiles(uint64_t num) { stats_.num_output_files = num; }
|
|
67
68
|
|
|
68
69
|
// TODO: Move the BlobDB builder into CompactionOutputs
|
|
69
70
|
const std::vector<BlobFileAddition>& GetBlobFileAdditions() const {
|
|
@@ -133,7 +133,8 @@ CompactionPicker::CompactionPicker(const ImmutableOptions& ioptions,
|
|
|
133
133
|
CompactionPicker::~CompactionPicker() = default;
|
|
134
134
|
|
|
135
135
|
// Delete this compaction from the list of running compactions.
|
|
136
|
-
void CompactionPicker::ReleaseCompactionFiles(Compaction* c,
|
|
136
|
+
void CompactionPicker::ReleaseCompactionFiles(Compaction* c,
|
|
137
|
+
const Status& status) {
|
|
137
138
|
UnregisterCompaction(c);
|
|
138
139
|
if (!status.ok()) {
|
|
139
140
|
c->ResetNextCompactionIndex();
|
|
@@ -104,7 +104,7 @@ class CompactionPicker {
|
|
|
104
104
|
// Free up the files that participated in a compaction
|
|
105
105
|
//
|
|
106
106
|
// Requirement: DB mutex held
|
|
107
|
-
void ReleaseCompactionFiles(Compaction* c, Status status);
|
|
107
|
+
void ReleaseCompactionFiles(Compaction* c, const Status& status);
|
|
108
108
|
|
|
109
109
|
// Returns true if any one of the specified files are being compacted
|
|
110
110
|
bool AreFilesInCompaction(const std::vector<FileMetaData*>& files);
|
|
@@ -294,7 +294,7 @@ Compaction* FIFOCompactionPicker::PickSizeCompaction(
|
|
|
294
294
|
Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
|
|
295
295
|
const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
|
|
296
296
|
const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
|
|
297
|
-
LogBuffer* log_buffer) {
|
|
297
|
+
LogBuffer* log_buffer) const {
|
|
298
298
|
const std::vector<FileTemperatureAge>& ages =
|
|
299
299
|
mutable_cf_options.compaction_options_fifo
|
|
300
300
|
.file_temperature_age_thresholds;
|
|
@@ -344,12 +344,10 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
|
|
|
344
344
|
Temperature compaction_target_temp = Temperature::kLastTemperature;
|
|
345
345
|
if (current_time > min_age) {
|
|
346
346
|
uint64_t create_time_threshold = current_time - min_age;
|
|
347
|
-
uint64_t compaction_size = 0;
|
|
348
347
|
// We will ideally identify a file qualifying for temperature change by
|
|
349
348
|
// knowing the timestamp for the youngest entry in the file. However, right
|
|
350
349
|
// now we don't have the information. We infer it by looking at timestamp of
|
|
351
350
|
// the previous file's (which is just younger) oldest entry's timestamp.
|
|
352
|
-
Temperature cur_target_temp;
|
|
353
351
|
// avoid index underflow
|
|
354
352
|
assert(level_files.size() >= 1);
|
|
355
353
|
for (size_t index = level_files.size() - 1; index >= 1; --index) {
|
|
@@ -374,7 +372,7 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
|
|
|
374
372
|
// cur_file is too fresh
|
|
375
373
|
break;
|
|
376
374
|
}
|
|
377
|
-
cur_target_temp = ages[0].temperature;
|
|
375
|
+
Temperature cur_target_temp = ages[0].temperature;
|
|
378
376
|
for (size_t i = 1; i < ages.size(); ++i) {
|
|
379
377
|
if (current_time >= ages[i].age &&
|
|
380
378
|
oldest_ancestor_time <= current_time - ages[i].age) {
|
|
@@ -382,35 +380,20 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
|
|
|
382
380
|
}
|
|
383
381
|
}
|
|
384
382
|
if (cur_file->temperature == cur_target_temp) {
|
|
385
|
-
|
|
386
|
-
continue;
|
|
387
|
-
} else {
|
|
388
|
-
break;
|
|
389
|
-
}
|
|
383
|
+
continue;
|
|
390
384
|
}
|
|
391
385
|
|
|
392
386
|
// cur_file needs to change temperature
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
compaction_size += cur_file->fd.GetFileSize();
|
|
404
|
-
ROCKS_LOG_BUFFER(
|
|
405
|
-
log_buffer,
|
|
406
|
-
"[%s] FIFO compaction: picking file %" PRIu64
|
|
407
|
-
" with next file's oldest time %" PRIu64 " for temperature %s.",
|
|
408
|
-
cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time,
|
|
409
|
-
temperature_to_string[cur_target_temp].c_str());
|
|
410
|
-
}
|
|
411
|
-
if (compaction_size > mutable_cf_options.max_compaction_bytes) {
|
|
412
|
-
break;
|
|
413
|
-
}
|
|
387
|
+
assert(compaction_target_temp == Temperature::kLastTemperature);
|
|
388
|
+
compaction_target_temp = cur_target_temp;
|
|
389
|
+
inputs[0].files.push_back(cur_file);
|
|
390
|
+
ROCKS_LOG_BUFFER(
|
|
391
|
+
log_buffer,
|
|
392
|
+
"[%s] FIFO compaction: picking file %" PRIu64
|
|
393
|
+
" with next file's oldest time %" PRIu64 " for temperature %s.",
|
|
394
|
+
cf_name.c_str(), cur_file->fd.GetNumber(), oldest_ancestor_time,
|
|
395
|
+
temperature_to_string[cur_target_temp].c_str());
|
|
396
|
+
break;
|
|
414
397
|
}
|
|
415
398
|
}
|
|
416
399
|
|
|
@@ -418,7 +401,9 @@ Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
|
|
|
418
401
|
return nullptr;
|
|
419
402
|
}
|
|
420
403
|
assert(compaction_target_temp != Temperature::kLastTemperature);
|
|
421
|
-
|
|
404
|
+
// Only compact one file at a time.
|
|
405
|
+
assert(inputs.size() == 1);
|
|
406
|
+
assert(inputs[0].size() == 1);
|
|
422
407
|
Compaction* c = new Compaction(
|
|
423
408
|
vstorage, ioptions_, mutable_cf_options, mutable_db_options,
|
|
424
409
|
std::move(inputs), 0, 0 /* output file size limit */,
|
|
@@ -53,9 +53,10 @@ class FIFOCompactionPicker : public CompactionPicker {
|
|
|
53
53
|
VersionStorageInfo* version,
|
|
54
54
|
LogBuffer* log_buffer);
|
|
55
55
|
|
|
56
|
+
// Will pick one file to compact at a time, starting from the oldest file.
|
|
56
57
|
Compaction* PickTemperatureChangeCompaction(
|
|
57
58
|
const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
|
|
58
59
|
const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
|
|
59
|
-
LogBuffer* log_buffer);
|
|
60
|
+
LogBuffer* log_buffer) const;
|
|
60
61
|
};
|
|
61
62
|
} // namespace ROCKSDB_NAMESPACE
|