@nxtedition/rocksdb 14.0.0 → 15.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +52 -179
- package/deps/rocksdb/rocksdb/BUCK +7 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +29 -14
- package/deps/rocksdb/rocksdb/Directory.Build.props +9 -0
- package/deps/rocksdb/rocksdb/Makefile +6 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -4
- package/deps/rocksdb/rocksdb/ccache_msvc_compiler.bat +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +17 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +10 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +522 -60
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +69 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +443 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +14 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +5 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +28 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +6 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +455 -98
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +13 -1
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +146 -0
- package/deps/rocksdb/rocksdb/db/db_follower_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +18 -19
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +665 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +83 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +68 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +101 -0
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +44 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +22 -5
- package/deps/rocksdb/rocksdb/db/log_reader.h +4 -4
- package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
- package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +477 -139
- package/deps/rocksdb/rocksdb/db/version_edit.h +228 -8
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +333 -0
- package/deps/rocksdb/rocksdb/db/write_thread.h +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +247 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.cc +61 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +17 -28
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +6 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +46 -18
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +18 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +8 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +4 -4
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +1 -0
- package/deps/rocksdb/rocksdb/file/filename.cc +40 -0
- package/deps/rocksdb/rocksdb/file/filename.h +14 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +4 -3
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +59 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +24 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +13 -8
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +5 -0
- package/deps/rocksdb/rocksdb/src.mk +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +73 -16
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +10 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +32 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +18 -27
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +0 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +708 -217
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +11 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +5 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +27 -19
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +24 -6
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +51 -18
- package/deps/rocksdb/rocksdb/table/block_based/index_builder_test.cc +183 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +8 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +222 -36
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +246 -6
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +86 -0
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +21 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +12 -12
- package/index.js +27 -37
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -51,7 +51,9 @@
|
|
|
51
51
|
#include "rocksdb/status.h"
|
|
52
52
|
#include "rocksdb/table.h"
|
|
53
53
|
#include "rocksdb/utilities/options_type.h"
|
|
54
|
+
#include "table/format.h"
|
|
54
55
|
#include "table/merging_iterator.h"
|
|
56
|
+
#include "table/meta_blocks.h"
|
|
55
57
|
#include "table/table_builder.h"
|
|
56
58
|
#include "table/unique_id_impl.h"
|
|
57
59
|
#include "test_util/sync_point.h"
|
|
@@ -253,7 +255,9 @@ void CompactionJob::ReportStartedCompaction(Compaction* compaction) {
|
|
|
253
255
|
|
|
254
256
|
void CompactionJob::Prepare(
|
|
255
257
|
std::optional<std::pair<std::optional<Slice>, std::optional<Slice>>>
|
|
256
|
-
known_single_subcompact
|
|
258
|
+
known_single_subcompact,
|
|
259
|
+
const CompactionProgress& compaction_progress,
|
|
260
|
+
log::Writer* compaction_progress_writer) {
|
|
257
261
|
db_mutex_->AssertHeld();
|
|
258
262
|
AutoThreadOperationStageUpdater stage_updater(
|
|
259
263
|
ThreadStatus::STAGE_COMPACTION_PREPARE);
|
|
@@ -303,6 +307,9 @@ void CompactionJob::Prepare(
|
|
|
303
307
|
/*sub_job_id*/ 0);
|
|
304
308
|
}
|
|
305
309
|
|
|
310
|
+
MaybeAssignCompactionProgressAndWriter(compaction_progress,
|
|
311
|
+
compaction_progress_writer);
|
|
312
|
+
|
|
306
313
|
// collect all seqno->time information from the input files which will be used
|
|
307
314
|
// to encode seqno->time to the output files.
|
|
308
315
|
SequenceNumber preserve_time_min_seqno = kMaxSequenceNumber;
|
|
@@ -401,6 +408,25 @@ void CompactionJob::Prepare(
|
|
|
401
408
|
options_file_number_ = versions_->options_file_number();
|
|
402
409
|
}
|
|
403
410
|
|
|
411
|
+
void CompactionJob::MaybeAssignCompactionProgressAndWriter(
|
|
412
|
+
const CompactionProgress& compaction_progress,
|
|
413
|
+
log::Writer* compaction_progress_writer) {
|
|
414
|
+
// LIMITATION: Only supports resuming single subcompaction for now
|
|
415
|
+
if (compact_->sub_compact_states.size() != 1) {
|
|
416
|
+
return;
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
if (!compaction_progress.empty()) {
|
|
420
|
+
assert(compaction_progress.size() == 1);
|
|
421
|
+
SubcompactionState* sub_compact = &compact_->sub_compact_states[0];
|
|
422
|
+
const SubcompactionProgress& subcompaction_progress =
|
|
423
|
+
compaction_progress[0];
|
|
424
|
+
sub_compact->SetSubcompactionProgress(subcompaction_progress);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
compaction_progress_writer_ = compaction_progress_writer;
|
|
428
|
+
}
|
|
429
|
+
|
|
404
430
|
uint64_t CompactionJob::GetSubcompactionsLimit() {
|
|
405
431
|
return extra_num_subcompaction_threads_reserved_ +
|
|
406
432
|
std::max(
|
|
@@ -924,7 +950,8 @@ void CompactionJob::FinalizeCompactionRun(
|
|
|
924
950
|
UpdateCompactionJobInputStatsFromInternalStats(internal_stats_,
|
|
925
951
|
num_input_range_del);
|
|
926
952
|
}
|
|
927
|
-
UpdateCompactionJobOutputStatsFromInternalStats(
|
|
953
|
+
UpdateCompactionJobOutputStatsFromInternalStats(input_status,
|
|
954
|
+
internal_stats_);
|
|
928
955
|
RecordCompactionIOStats();
|
|
929
956
|
|
|
930
957
|
LogFlush(db_options_.info_log);
|
|
@@ -1249,8 +1276,8 @@ Status CompactionJob::SetupAndValidateCompactionFilter(
|
|
|
1249
1276
|
return Status::OK();
|
|
1250
1277
|
}
|
|
1251
1278
|
|
|
1252
|
-
void CompactionJob::
|
|
1253
|
-
|
|
1279
|
+
void CompactionJob::InitializeReadOptionsAndBoundaries(
|
|
1280
|
+
const size_t ts_sz, ReadOptions& read_options,
|
|
1254
1281
|
SubcompactionKeyBoundaries& boundaries) {
|
|
1255
1282
|
read_options.verify_checksums = true;
|
|
1256
1283
|
read_options.fill_cache = false;
|
|
@@ -1264,8 +1291,6 @@ void CompactionJob::InitializeReadOptions(
|
|
|
1264
1291
|
|
|
1265
1292
|
// Remove the timestamps from boundaries because boundaries created in
|
|
1266
1293
|
// GenSubcompactionBoundaries doesn't strip away the timestamp.
|
|
1267
|
-
const size_t ts_sz = cfd->user_comparator()->timestamp_size();
|
|
1268
|
-
|
|
1269
1294
|
if (boundaries.start.has_value()) {
|
|
1270
1295
|
read_options.iterate_lower_bound = &(*boundaries.start);
|
|
1271
1296
|
if (ts_sz > 0) {
|
|
@@ -1282,30 +1307,7 @@ void CompactionJob::InitializeReadOptions(
|
|
|
1282
1307
|
read_options.iterate_upper_bound = &(*boundaries.end_without_ts);
|
|
1283
1308
|
}
|
|
1284
1309
|
}
|
|
1285
|
-
}
|
|
1286
|
-
|
|
1287
|
-
InternalIterator* CompactionJob::CreateInputIterator(
|
|
1288
|
-
SubcompactionState* sub_compact, ColumnFamilyData* cfd,
|
|
1289
|
-
SubcompactionInternalIterators& iterators,
|
|
1290
|
-
SubcompactionKeyBoundaries& boundaries, ReadOptions& read_options) {
|
|
1291
|
-
// This is assigned after creation of SubcompactionState to simplify that
|
|
1292
|
-
// creation across both CompactionJob and CompactionServiceCompactionJob
|
|
1293
|
-
sub_compact->AssignRangeDelAggregator(
|
|
1294
|
-
std::make_unique<CompactionRangeDelAggregator>(
|
|
1295
|
-
&cfd->internal_comparator(), job_context_->snapshot_seqs,
|
|
1296
|
-
&full_history_ts_low_, &trim_ts_));
|
|
1297
|
-
|
|
1298
|
-
InitializeReadOptions(cfd, read_options, boundaries);
|
|
1299
|
-
|
|
1300
|
-
// Although the v2 aggregator is what the level iterator(s) know about,
|
|
1301
|
-
// the AddTombstones calls will be propagated down to the v1 aggregator.
|
|
1302
|
-
iterators.raw_input =
|
|
1303
|
-
std::unique_ptr<InternalIterator>(versions_->MakeInputIterator(
|
|
1304
|
-
read_options, sub_compact->compaction, sub_compact->RangeDelAgg(),
|
|
1305
|
-
file_options_for_read_, boundaries.start, boundaries.end));
|
|
1306
|
-
InternalIterator* input = iterators.raw_input.get();
|
|
1307
1310
|
|
|
1308
|
-
const size_t ts_sz = cfd->user_comparator()->timestamp_size();
|
|
1309
1311
|
if (ts_sz > 0) {
|
|
1310
1312
|
if (ts_sz <= strlen(boundaries.kMaxTs)) {
|
|
1311
1313
|
boundaries.ts_slice = Slice(boundaries.kMaxTs, ts_sz);
|
|
@@ -1314,7 +1316,6 @@ InternalIterator* CompactionJob::CreateInputIterator(
|
|
|
1314
1316
|
boundaries.ts_slice = Slice(boundaries.max_ts);
|
|
1315
1317
|
}
|
|
1316
1318
|
}
|
|
1317
|
-
|
|
1318
1319
|
if (boundaries.start.has_value()) {
|
|
1319
1320
|
boundaries.start_ikey.SetInternalKey(*boundaries.start, kMaxSequenceNumber,
|
|
1320
1321
|
kValueTypeForSeek);
|
|
@@ -1335,6 +1336,29 @@ InternalIterator* CompactionJob::CreateInputIterator(
|
|
|
1335
1336
|
boundaries.end_internal_key = boundaries.end_ikey.GetInternalKey();
|
|
1336
1337
|
boundaries.end_user_key = boundaries.end_ikey.GetUserKey();
|
|
1337
1338
|
}
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1341
|
+
InternalIterator* CompactionJob::CreateInputIterator(
|
|
1342
|
+
SubcompactionState* sub_compact, ColumnFamilyData* cfd,
|
|
1343
|
+
SubcompactionInternalIterators& iterators,
|
|
1344
|
+
SubcompactionKeyBoundaries& boundaries, ReadOptions& read_options) {
|
|
1345
|
+
const size_t ts_sz = cfd->user_comparator()->timestamp_size();
|
|
1346
|
+
InitializeReadOptionsAndBoundaries(ts_sz, read_options, boundaries);
|
|
1347
|
+
|
|
1348
|
+
// This is assigned after creation of SubcompactionState to simplify that
|
|
1349
|
+
// creation across both CompactionJob and CompactionServiceCompactionJob
|
|
1350
|
+
sub_compact->AssignRangeDelAggregator(
|
|
1351
|
+
std::make_unique<CompactionRangeDelAggregator>(
|
|
1352
|
+
&cfd->internal_comparator(), job_context_->snapshot_seqs,
|
|
1353
|
+
&full_history_ts_low_, &trim_ts_));
|
|
1354
|
+
|
|
1355
|
+
// Although the v2 aggregator is what the level iterator(s) know about,
|
|
1356
|
+
// the AddTombstones calls will be propagated down to the v1 aggregator.
|
|
1357
|
+
iterators.raw_input =
|
|
1358
|
+
std::unique_ptr<InternalIterator>(versions_->MakeInputIterator(
|
|
1359
|
+
read_options, sub_compact->compaction, sub_compact->RangeDelAgg(),
|
|
1360
|
+
file_options_for_read_, boundaries.start, boundaries.end));
|
|
1361
|
+
InternalIterator* input = iterators.raw_input.get();
|
|
1338
1362
|
|
|
1339
1363
|
if (boundaries.start.has_value() || boundaries.end.has_value()) {
|
|
1340
1364
|
iterators.clip = std::make_unique<ClippingIterator>(
|
|
@@ -1404,7 +1428,8 @@ std::unique_ptr<CompactionIterator> CompactionJob::CreateCompactionIterator(
|
|
|
1404
1428
|
env_, ShouldReportDetailedTime(env_, stats_), sub_compact->RangeDelAgg(),
|
|
1405
1429
|
blob_resources.blob_file_builder.get(), db_options_.allow_data_in_errors,
|
|
1406
1430
|
db_options_.enforce_single_del_contracts, manual_compaction_canceled_,
|
|
1407
|
-
sub_compact->compaction
|
|
1431
|
+
sub_compact->compaction
|
|
1432
|
+
->DoesInputReferenceBlobFiles() /* must_count_input_entries */,
|
|
1408
1433
|
sub_compact->compaction, compaction_filter, shutting_down_,
|
|
1409
1434
|
db_options_.info_log, full_history_ts_low, preserve_seqno_after_);
|
|
1410
1435
|
}
|
|
@@ -1424,11 +1449,13 @@ CompactionJob::CreateFileHandlers(SubcompactionState* sub_compact,
|
|
|
1424
1449
|
|
|
1425
1450
|
const CompactionFileCloseFunc close_file_func =
|
|
1426
1451
|
[this, sub_compact, start_user_key, end_user_key](
|
|
1427
|
-
|
|
1428
|
-
const
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1452
|
+
const Status& status,
|
|
1453
|
+
const ParsedInternalKey& prev_table_last_internal_key,
|
|
1454
|
+
const Slice& next_table_min_key, const CompactionIterator* c_iter,
|
|
1455
|
+
CompactionOutputs& outputs) {
|
|
1456
|
+
return this->FinishCompactionOutputFile(
|
|
1457
|
+
status, prev_table_last_internal_key, next_table_min_key,
|
|
1458
|
+
start_user_key, end_user_key, c_iter, sub_compact, outputs);
|
|
1432
1459
|
};
|
|
1433
1460
|
|
|
1434
1461
|
return {open_file_func, close_file_func};
|
|
@@ -1442,6 +1469,9 @@ Status CompactionJob::ProcessKeyValue(
|
|
|
1442
1469
|
const uint64_t kRecordStatsEvery = 1000;
|
|
1443
1470
|
[[maybe_unused]] const std::optional<const Slice> end = sub_compact->end;
|
|
1444
1471
|
|
|
1472
|
+
IterKey last_output_key;
|
|
1473
|
+
ParsedInternalKey last_output_ikey;
|
|
1474
|
+
|
|
1445
1475
|
TEST_SYNC_POINT_CALLBACK(
|
|
1446
1476
|
"CompactionJob::ProcessKeyValueCompaction()::Processing",
|
|
1447
1477
|
static_cast<void*>(const_cast<Compaction*>(sub_compact->compaction)));
|
|
@@ -1491,8 +1521,9 @@ Status CompactionJob::ProcessKeyValue(
|
|
|
1491
1521
|
// and `close_file_func`.
|
|
1492
1522
|
// TODO: it would be better to have the compaction file open/close moved
|
|
1493
1523
|
// into `CompactionOutputs` which has the output file information.
|
|
1494
|
-
status =
|
|
1495
|
-
|
|
1524
|
+
status =
|
|
1525
|
+
sub_compact->AddToOutput(*c_iter, use_proximal_output, open_file_func,
|
|
1526
|
+
close_file_func, last_output_ikey);
|
|
1496
1527
|
if (!status.ok()) {
|
|
1497
1528
|
break;
|
|
1498
1529
|
}
|
|
@@ -1500,6 +1531,10 @@ Status CompactionJob::ProcessKeyValue(
|
|
|
1500
1531
|
TEST_SYNC_POINT_CALLBACK("CompactionJob::Run():PausingManualCompaction:2",
|
|
1501
1532
|
static_cast<void*>(const_cast<std::atomic<bool>*>(
|
|
1502
1533
|
&manual_compaction_canceled_)));
|
|
1534
|
+
|
|
1535
|
+
last_output_key.SetInternalKey(c_iter->key(), &last_output_ikey);
|
|
1536
|
+
last_output_ikey.sequence = ikey.sequence;
|
|
1537
|
+
last_output_ikey.type = ikey.type;
|
|
1503
1538
|
c_iter->Next();
|
|
1504
1539
|
|
|
1505
1540
|
#ifndef NDEBUG
|
|
@@ -1684,6 +1719,22 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1684
1719
|
ReadOptions read_options;
|
|
1685
1720
|
const WriteOptions write_options(Env::IOPriority::IO_LOW,
|
|
1686
1721
|
Env::IOActivity::kCompaction);
|
|
1722
|
+
|
|
1723
|
+
InternalIterator* input_iter = CreateInputIterator(
|
|
1724
|
+
sub_compact, cfd, iterators, boundaries, read_options);
|
|
1725
|
+
|
|
1726
|
+
assert(input_iter);
|
|
1727
|
+
|
|
1728
|
+
Status status =
|
|
1729
|
+
MaybeResumeSubcompactionProgressOnInputIterator(sub_compact, input_iter);
|
|
1730
|
+
|
|
1731
|
+
if (status.IsNotFound()) {
|
|
1732
|
+
input_iter->SeekToFirst();
|
|
1733
|
+
} else if (!status.ok()) {
|
|
1734
|
+
sub_compact->status = status;
|
|
1735
|
+
return;
|
|
1736
|
+
}
|
|
1737
|
+
|
|
1687
1738
|
MergeHelper merge(
|
|
1688
1739
|
env_, cfd->user_comparator(), cfd->ioptions().merge_operator.get(),
|
|
1689
1740
|
compaction_filter, db_options_.info_log.get(),
|
|
@@ -1692,11 +1743,6 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1692
1743
|
compact_->compaction->level(), db_options_.stats);
|
|
1693
1744
|
BlobFileResources blob_resources;
|
|
1694
1745
|
|
|
1695
|
-
InternalIterator* input_iter = CreateInputIterator(
|
|
1696
|
-
sub_compact, cfd, iterators, boundaries, read_options);
|
|
1697
|
-
assert(input_iter);
|
|
1698
|
-
input_iter->SeekToFirst();
|
|
1699
|
-
|
|
1700
1746
|
auto c_iter =
|
|
1701
1747
|
CreateCompactionIterator(sub_compact, cfd, input_iter, compaction_filter,
|
|
1702
1748
|
merge, blob_resources, write_options);
|
|
@@ -1711,9 +1757,8 @@ void CompactionJob::ProcessKeyValueCompaction(SubcompactionState* sub_compact) {
|
|
|
1711
1757
|
auto [open_file_func, close_file_func] =
|
|
1712
1758
|
CreateFileHandlers(sub_compact, boundaries);
|
|
1713
1759
|
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
close_file_func, prev_cpu_micros);
|
|
1760
|
+
status = ProcessKeyValue(sub_compact, cfd, c_iter.get(), open_file_func,
|
|
1761
|
+
close_file_func, prev_cpu_micros);
|
|
1717
1762
|
|
|
1718
1763
|
status = FinalizeProcessKeyValueStatus(cfd, input_iter, c_iter.get(), status);
|
|
1719
1764
|
|
|
@@ -1795,9 +1840,11 @@ void CompactionJob::RecordDroppedKeys(
|
|
|
1795
1840
|
}
|
|
1796
1841
|
|
|
1797
1842
|
Status CompactionJob::FinishCompactionOutputFile(
|
|
1798
|
-
const Status& input_status,
|
|
1799
|
-
|
|
1800
|
-
const Slice
|
|
1843
|
+
const Status& input_status,
|
|
1844
|
+
const ParsedInternalKey& prev_table_last_internal_key,
|
|
1845
|
+
const Slice& next_table_min_key, const Slice* comp_start_user_key,
|
|
1846
|
+
const Slice* comp_end_user_key, const CompactionIterator* c_iter,
|
|
1847
|
+
SubcompactionState* sub_compact, CompactionOutputs& outputs) {
|
|
1801
1848
|
AutoThreadOperationStageUpdater stage_updater(
|
|
1802
1849
|
ThreadStatus::STAGE_COMPACTION_SYNC_FILE);
|
|
1803
1850
|
assert(sub_compact != nullptr);
|
|
@@ -1971,10 +2018,94 @@ Status CompactionJob::FinishCompactionOutputFile(
|
|
|
1971
2018
|
}
|
|
1972
2019
|
}
|
|
1973
2020
|
|
|
2021
|
+
if (s.ok() && ShouldUpdateSubcompactionProgress(sub_compact, c_iter,
|
|
2022
|
+
prev_table_last_internal_key,
|
|
2023
|
+
next_table_min_key, meta)) {
|
|
2024
|
+
UpdateSubcompactionProgress(c_iter, next_table_min_key, sub_compact);
|
|
2025
|
+
s = PersistSubcompactionProgress(sub_compact);
|
|
2026
|
+
}
|
|
1974
2027
|
outputs.ResetBuilder();
|
|
1975
2028
|
return s;
|
|
1976
2029
|
}
|
|
1977
2030
|
|
|
2031
|
+
bool CompactionJob::ShouldUpdateSubcompactionProgress(
|
|
2032
|
+
const SubcompactionState* sub_compact, const CompactionIterator* c_iter,
|
|
2033
|
+
const ParsedInternalKey& prev_table_last_internal_key,
|
|
2034
|
+
const Slice& next_table_min_internal_key, const FileMetaData* meta) const {
|
|
2035
|
+
const auto* cfd = sub_compact->compaction->column_family_data();
|
|
2036
|
+
// No need to update when the output will not get persisted
|
|
2037
|
+
if (compaction_progress_writer_ == nullptr) {
|
|
2038
|
+
return false;
|
|
2039
|
+
}
|
|
2040
|
+
|
|
2041
|
+
// No need to update for a new empty output
|
|
2042
|
+
if (meta == nullptr) {
|
|
2043
|
+
return false;
|
|
2044
|
+
}
|
|
2045
|
+
|
|
2046
|
+
// TODO(hx235): save progress even on the last output file
|
|
2047
|
+
if (next_table_min_internal_key.empty()) {
|
|
2048
|
+
return false;
|
|
2049
|
+
}
|
|
2050
|
+
|
|
2051
|
+
// LIMITATION: Persisting compaction progress with timestamp
|
|
2052
|
+
// is not supported since the feature of persisting timestamp of the key in
|
|
2053
|
+
// SST files itself is still experimental
|
|
2054
|
+
size_t ts_sz = cfd->user_comparator()->timestamp_size();
|
|
2055
|
+
if (ts_sz > 0) {
|
|
2056
|
+
return false;
|
|
2057
|
+
}
|
|
2058
|
+
|
|
2059
|
+
// LIMITATION: Compaction progress persistence disabled for file boundaries
|
|
2060
|
+
// contaning range deletions. Range deletions can span file boundaries, making
|
|
2061
|
+
// it difficult (but possible) to ensure adjacent output tables have different
|
|
2062
|
+
// user keys. See the last check for why different users keys of adjacent
|
|
2063
|
+
// output tables are needed
|
|
2064
|
+
const ValueType next_table_min_internal_key_type =
|
|
2065
|
+
ExtractValueType(next_table_min_internal_key);
|
|
2066
|
+
const ValueType prev_table_last_internal_key_type =
|
|
2067
|
+
prev_table_last_internal_key.user_key.empty()
|
|
2068
|
+
? ValueType::kTypeValue
|
|
2069
|
+
: prev_table_last_internal_key.type;
|
|
2070
|
+
|
|
2071
|
+
if (next_table_min_internal_key_type == ValueType::kTypeRangeDeletion ||
|
|
2072
|
+
prev_table_last_internal_key_type == ValueType::kTypeRangeDeletion) {
|
|
2073
|
+
return false;
|
|
2074
|
+
}
|
|
2075
|
+
|
|
2076
|
+
// LIMITATION: Compaction progress persistence disabled when adjacent output
|
|
2077
|
+
// tables share the same user key at boundaries. This ensures a simple Seek()
|
|
2078
|
+
// of the next key when resuming can process all versions of a user key
|
|
2079
|
+
const Slice next_table_min_user_key =
|
|
2080
|
+
ExtractUserKey(next_table_min_internal_key);
|
|
2081
|
+
const Slice prev_table_last_user_key =
|
|
2082
|
+
prev_table_last_internal_key.user_key.empty()
|
|
2083
|
+
? Slice()
|
|
2084
|
+
: prev_table_last_internal_key.user_key;
|
|
2085
|
+
|
|
2086
|
+
if (cfd->user_comparator()->EqualWithoutTimestamp(next_table_min_user_key,
|
|
2087
|
+
prev_table_last_user_key)) {
|
|
2088
|
+
return false;
|
|
2089
|
+
}
|
|
2090
|
+
|
|
2091
|
+
// LIMITATION: Don't save progress if the current key has already been scanned
|
|
2092
|
+
// (looked ahead) in the input but not yet output. This can happen with merge
|
|
2093
|
+
// operations, single deletes, and deletes at the bottommost level where
|
|
2094
|
+
// CompactionIterator needs to look ahead to process multiple entries for the
|
|
2095
|
+
// same user key before outputting a result. If we saved progress and resumed
|
|
2096
|
+
// at this boundary, the resumed session would see and process the same input
|
|
2097
|
+
// key again through Seek(), leading to incorrect double-counting in
|
|
2098
|
+
// number of processed input entries and input count verification failure
|
|
2099
|
+
//
|
|
2100
|
+
// TODO(hx235): Offset num_processed_input_records to avoid double counting
|
|
2101
|
+
// instead of disabling progress persistence.
|
|
2102
|
+
if (c_iter->IsCurrentKeyAlreadyScanned()) {
|
|
2103
|
+
return false;
|
|
2104
|
+
}
|
|
2105
|
+
|
|
2106
|
+
return true;
|
|
2107
|
+
}
|
|
2108
|
+
|
|
1978
2109
|
Status CompactionJob::InstallCompactionResults(bool* compaction_released) {
|
|
1979
2110
|
assert(compact_);
|
|
1980
2111
|
|
|
@@ -2120,15 +2251,8 @@ Status CompactionJob::OpenCompactionOutputFile(SubcompactionState* sub_compact,
|
|
|
2120
2251
|
|
|
2121
2252
|
// Pass temperature of the last level files to FileSystem.
|
|
2122
2253
|
FileOptions fo_copy = file_options_;
|
|
2123
|
-
|
|
2124
|
-
|
|
2125
|
-
sub_compact->compaction->mutable_cf_options().last_level_temperature;
|
|
2126
|
-
// Here last_level_temperature supersedes default_write_temperature, when
|
|
2127
|
-
// enabled and applicable
|
|
2128
|
-
if (last_level_temp != Temperature::kUnknown &&
|
|
2129
|
-
sub_compact->compaction->is_last_level() && !outputs.IsProximalLevel()) {
|
|
2130
|
-
temperature = last_level_temp;
|
|
2131
|
-
}
|
|
2254
|
+
auto temperature =
|
|
2255
|
+
sub_compact->compaction->GetOutputTemperature(outputs.IsProximalLevel());
|
|
2132
2256
|
fo_copy.temperature = temperature;
|
|
2133
2257
|
fo_copy.write_hint = write_hint_;
|
|
2134
2258
|
|
|
@@ -2404,6 +2528,7 @@ void CompactionJob::UpdateCompactionJobInputStatsFromInternalStats(
|
|
|
2404
2528
|
}
|
|
2405
2529
|
|
|
2406
2530
|
void CompactionJob::UpdateCompactionJobOutputStatsFromInternalStats(
|
|
2531
|
+
const Status& status,
|
|
2407
2532
|
const InternalStats::CompactionStatsFull& internal_stats) const {
|
|
2408
2533
|
assert(job_stats_);
|
|
2409
2534
|
job_stats_->elapsed_micros = internal_stats.output_level_stats.micros;
|
|
@@ -2434,7 +2559,7 @@ void CompactionJob::UpdateCompactionJobOutputStatsFromInternalStats(
|
|
|
2434
2559
|
internal_stats.proximal_level_stats.num_output_files_blob;
|
|
2435
2560
|
}
|
|
2436
2561
|
|
|
2437
|
-
if (job_stats_->num_output_files > 0) {
|
|
2562
|
+
if (status.ok() && job_stats_->num_output_files > 0) {
|
|
2438
2563
|
CopyPrefix(compact_->SmallestUserKey(),
|
|
2439
2564
|
CompactionJobStats::kMaxPrefixLength,
|
|
2440
2565
|
&job_stats_->smallest_output_key_prefix);
|
|
@@ -2515,6 +2640,344 @@ Env::IOPriority CompactionJob::GetRateLimiterPriority() {
|
|
|
2515
2640
|
return Env::IO_LOW;
|
|
2516
2641
|
}
|
|
2517
2642
|
|
|
2643
|
+
// Reads the table properties of one table file by opening the file directly
// through `ioptions.fs`.
//
// ioptions / moptions: options supplying the file system, clock, statistics,
//   rate limiter, listeners and table factory.
// file_meta: identifies the file (number and size) to read.
// read_options: forwarded to ReadTableProperties().
// tp: receives the properties on success; left untouched on failure.
//
// Returns the open/read error if either step fails, Status::Incomplete() if no
// table factory is configured, and the (OK) read status otherwise.
Status CompactionJob::ReadTablePropertiesDirectly(
    const ImmutableOptions& ioptions, const MutableCFOptions& moptions,
    const FileMetaData* file_meta, const ReadOptions& read_options,
    std::shared_ptr<const TableProperties>* tp) {
  std::string table_path = GetTableFileName(file_meta->fd.GetNumber());

  std::unique_ptr<FSRandomAccessFile> raw_file;
  Status s = ioptions.fs->NewRandomAccessFile(table_path, file_options_,
                                              &raw_file, nullptr /* dbg */);
  if (!s.ok()) {
    return s;
  }

  auto reader = std::make_unique<RandomAccessFileReader>(
      std::move(raw_file), table_path, ioptions.clock, io_tracer_,
      ioptions.stats, Histograms::SST_READ_MICROS /* hist_type */,
      nullptr /* file_read_hist */, ioptions.rate_limiter.get(),
      ioptions.listeners);

  const auto* factory = moptions.table_factory.get();
  if (factory == nullptr) {
    return Status::Incomplete("Table factory is not set");
  }

  // Choose the footer magic number from the configured table format; block
  // based is the default when the name matches neither plain nor cuckoo.
  // NOTE(review): if Name() and k*TableName() return `const char*`, the
  // comparisons below compare pointers rather than contents - confirm.
  uint64_t magic = kBlockBasedTableMagicNumber;
  const auto& factory_name = factory->Name();
  if (factory_name == TableFactory::kPlainTableName()) {
    magic = kPlainTableMagicNumber;
  } else if (factory_name == TableFactory::kCuckooTableName()) {
    magic = kCuckooTableMagicNumber;
  }

  std::unique_ptr<TableProperties> loaded;
  s = ReadTableProperties(reader.get(), file_meta->fd.GetFileSize(), magic,
                          ioptions, read_options, &loaded);
  if (s.ok()) {
    *tp = std::move(loaded);
  }
  return s;
}
|
|
2687
|
+
|
|
2688
|
+
// Reads the table properties of every file in `output_files` and appends them,
// in the same order, to `output_files_table_properties`.
//
// output_files: files whose properties are read; must not be empty.
// read_options: forwarded to ReadTablePropertiesDirectly().
// output_files_table_properties: receives one entry per successfully read
//   file; on failure it holds the entries read before the failing file.
// is_proximal_level: only selects the label used in log and error messages.
//
// Returns the first read error, Status::Corruption() if a file yields a null
// properties object, and OK otherwise.
Status CompactionJob::ReadOutputFilesTableProperties(
    const autovector<FileMetaData>& output_files,
    const ReadOptions& read_options,
    std::vector<std::shared_ptr<const TableProperties>>&
        output_files_table_properties,
    bool is_proximal_level) {
  assert(!output_files.empty());

  // Must NOT be `static`: a function-local static is initialized only on the
  // first call, so a later call with the other `is_proximal_level` value
  // would keep reporting the label chosen by the first call.
  const char* const level_type =
      is_proximal_level ? "proximal output" : "output";

  output_files_table_properties.reserve(output_files.size());

  Status s;

  for (const FileMetaData& metadata : output_files) {
    std::shared_ptr<const TableProperties> tp;
    s = ReadTablePropertiesDirectly(compact_->compaction->immutable_options(),
                                    compact_->compaction->mutable_cf_options(),
                                    &metadata, read_options, &tp);
    if (!s.ok()) {
      ROCKS_LOG_ERROR(
          db_options_.info_log,
          "Failed to read table properties for %s level output file #%" PRIu64
          ": %s",
          level_type, metadata.fd.GetNumber(), s.ToString().c_str());
      return s;
    }

    // A successful read that produced no properties is treated as corruption.
    if (tp == nullptr) {
      ROCKS_LOG_ERROR(db_options_.info_log,
                      "Empty table property for %s level output file #%" PRIu64
                      "",
                      level_type, metadata.fd.GetNumber());

      s = Status::Corruption("Empty table property for " +
                             std::string(level_type) +
                             " level output files during resuming");
      return s;
    }
    output_files_table_properties.push_back(tp);
  }
  return s;
}
|
|
2732
|
+
|
|
2733
|
+
void CompactionJob::RestoreCompactionOutputs(
|
|
2734
|
+
const ColumnFamilyData* cfd,
|
|
2735
|
+
const std::vector<std::shared_ptr<const TableProperties>>&
|
|
2736
|
+
output_files_table_properties,
|
|
2737
|
+
SubcompactionProgressPerLevel& subcompaction_progress_per_level,
|
|
2738
|
+
CompactionOutputs* outputs_to_restore) {
|
|
2739
|
+
assert(outputs_to_restore->GetOutputs().size() == 0);
|
|
2740
|
+
|
|
2741
|
+
const auto& output_files = subcompaction_progress_per_level.GetOutputFiles();
|
|
2742
|
+
|
|
2743
|
+
for (size_t i = 0; i < output_files.size(); i++) {
|
|
2744
|
+
FileMetaData file_copy = output_files[i];
|
|
2745
|
+
|
|
2746
|
+
outputs_to_restore->AddOutput(std::move(file_copy),
|
|
2747
|
+
cfd->internal_comparator(),
|
|
2748
|
+
paranoid_file_checks_, true /* finished */);
|
|
2749
|
+
|
|
2750
|
+
outputs_to_restore->UpdateTableProperties(
|
|
2751
|
+
*output_files_table_properties[i]);
|
|
2752
|
+
}
|
|
2753
|
+
|
|
2754
|
+
outputs_to_restore->SetNumOutputRecords(
|
|
2755
|
+
subcompaction_progress_per_level.GetNumProcessedOutputRecords());
|
|
2756
|
+
}
|
|
2757
|
+
|
|
2758
|
+
// Attempt to resume compaction from a previously persisted compaction progress.
|
|
2759
|
+
//
|
|
2760
|
+
// RETURNS:
|
|
2761
|
+
// - Status::OK():
|
|
2762
|
+
// * Input iterator positioned at next unprocessed key
|
|
2763
|
+
// * CompactionOutputs objects fully restored for both output and proximal
|
|
2764
|
+
// output levels in SubcompactionState
|
|
2765
|
+
// * Compaction job statistics accurately reflect input and output records
|
|
2766
|
+
// processed for record count verification
|
|
2767
|
+
// * File number generation advanced to prevent conflicts with existing outputs
|
|
2768
|
+
// - Status::NotFound(): No valid progress to resume from
|
|
2769
|
+
// - Status::Corruption(): Resume key is invalid, beyond input range, or output
|
|
2770
|
+
// restoration failed
|
|
2771
|
+
// - Other non-OK status: Iterator errors or file system issues during
|
|
2772
|
+
// restoration
|
|
2773
|
+
//
|
|
2774
|
+
// The caller must check for Status::IsIncomplete() to distinguish between
|
|
2775
|
+
// "no resume needed" (proceed with `InternalIterator::SeekToFirst()`) vs
|
|
2776
|
+
// "resume failed" scenarios.
|
|
2777
|
+
Status CompactionJob::MaybeResumeSubcompactionProgressOnInputIterator(
|
|
2778
|
+
SubcompactionState* sub_compact, InternalIterator* input_iter) {
|
|
2779
|
+
const ReadOptions read_options(Env::IOActivity::kCompaction);
|
|
2780
|
+
ColumnFamilyData* cfd = sub_compact->compaction->column_family_data();
|
|
2781
|
+
SubcompactionProgress& subcompaction_progress =
|
|
2782
|
+
sub_compact->GetSubcompactionProgressRef();
|
|
2783
|
+
|
|
2784
|
+
if (subcompaction_progress.output_level_progress
|
|
2785
|
+
.GetNumProcessedOutputRecords() == 0 &&
|
|
2786
|
+
subcompaction_progress.proximal_output_level_progress
|
|
2787
|
+
.GetNumProcessedOutputRecords() == 0) {
|
|
2788
|
+
return Status::NotFound("No subcompaction progress to resume");
|
|
2789
|
+
}
|
|
2790
|
+
|
|
2791
|
+
ROCKS_LOG_INFO(db_options_.info_log, "[%s] [JOB %d] Resuming compaction : %s",
|
|
2792
|
+
cfd->GetName().c_str(), job_id_,
|
|
2793
|
+
subcompaction_progress.ToString().c_str());
|
|
2794
|
+
|
|
2795
|
+
input_iter->Seek(subcompaction_progress.next_internal_key_to_compact);
|
|
2796
|
+
|
|
2797
|
+
if (!input_iter->Valid()) {
|
|
2798
|
+
ROCKS_LOG_ERROR(db_options_.info_log,
|
|
2799
|
+
"[%s] [JOB %d] Iterator is invalid after "
|
|
2800
|
+
"seeking to the key to resume. This indicates the key is "
|
|
2801
|
+
"incorrectly beyond the input data range.",
|
|
2802
|
+
cfd->GetName().c_str(), job_id_);
|
|
2803
|
+
return Status::Corruption(
|
|
2804
|
+
"The key to resume is beyond the input data range");
|
|
2805
|
+
} else if (!input_iter->status().ok()) {
|
|
2806
|
+
ROCKS_LOG_ERROR(db_options_.info_log,
|
|
2807
|
+
"[%s] [JOB %d] Iterator has error after seeking to "
|
|
2808
|
+
"the key to resume: %s",
|
|
2809
|
+
cfd->GetName().c_str(), job_id_,
|
|
2810
|
+
input_iter->status().ToString().c_str());
|
|
2811
|
+
return Status::Corruption(
|
|
2812
|
+
"Iterator has error status after seeking to the key: " +
|
|
2813
|
+
input_iter->status().ToString());
|
|
2814
|
+
}
|
|
2815
|
+
|
|
2816
|
+
sub_compact->compaction_job_stats.has_accurate_num_input_records =
|
|
2817
|
+
subcompaction_progress.num_processed_input_records != 0;
|
|
2818
|
+
|
|
2819
|
+
sub_compact->compaction_job_stats.num_input_records =
|
|
2820
|
+
subcompaction_progress.num_processed_input_records;
|
|
2821
|
+
|
|
2822
|
+
for (const bool& is_proximal_level : {false, true}) {
|
|
2823
|
+
if (is_proximal_level &&
|
|
2824
|
+
!sub_compact->compaction->SupportsPerKeyPlacement()) {
|
|
2825
|
+
continue;
|
|
2826
|
+
}
|
|
2827
|
+
|
|
2828
|
+
Status s;
|
|
2829
|
+
SubcompactionProgressPerLevel& subcompaction_progress_per_level =
|
|
2830
|
+
is_proximal_level
|
|
2831
|
+
? subcompaction_progress.proximal_output_level_progress
|
|
2832
|
+
: subcompaction_progress.output_level_progress;
|
|
2833
|
+
|
|
2834
|
+
const auto& output_files =
|
|
2835
|
+
subcompaction_progress_per_level.GetOutputFiles();
|
|
2836
|
+
|
|
2837
|
+
std::vector<std::shared_ptr<const TableProperties>>
|
|
2838
|
+
output_files_table_properties;
|
|
2839
|
+
|
|
2840
|
+
// TODO(hx235): investigate if we can skip reading properties to save read
|
|
2841
|
+
// IO
|
|
2842
|
+
s = ReadOutputFilesTableProperties(output_files, read_options,
|
|
2843
|
+
output_files_table_properties);
|
|
2844
|
+
if (!s.ok()) {
|
|
2845
|
+
ROCKS_LOG_ERROR(
|
|
2846
|
+
db_options_.info_log,
|
|
2847
|
+
"[%s] [JOB %d] Failed to read table properties for %s output level"
|
|
2848
|
+
"files "
|
|
2849
|
+
"during resume: %s.",
|
|
2850
|
+
cfd->GetName().c_str(), job_id_, is_proximal_level ? "proximal" : "",
|
|
2851
|
+
s.ToString().c_str());
|
|
2852
|
+
return Status::Corruption(
|
|
2853
|
+
"Not able to resume due to table property reading error " +
|
|
2854
|
+
s.ToString());
|
|
2855
|
+
}
|
|
2856
|
+
|
|
2857
|
+
RestoreCompactionOutputs(cfd, output_files_table_properties,
|
|
2858
|
+
subcompaction_progress_per_level,
|
|
2859
|
+
sub_compact->Outputs(is_proximal_level));
|
|
2860
|
+
|
|
2861
|
+
// Skip past all the used file numbers to avoid creating new output files
|
|
2862
|
+
// after resumption that conflict with the existing output files
|
|
2863
|
+
for (const auto& file_meta : output_files) {
|
|
2864
|
+
uint64_t file_number = file_meta.fd.GetNumber();
|
|
2865
|
+
while (versions_->NewFileNumber() <= file_number) {
|
|
2866
|
+
versions_->FetchAddFileNumber(1);
|
|
2867
|
+
}
|
|
2868
|
+
}
|
|
2869
|
+
}
|
|
2870
|
+
|
|
2871
|
+
return Status::OK();
|
|
2872
|
+
}
|
|
2873
|
+
|
|
2874
|
+
void CompactionJob::UpdateSubcompactionProgress(
|
|
2875
|
+
const CompactionIterator* c_iter, const Slice next_table_min_key,
|
|
2876
|
+
SubcompactionState* sub_compact) {
|
|
2877
|
+
assert(c_iter);
|
|
2878
|
+
SubcompactionProgress& subcompaction_progress =
|
|
2879
|
+
sub_compact->GetSubcompactionProgressRef();
|
|
2880
|
+
|
|
2881
|
+
IterKey next_ikey_to_compact;
|
|
2882
|
+
next_ikey_to_compact.SetInternalKey(ExtractUserKey(next_table_min_key),
|
|
2883
|
+
kMaxSequenceNumber, kValueTypeForSeek);
|
|
2884
|
+
subcompaction_progress.next_internal_key_to_compact =
|
|
2885
|
+
next_ikey_to_compact.GetInternalKey().ToString();
|
|
2886
|
+
|
|
2887
|
+
// Track total processed input records for progress reporting by combining:
|
|
2888
|
+
// - Resumed count: records already processed before compaction was
|
|
2889
|
+
// interrupted
|
|
2890
|
+
// - Current count: records scanned in the current compaction session
|
|
2891
|
+
// Only update when both tracking mechanisms provide accurate counts to ensure
|
|
2892
|
+
// reliability.
|
|
2893
|
+
subcompaction_progress.num_processed_input_records =
|
|
2894
|
+
c_iter->HasNumInputEntryScanned() &&
|
|
2895
|
+
sub_compact->compaction_job_stats.has_accurate_num_input_records
|
|
2896
|
+
? c_iter->NumInputEntryScanned() +
|
|
2897
|
+
sub_compact->compaction_job_stats.num_input_records
|
|
2898
|
+
: 0;
|
|
2899
|
+
|
|
2900
|
+
UpdateSubcompactionProgressPerLevel(
|
|
2901
|
+
sub_compact, false /* is_proximal_level */, subcompaction_progress);
|
|
2902
|
+
|
|
2903
|
+
if (sub_compact->compaction->SupportsPerKeyPlacement()) {
|
|
2904
|
+
UpdateSubcompactionProgressPerLevel(
|
|
2905
|
+
sub_compact, true /* is_proximal_level */, subcompaction_progress);
|
|
2906
|
+
}
|
|
2907
|
+
}
|
|
2908
|
+
|
|
2909
|
+
// Brings the progress of one output level (selected by `is_proximal_level`)
// up to date with the current state of `sub_compact`: copies the level's
// output record count and appends the metadata of every output file that is
// not yet recorded in the progress.
void CompactionJob::UpdateSubcompactionProgressPerLevel(
    SubcompactionState* sub_compact, bool is_proximal_level,
    SubcompactionProgress& subcompaction_progress) {
  SubcompactionProgressPerLevel& level_progress =
      is_proximal_level ? subcompaction_progress.proximal_output_level_progress
                        : subcompaction_progress.output_level_progress;

  const auto output_record_count =
      sub_compact->OutputStats(is_proximal_level)->num_output_records;
  level_progress.SetNumProcessedOutputRecords(output_record_count);

  const auto& recorded_files = level_progress.GetOutputFiles();
  const auto& live_outputs =
      sub_compact->Outputs(is_proximal_level)->GetOutputs();

  // Files in [0, recorded_files.size()) are already tracked; only the tail of
  // `live_outputs` is new.
  size_t idx = recorded_files.size();
  while (idx < live_outputs.size()) {
    level_progress.AddToOutputFiles(live_outputs[idx].meta);
    ++idx;
  }
}
|
|
2931
|
+
|
|
2932
|
+
// Writes the current progress of `sub_compact` to the compaction progress
// writer and syncs the underlying file.
//
// The progress is wrapped in a VersionEdit, encoded, appended as one record
// and then synced. Returns Status::Corruption() if encoding fails, or the
// first failing status of the append / IO-option preparation / sync sequence.
// Only after everything succeeded are the last-persisted output file counts
// of both levels updated.
Status CompactionJob::PersistSubcompactionProgress(
    SubcompactionState* sub_compact) {
  SubcompactionProgress& progress = sub_compact->GetSubcompactionProgressRef();

  assert(compaction_progress_writer_);

  VersionEdit progress_edit;
  progress_edit.SetSubcompactionProgress(progress);

  std::string encoded;
  const bool encode_ok = progress_edit.EncodeTo(&encoded);
  if (!encode_ok) {
    ROCKS_LOG_ERROR(
        db_options_.info_log,
        "[%s] [JOB %d] Failed to encode subcompaction "
        "progress",
        compact_->compaction->column_family_data()->GetName().c_str(), job_id_);
    return Status::Corruption("Failed to encode subcompaction progress");
  }

  // Append, prepare IO options, sync: each step runs only if the previous one
  // succeeded.
  WriteOptions write_options(Env::IOActivity::kCompaction);
  Status s = compaction_progress_writer_->AddRecord(write_options, encoded);
  IOOptions io_opts;
  if (s.ok()) {
    s = WritableFileWriter::PrepareIOOptions(write_options, io_opts);
    if (s.ok()) {
      s = compaction_progress_writer_->file()->Sync(io_opts,
                                                    db_options_.use_fsync);
    }
  }

  if (!s.ok()) {
    ROCKS_LOG_ERROR(
        db_options_.info_log,
        "[%s] [JOB %d] Failed to persist subcompaction "
        "progress: %s",
        compact_->compaction->column_family_data()->GetName().c_str(), job_id_,
        s.ToString().c_str());
    return s;
  }

  progress.output_level_progress.UpdateLastPersistedOutputFilesCount();
  progress.proximal_output_level_progress.UpdateLastPersistedOutputFilesCount();

  return Status::OK();
}
|
|
2980
|
+
|
|
2518
2981
|
Status CompactionJob::VerifyInputRecordCount(
|
|
2519
2982
|
uint64_t num_input_range_del) const {
|
|
2520
2983
|
size_t ts_sz = compact_->compaction->column_family_data()
|
|
@@ -2585,5 +3048,4 @@ Status CompactionJob::VerifyOutputRecordCount() const {
|
|
|
2585
3048
|
}
|
|
2586
3049
|
return Status::OK();
|
|
2587
3050
|
}
|
|
2588
|
-
|
|
2589
3051
|
} // namespace ROCKSDB_NAMESPACE
|