@nxtedition/rocksdb 8.0.0 → 8.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. package/BUILDING.md +2 -2
  2. package/binding.cc +2 -7
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -9
  4. package/deps/rocksdb/rocksdb/Makefile +2 -2
  5. package/deps/rocksdb/rocksdb/TARGETS +4 -2
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +0 -5
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +8 -29
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +146 -0
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +13 -1
  10. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +57 -146
  11. package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +32 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +11 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.cc +11 -9
  14. package/deps/rocksdb/rocksdb/db/column_family.h +20 -0
  15. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -33
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +27 -8
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +2 -1
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +4 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -6
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +65 -7
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +5 -0
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -32
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +28 -47
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +28 -22
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -14
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -8
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +5 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +170 -140
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +5 -1
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +5 -4
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -2
  35. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  36. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +266 -138
  37. package/deps/rocksdb/rocksdb/db/corruption_test.cc +86 -1
  38. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +72 -5
  39. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +119 -10
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +585 -264
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +46 -18
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +5 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +6 -15
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +8 -8
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +10 -0
  49. package/deps/rocksdb/rocksdb/db/db_iter.cc +57 -36
  50. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -1
  51. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +250 -2
  52. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -0
  53. package/deps/rocksdb/rocksdb/db/db_test2.cc +307 -8
  54. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +129 -0
  55. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +21 -0
  56. package/deps/rocksdb/rocksdb/db/dbformat.cc +25 -0
  57. package/deps/rocksdb/rocksdb/db/dbformat.h +2 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +5 -2
  60. package/deps/rocksdb/rocksdb/db/flush_job.cc +5 -2
  61. package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
  62. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +56 -53
  63. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/memtable.cc +55 -9
  65. package/deps/rocksdb/rocksdb/db/merge_helper.cc +76 -102
  66. package/deps/rocksdb/rocksdb/db/merge_helper.h +2 -11
  67. package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +10 -10
  68. package/deps/rocksdb/rocksdb/db/repair.cc +64 -22
  69. package/deps/rocksdb/rocksdb/db/repair_test.cc +54 -0
  70. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +26 -26
  71. package/deps/rocksdb/rocksdb/db/table_cache.cc +2 -0
  72. package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -1
  73. package/deps/rocksdb/rocksdb/db/version_builder.cc +90 -43
  74. package/deps/rocksdb/rocksdb/db/version_builder.h +20 -0
  75. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +190 -67
  76. package/deps/rocksdb/rocksdb/db/version_edit.cc +15 -1
  77. package/deps/rocksdb/rocksdb/db/version_edit.h +16 -4
  78. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +41 -11
  79. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +27 -12
  80. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +18 -16
  81. package/deps/rocksdb/rocksdb/db/version_set.cc +219 -38
  82. package/deps/rocksdb/rocksdb/db/version_set.h +34 -4
  83. package/deps/rocksdb/rocksdb/db/version_set_test.cc +45 -25
  84. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +122 -61
  85. package/deps/rocksdb/rocksdb/db/write_thread.cc +5 -2
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +0 -1
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +0 -4
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +12 -17
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +6 -4
  90. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -1
  91. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +1 -0
  92. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +0 -48
  93. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
  94. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +196 -171
  95. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +9 -3
  97. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +25 -18
  98. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +27 -5
  99. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  100. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +3 -0
  101. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +3 -0
  102. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  103. package/deps/rocksdb/rocksdb/logging/logging.h +13 -19
  104. package/deps/rocksdb/rocksdb/memory/arena.cc +4 -3
  105. package/deps/rocksdb/rocksdb/memory/arena_test.cc +30 -0
  106. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
  107. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  108. package/deps/rocksdb/rocksdb/src.mk +2 -1
  109. package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -2
  110. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -10
  111. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -29
  112. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +1 -1
  113. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -39
  114. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +0 -1
  115. package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -3
  116. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +142 -0
  117. package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +241 -0
  118. package/deps/rocksdb/rocksdb/table/format.cc +24 -20
  119. package/deps/rocksdb/rocksdb/table/format.h +5 -2
  120. package/deps/rocksdb/rocksdb/table/get_context.cc +52 -11
  121. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +97 -115
  122. package/deps/rocksdb/rocksdb/table/merging_iterator.h +82 -1
  123. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +2 -2
  124. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
  125. package/deps/rocksdb/rocksdb/table/table_test.cc +7 -6
  126. package/deps/rocksdb/rocksdb/test_util/testutil.h +10 -0
  127. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +0 -6
  128. package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +2 -2
  129. package/deps/rocksdb/rocksdb/util/bloom_test.cc +1 -1
  130. package/deps/rocksdb/rocksdb/util/crc32c.cc +1 -1
  131. package/deps/rocksdb/rocksdb/util/status.cc +7 -0
  132. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +5 -0
  133. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -0
  134. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -67
  135. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -3
  136. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -0
  137. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +59 -0
  138. package/deps/rocksdb/rocksdb.gyp +2 -1
  139. package/package.json +1 -1
  140. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  141. package/prebuilds/linux-x64/node.napi.node +0 -0
  142. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +0 -580
  143. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +0 -476
  144. package/max_rev_operator.h +0 -100
@@ -1059,16 +1059,31 @@ void DBImpl::DumpStats() {
1059
1059
  return;
1060
1060
  }
1061
1061
 
1062
+ // Also probe block cache(s) for problems, dump to info log
1063
+ UnorderedSet<Cache*> probed_caches;
1062
1064
  TEST_SYNC_POINT("DBImpl::DumpStats:StartRunning");
1063
1065
  {
1064
1066
  InstrumentedMutexLock l(&mutex_);
1065
1067
  for (auto cfd : versions_->GetRefedColumnFamilySet()) {
1066
- if (cfd->initialized()) {
1067
- // Release DB mutex for gathering cache entry stats. Pass over all
1068
- // column families for this first so that other stats are dumped
1069
- // near-atomically.
1070
- InstrumentedMutexUnlock u(&mutex_);
1071
- cfd->internal_stats()->CollectCacheEntryStats(/*foreground=*/false);
1068
+ if (!cfd->initialized()) {
1069
+ continue;
1070
+ }
1071
+
1072
+ // Release DB mutex for gathering cache entry stats. Pass over all
1073
+ // column families for this first so that other stats are dumped
1074
+ // near-atomically.
1075
+ InstrumentedMutexUnlock u(&mutex_);
1076
+ cfd->internal_stats()->CollectCacheEntryStats(/*foreground=*/false);
1077
+
1078
+ // Probe block cache for problems (if not already via another CF)
1079
+ if (immutable_db_options_.info_log) {
1080
+ auto* table_factory = cfd->ioptions()->table_factory.get();
1081
+ assert(table_factory != nullptr);
1082
+ Cache* cache =
1083
+ table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
1084
+ if (cache && probed_caches.insert(cache).second) {
1085
+ cache->ReportProblems(immutable_db_options_.info_log);
1086
+ }
1072
1087
  }
1073
1088
  }
1074
1089
 
@@ -1555,21 +1570,31 @@ Status DBImpl::ApplyWALToManifest(VersionEdit* synced_wals) {
1555
1570
  }
1556
1571
 
1557
1572
  Status DBImpl::LockWAL() {
1558
- log_write_mutex_.Lock();
1559
- auto cur_log_writer = logs_.back().writer;
1560
- IOStatus status = cur_log_writer->WriteBuffer();
1561
- if (!status.ok()) {
1562
- ROCKS_LOG_ERROR(immutable_db_options_.info_log, "WAL flush error %s",
1563
- status.ToString().c_str());
1564
- // In case there is a fs error we should set it globally to prevent the
1565
- // future writes
1566
- WriteStatusCheck(status);
1573
+ {
1574
+ InstrumentedMutexLock lock(&mutex_);
1575
+ WriteThread::Writer w;
1576
+ write_thread_.EnterUnbatched(&w, &mutex_);
1577
+ WriteThread::Writer nonmem_w;
1578
+ if (two_write_queues_) {
1579
+ nonmem_write_thread_.EnterUnbatched(&nonmem_w, &mutex_);
1580
+ }
1581
+
1582
+ lock_wal_write_token_ = write_controller_.GetStopToken();
1583
+
1584
+ if (two_write_queues_) {
1585
+ nonmem_write_thread_.ExitUnbatched(&nonmem_w);
1586
+ }
1587
+ write_thread_.ExitUnbatched(&w);
1567
1588
  }
1568
- return static_cast<Status>(status);
1589
+ return FlushWAL(/*sync=*/false);
1569
1590
  }
1570
1591
 
1571
1592
  Status DBImpl::UnlockWAL() {
1572
- log_write_mutex_.Unlock();
1593
+ {
1594
+ InstrumentedMutexLock lock(&mutex_);
1595
+ lock_wal_write_token_.reset();
1596
+ }
1597
+ bg_cv_.SignalAll();
1573
1598
  return Status::OK();
1574
1599
  }
1575
1600
 
@@ -1808,7 +1833,8 @@ InternalIterator* DBImpl::NewInternalIterator(
1808
1833
  MergeIteratorBuilder merge_iter_builder(
1809
1834
  &cfd->internal_comparator(), arena,
1810
1835
  !read_options.total_order_seek &&
1811
- super_version->mutable_cf_options.prefix_extractor != nullptr);
1836
+ super_version->mutable_cf_options.prefix_extractor != nullptr,
1837
+ read_options.iterate_upper_bound);
1812
1838
  // Collect iterator for mutable memtable
1813
1839
  auto mem_iter = super_version->mem->NewIterator(read_options, arena);
1814
1840
  Status s;
@@ -5302,6 +5328,7 @@ Status DBImpl::IngestExternalFiles(
5302
5328
  // Run ingestion jobs.
5303
5329
  if (status.ok()) {
5304
5330
  for (size_t i = 0; i != num_cfs; ++i) {
5331
+ mutex_.AssertHeld();
5305
5332
  status = ingestion_jobs[i].Run();
5306
5333
  if (!status.ok()) {
5307
5334
  break;
@@ -5506,6 +5533,7 @@ Status DBImpl::CreateColumnFamilyWithImport(
5506
5533
 
5507
5534
  num_running_ingest_file_++;
5508
5535
  assert(!cfd->IsDropped());
5536
+ mutex_.AssertHeld();
5509
5537
  status = import_job.Run();
5510
5538
 
5511
5539
  // Install job edit [Mutex will be unlocked here]
@@ -1161,7 +1161,7 @@ class DBImpl : public DB {
1161
1161
  int TEST_BGCompactionsAllowed() const;
1162
1162
  int TEST_BGFlushesAllowed() const;
1163
1163
  size_t TEST_GetWalPreallocateBlockSize(uint64_t write_buffer_size) const;
1164
- void TEST_WaitForPeridicTaskRun(std::function<void()> callback) const;
1164
+ void TEST_WaitForPeriodicTaskRun(std::function<void()> callback) const;
1165
1165
  SeqnoToTimeMapping TEST_GetSeqnoToTimeMapping() const;
1166
1166
  size_t TEST_EstimateInMemoryStatsHistorySize() const;
1167
1167
 
@@ -2680,6 +2680,10 @@ class DBImpl : public DB {
2680
2680
  // seqno_time_mapping_ stores the sequence number to time mapping, it's not
2681
2681
  // thread safe, both read and write need db mutex hold.
2682
2682
  SeqnoToTimeMapping seqno_time_mapping_;
2683
+
2684
+ // stop write token that is acquired when LockWal() is called. Destructed
2685
+ // when UnlockWal() is called.
2686
+ std::unique_ptr<WriteControllerToken> lock_wal_write_token_;
2683
2687
  };
2684
2688
 
2685
2689
  class GetWithTimestampReadCallback : public ReadCallback {
@@ -1344,18 +1344,8 @@ Status DBImpl::CompactFilesImpl(
1344
1344
  }
1345
1345
  }
1346
1346
 
1347
- SequenceNumber earliest_mem_seqno = kMaxSequenceNumber;
1348
- if (cfd->mem() != nullptr) {
1349
- earliest_mem_seqno =
1350
- std::min(cfd->mem()->GetEarliestSequenceNumber(), earliest_mem_seqno);
1351
- }
1352
- if (cfd->imm() != nullptr && cfd->imm()->current() != nullptr) {
1353
- earliest_mem_seqno =
1354
- std::min(cfd->imm()->current()->GetEarliestSequenceNumber(false),
1355
- earliest_mem_seqno);
1356
- }
1357
1347
  Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles(
1358
- &input_set, cf_meta, output_level, earliest_mem_seqno);
1348
+ &input_set, cf_meta, output_level);
1359
1349
  if (!s.ok()) {
1360
1350
  return s;
1361
1351
  }
@@ -1697,14 +1687,15 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
1697
1687
 
1698
1688
  VersionEdit edit;
1699
1689
  edit.SetColumnFamily(cfd->GetID());
1690
+
1700
1691
  for (const auto& f : vstorage->LevelFiles(level)) {
1701
1692
  edit.DeleteFile(level, f->fd.GetNumber());
1702
1693
  edit.AddFile(
1703
1694
  to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(),
1704
1695
  f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
1705
1696
  f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
1706
- f->oldest_ancester_time, f->file_creation_time, f->file_checksum,
1707
- f->file_checksum_func_name, f->unique_id);
1697
+ f->oldest_ancester_time, f->file_creation_time, f->epoch_number,
1698
+ f->file_checksum, f->file_checksum_func_name, f->unique_id);
1708
1699
  }
1709
1700
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
1710
1701
  "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
@@ -3344,8 +3335,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
3344
3335
  f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno,
3345
3336
  f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
3346
3337
  f->oldest_blob_file_number, f->oldest_ancester_time,
3347
- f->file_creation_time, f->file_checksum, f->file_checksum_func_name,
3348
- f->unique_id);
3338
+ f->file_creation_time, f->epoch_number, f->file_checksum,
3339
+ f->file_checksum_func_name, f->unique_id);
3349
3340
 
3350
3341
  ROCKS_LOG_BUFFER(
3351
3342
  log_buffer,
@@ -290,7 +290,7 @@ size_t DBImpl::TEST_GetWalPreallocateBlockSize(
290
290
  }
291
291
 
292
292
  #ifndef ROCKSDB_LITE
293
- void DBImpl::TEST_WaitForPeridicTaskRun(std::function<void()> callback) const {
293
+ void DBImpl::TEST_WaitForPeriodicTaskRun(std::function<void()> callback) const {
294
294
  periodic_task_scheduler_.TEST_WaitForRun(callback);
295
295
  }
296
296
 
@@ -136,7 +136,7 @@ Status DBImpl::PromoteL0(ColumnFamilyHandle* column_family, int target_level) {
136
136
  f->fd.smallest_seqno, f->fd.largest_seqno,
137
137
  f->marked_for_compaction, f->temperature,
138
138
  f->oldest_blob_file_number, f->oldest_ancester_time,
139
- f->file_creation_time, f->file_checksum,
139
+ f->file_creation_time, f->epoch_number, f->file_checksum,
140
140
  f->file_checksum_func_name, f->unique_id);
141
141
  }
142
142
 
@@ -315,6 +315,7 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
315
315
  }
316
316
  log_write_mutex_.Unlock();
317
317
  mutex_.Unlock();
318
+ TEST_SYNC_POINT_CALLBACK("FindObsoleteFiles::PostMutexUnlock", nullptr);
318
319
  log_write_mutex_.Lock();
319
320
  while (!logs_.empty() && logs_.front().number < min_log_number) {
320
321
  auto& log = logs_.front();
@@ -360,6 +361,8 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname,
360
361
  }
361
362
  TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion",
362
363
  &file_deletion_status);
364
+ TEST_SYNC_POINT_CALLBACK("DBImpl::DeleteObsoleteFileImpl:AfterDeletion2",
365
+ const_cast<std::string*>(&fname));
363
366
  if (file_deletion_status.ok()) {
364
367
  ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
365
368
  "[JOB %d] Delete %s type=%d #%" PRIu64 " -- %s\n", job_id,
@@ -1515,7 +1515,7 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1515
1515
  .PermitUncheckedError(); // ignore error
1516
1516
  const uint64_t current_time = static_cast<uint64_t>(_current_time);
1517
1517
  meta.oldest_ancester_time = current_time;
1518
-
1518
+ meta.epoch_number = cfd->NewEpochNumber();
1519
1519
  {
1520
1520
  auto write_hint = cfd->CalculateSSTWriteHint(0);
1521
1521
  mutex_.Unlock();
@@ -1583,13 +1583,13 @@ Status DBImpl::WriteLevel0TableForRecovery(int job_id, ColumnFamilyData* cfd,
1583
1583
  constexpr int level = 0;
1584
1584
 
1585
1585
  if (s.ok() && has_output) {
1586
- edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(),
1587
- meta.fd.GetFileSize(), meta.smallest, meta.largest,
1588
- meta.fd.smallest_seqno, meta.fd.largest_seqno,
1589
- meta.marked_for_compaction, meta.temperature,
1590
- meta.oldest_blob_file_number, meta.oldest_ancester_time,
1591
- meta.file_creation_time, meta.file_checksum,
1592
- meta.file_checksum_func_name, meta.unique_id);
1586
+ edit->AddFile(
1587
+ level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(),
1588
+ meta.smallest, meta.largest, meta.fd.smallest_seqno,
1589
+ meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature,
1590
+ meta.oldest_blob_file_number, meta.oldest_ancester_time,
1591
+ meta.file_creation_time, meta.epoch_number, meta.file_checksum,
1592
+ meta.file_checksum_func_name, meta.unique_id);
1593
1593
 
1594
1594
  for (const auto& blob : blob_file_additions) {
1595
1595
  edit->AddBlobFile(blob);
@@ -924,6 +924,15 @@ Status DBImpl::WriteImplWALOnly(
924
924
  write_thread->ExitAsBatchGroupLeader(write_group, status);
925
925
  return status;
926
926
  }
927
+ } else {
928
+ InstrumentedMutexLock lock(&mutex_);
929
+ Status status = DelayWrite(/*num_bytes=*/0ull, write_options);
930
+ if (!status.ok()) {
931
+ WriteThread::WriteGroup write_group;
932
+ write_thread->EnterAsBatchGroupLeader(&w, &write_group);
933
+ write_thread->ExitAsBatchGroupLeader(write_group, status);
934
+ return status;
935
+ }
927
936
  }
928
937
 
929
938
  WriteThread::WriteGroup write_group;
@@ -1762,6 +1771,7 @@ uint64_t DBImpl::GetMaxTotalWalSize() const {
1762
1771
  // REQUIRES: this thread is currently at the front of the writer queue
1763
1772
  Status DBImpl::DelayWrite(uint64_t num_bytes,
1764
1773
  const WriteOptions& write_options) {
1774
+ mutex_.AssertHeld();
1765
1775
  uint64_t time_delayed = 0;
1766
1776
  bool delayed = false;
1767
1777
  {
@@ -158,6 +158,8 @@ void DBIter::Next() {
158
158
 
159
159
  local_stats_.next_count_++;
160
160
  if (ok && iter_.Valid()) {
161
+ ClearSavedValue();
162
+
161
163
  if (prefix_same_as_start_) {
162
164
  assert(prefix_extractor_ != nullptr);
163
165
  const Slice prefix = prefix_.GetUserKey();
@@ -544,8 +546,7 @@ bool DBIter::MergeValuesNewToOld() {
544
546
  // hit a put, merge the put value with operands and store the
545
547
  // final result in saved_value_. We are done!
546
548
  const Slice val = iter_.value();
547
- Status s = Merge(&val, ikey.user_key);
548
- if (!s.ok()) {
549
+ if (!Merge(&val, ikey.user_key)) {
549
550
  return false;
550
551
  }
551
552
  // iter_ is positioned after put
@@ -574,8 +575,7 @@ bool DBIter::MergeValuesNewToOld() {
574
575
  return false;
575
576
  }
576
577
  valid_ = true;
577
- Status s = Merge(&blob_value_, ikey.user_key);
578
- if (!s.ok()) {
578
+ if (!Merge(&blob_value_, ikey.user_key)) {
579
579
  return false;
580
580
  }
581
581
 
@@ -589,11 +589,18 @@ bool DBIter::MergeValuesNewToOld() {
589
589
  }
590
590
  return true;
591
591
  } else if (kTypeWideColumnEntity == ikey.type) {
592
- // TODO: support wide-column entities
593
- status_ = Status::NotSupported(
594
- "Merge currently not supported for wide-column entities");
595
- valid_ = false;
596
- return false;
592
+ if (!MergeEntity(iter_.value(), ikey.user_key)) {
593
+ return false;
594
+ }
595
+
596
+ // iter_ is positioned after put
597
+ iter_.Next();
598
+ if (!iter_.status().ok()) {
599
+ valid_ = false;
600
+ return false;
601
+ }
602
+
603
+ return true;
597
604
  } else {
598
605
  valid_ = false;
599
606
  status_ = Status::Corruption(
@@ -612,8 +619,7 @@ bool DBIter::MergeValuesNewToOld() {
612
619
  // a deletion marker.
613
620
  // feed null as the existing value to the merge operator, such that
614
621
  // client can differentiate this scenario and do things accordingly.
615
- Status s = Merge(nullptr, saved_key_.GetUserKey());
616
- if (!s.ok()) {
622
+ if (!Merge(nullptr, saved_key_.GetUserKey())) {
617
623
  return false;
618
624
  }
619
625
  assert(status_.ok());
@@ -636,6 +642,8 @@ void DBIter::Prev() {
636
642
  }
637
643
  }
638
644
  if (ok) {
645
+ ClearSavedValue();
646
+
639
647
  Slice prefix;
640
648
  if (prefix_same_as_start_) {
641
649
  assert(prefix_extractor_ != nullptr);
@@ -960,8 +968,7 @@ bool DBIter::FindValueForCurrentKey() {
960
968
  if (last_not_merge_type == kTypeDeletion ||
961
969
  last_not_merge_type == kTypeSingleDeletion ||
962
970
  last_not_merge_type == kTypeDeletionWithTimestamp) {
963
- s = Merge(nullptr, saved_key_.GetUserKey());
964
- if (!s.ok()) {
971
+ if (!Merge(nullptr, saved_key_.GetUserKey())) {
965
972
  return false;
966
973
  }
967
974
  return true;
@@ -976,8 +983,7 @@ bool DBIter::FindValueForCurrentKey() {
976
983
  return false;
977
984
  }
978
985
  valid_ = true;
979
- s = Merge(&blob_value_, saved_key_.GetUserKey());
980
- if (!s.ok()) {
986
+ if (!Merge(&blob_value_, saved_key_.GetUserKey())) {
981
987
  return false;
982
988
  }
983
989
 
@@ -985,15 +991,14 @@ bool DBIter::FindValueForCurrentKey() {
985
991
 
986
992
  return true;
987
993
  } else if (last_not_merge_type == kTypeWideColumnEntity) {
988
- // TODO: support wide-column entities
989
- status_ = Status::NotSupported(
990
- "Merge currently not supported for wide-column entities");
991
- valid_ = false;
992
- return false;
994
+ if (!MergeEntity(pinned_value_, saved_key_.GetUserKey())) {
995
+ return false;
996
+ }
997
+
998
+ return true;
993
999
  } else {
994
1000
  assert(last_not_merge_type == kTypeValue);
995
- s = Merge(&pinned_value_, saved_key_.GetUserKey());
996
- if (!s.ok()) {
1001
+ if (!Merge(&pinned_value_, saved_key_.GetUserKey())) {
997
1002
  return false;
998
1003
  }
999
1004
  return true;
@@ -1177,8 +1182,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1177
1182
 
1178
1183
  if (ikey.type == kTypeValue) {
1179
1184
  const Slice val = iter_.value();
1180
- Status s = Merge(&val, saved_key_.GetUserKey());
1181
- if (!s.ok()) {
1185
+ if (!Merge(&val, saved_key_.GetUserKey())) {
1182
1186
  return false;
1183
1187
  }
1184
1188
  return true;
@@ -1197,8 +1201,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1197
1201
  return false;
1198
1202
  }
1199
1203
  valid_ = true;
1200
- Status s = Merge(&blob_value_, saved_key_.GetUserKey());
1201
- if (!s.ok()) {
1204
+ if (!Merge(&blob_value_, saved_key_.GetUserKey())) {
1202
1205
  return false;
1203
1206
  }
1204
1207
 
@@ -1206,11 +1209,11 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1206
1209
 
1207
1210
  return true;
1208
1211
  } else if (ikey.type == kTypeWideColumnEntity) {
1209
- // TODO: support wide-column entities
1210
- status_ = Status::NotSupported(
1211
- "Merge currently not supported for wide-column entities");
1212
- valid_ = false;
1213
- return false;
1212
+ if (!MergeEntity(iter_.value(), saved_key_.GetUserKey())) {
1213
+ return false;
1214
+ }
1215
+
1216
+ return true;
1214
1217
  } else {
1215
1218
  valid_ = false;
1216
1219
  status_ = Status::Corruption(
@@ -1220,8 +1223,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1220
1223
  }
1221
1224
  }
1222
1225
 
1223
- Status s = Merge(nullptr, saved_key_.GetUserKey());
1224
- if (!s.ok()) {
1226
+ if (!Merge(nullptr, saved_key_.GetUserKey())) {
1225
1227
  return false;
1226
1228
  }
1227
1229
 
@@ -1244,7 +1246,7 @@ bool DBIter::FindValueForCurrentKeyUsingSeek() {
1244
1246
  return true;
1245
1247
  }
1246
1248
 
1247
- Status DBIter::Merge(const Slice* val, const Slice& user_key) {
1249
+ bool DBIter::Merge(const Slice* val, const Slice& user_key) {
1248
1250
  Status s = MergeHelper::TimedFullMerge(
1249
1251
  merge_operator_, user_key, val, merge_context_.GetOperands(),
1250
1252
  &saved_value_, logger_, statistics_, clock_, &pinned_value_,
@@ -1252,14 +1254,33 @@ Status DBIter::Merge(const Slice* val, const Slice& user_key) {
1252
1254
  if (!s.ok()) {
1253
1255
  valid_ = false;
1254
1256
  status_ = s;
1255
- return s;
1257
+ return false;
1256
1258
  }
1257
1259
 
1258
1260
  SetValueAndColumnsFromPlain(pinned_value_.data() ? pinned_value_
1259
1261
  : saved_value_);
1260
1262
 
1261
1263
  valid_ = true;
1262
- return s;
1264
+ return true;
1265
+ }
1266
+
1267
+ bool DBIter::MergeEntity(const Slice& entity, const Slice& user_key) {
1268
+ Status s = MergeHelper::TimedFullMergeWithEntity(
1269
+ merge_operator_, user_key, entity, merge_context_.GetOperands(),
1270
+ &saved_value_, logger_, statistics_, clock_,
1271
+ /* update_num_ops_stats */ true);
1272
+ if (!s.ok()) {
1273
+ valid_ = false;
1274
+ status_ = s;
1275
+ return false;
1276
+ }
1277
+
1278
+ if (!SetValueAndColumnsFromEntity(saved_value_)) {
1279
+ return false;
1280
+ }
1281
+
1282
+ valid_ = true;
1283
+ return true;
1263
1284
  }
1264
1285
 
1265
1286
  // Move backwards until the key smaller than saved_key_.
@@ -318,7 +318,8 @@ class DBIter final : public Iterator {
318
318
  }
319
319
 
320
320
  // If user-defined timestamp is enabled, `user_key` includes timestamp.
321
- Status Merge(const Slice* val, const Slice& user_key);
321
+ bool Merge(const Slice* val, const Slice& user_key);
322
+ bool MergeEntity(const Slice& entity, const Slice& user_key);
322
323
 
323
324
  const SliceTransform* prefix_extractor_;
324
325
  Env* const env_;