@nxtedition/rocksdb 8.0.2 → 8.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +17 -17
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +55 -3
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +8 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +6 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +12 -13
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +13 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +14 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +6 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +39 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +9 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0

package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc

@@ -532,23 +532,18 @@ Status CompactionOutputs::AddRangeDels(
       // Pretend the smallest key has the same user key as lower_bound
       // (the max key in the previous table or subcompaction) in order for
       // files to appear key-space partitioned.
-      //
-      // When lower_bound is chosen by a subcompaction, we know that
-      // subcompactions over smaller keys cannot contain any keys at
-      // lower_bound. We also know that smaller subcompactions exist,
-      // because otherwise the subcompaction woud be unbounded on the left.
-      // As a result, we know that no other files on the output level will
-      // contain actual keys at lower_bound (an output file may have a
-      // largest key of lower_bound@kMaxSequenceNumber, but this only
-      // indicates a large range tombstone was truncated). Therefore, it is
-      // safe to use the tombstone's sequence number, to ensure that keys at
-      // lower_bound at lower levels are covered by truncated tombstones.
-      //
-      // If lower_bound was chosen by the smallest data key in the file,
-      // choose lowest seqnum so this file's smallest internal key comes
-      // after the previous file's largest. The fake seqnum is OK because
-      // the read path's file-picking code only considers user key.
       if (lower_bound_from_sub_compact) {
+        // When lower_bound is chosen by a subcompaction
+        // (lower_bound_from_sub_compact), we know that subcompactions over
+        // smaller keys cannot contain any keys at lower_bound. We also know
+        // that smaller subcompactions exist, because otherwise the
+        // subcompaction woud be unbounded on the left. As a result, we know
+        // that no other files on the output level will contain actual keys at
+        // lower_bound (an output file may have a largest key of
+        // lower_bound@kMaxSequenceNumber, but this only indicates a large range
+        // tombstone was truncated). Therefore, it is safe to use the
+        // tombstone's sequence number, to ensure that keys at lower_bound at
+        // lower levels are covered by truncated tombstones.
         if (ts_sz) {
           assert(tombstone.ts_.size() == ts_sz);
           smallest_candidate = InternalKey(*lower_bound, tombstone.seq_,

@@ -558,6 +553,7 @@ Status CompactionOutputs::AddRangeDels(
               InternalKey(*lower_bound, tombstone.seq_, kTypeRangeDeletion);
         }
       } else if (lower_bound_from_range_tombstone) {
+        // When lower_bound is chosen from a range tombtone start key:
         // Range tombstone keys can be truncated at file boundaries of the files
         // that contain them.
         //

@@ -591,6 +587,10 @@ Status CompactionOutputs::AddRangeDels(
           smallest_candidate = range_tombstone_lower_bound_;
         }
       } else {
+        // If lower_bound was chosen by the smallest data key in the file,
+        // choose lowest seqnum so this file's smallest internal key comes
+        // after the previous file's largest. The fake seqnum is OK because
+        // the read path's file-picking code only considers user key.
         smallest_candidate = InternalKey(*lower_bound, 0, kTypeRangeDeletion);
       }
     }

@@ -640,7 +640,7 @@ Status CompactionOutputs::AddRangeDels(
     // it cannot have a seqnum of 0 (unless the smallest data key in a file
     // has a seqnum of 0). Otherwise, the truncated tombstone may expose
    // deleted keys at lower levels.
-    assert(smallest_ikey_seqnum == 0 ||
+    assert(smallest_ikey_seqnum == 0 || lower_bound_from_range_tombstone ||
           ExtractInternalKeyFooter(meta.smallest.Encode()) !=
           PackSequenceAndType(0, kTypeRangeDeletion));
   }
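
For context on the assert in the last hunk: a RocksDB internal key ends in an 8-byte footer that packs the sequence number and value type, which is what ExtractInternalKeyFooter() and PackSequenceAndType() compare. A minimal sketch of that packing, assuming the standard layout with the seqnum in the high 56 bits (illustrative, not the RocksDB source):

    #include <cassert>
    #include <cstdint>

    // Value type tag carried in the low byte of the footer; 0xF is the tag
    // RocksDB uses for range deletions (assumed here for illustration).
    enum ValueType : uint8_t { kTypeRangeDeletion = 0xF };

    // Pack a 56-bit sequence number and a type into one uint64_t footer.
    uint64_t PackSequenceAndType(uint64_t seq, ValueType t) {
      return (seq << 8) | t;
    }

    int main() {
      // The assert in the diff checks that a file's smallest key is not
      // lower_bound@seqnum=0 as a range deletion, i.e. not this footer:
      assert(PackSequenceAndType(0, kTypeRangeDeletion) == 0xF);
      return 0;
    }
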
package/deps/rocksdb/rocksdb/db/db_compaction_test.cc

@@ -1026,6 +1026,70 @@ TEST_F(DBCompactionTest, CompactionSstPartitioner) {
   ASSERT_EQ("B", Get("bbbb1"));
 }
 
+TEST_F(DBCompactionTest, CompactionSstPartitionWithManualCompaction) {
+  Options options = CurrentOptions();
+  options.compaction_style = kCompactionStyleLevel;
+  options.level0_file_num_compaction_trigger = 3;
+
+  DestroyAndReopen(options);
+
+  // create first file and flush to l0
+  ASSERT_OK(Put("000015", "A"));
+  ASSERT_OK(Put("000025", "B"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+
+  // create second file and flush to l0
+  ASSERT_OK(Put("000015", "A2"));
+  ASSERT_OK(Put("000025", "B2"));
+  ASSERT_OK(Flush());
+  ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
+
+  // CONTROL 1: compact without partitioner
+  CompactRangeOptions compact_options;
+  compact_options.bottommost_level_compaction =
+      BottommostLevelCompaction::kForceOptimized;
+  ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
+
+  // Check (compacted but no partitioning yet)
+  std::vector<LiveFileMetaData> files;
+  dbfull()->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(1, files.size());
+
+  // Install partitioner
+  std::shared_ptr<SstPartitionerFactory> factory(
+      NewSstPartitionerFixedPrefixFactory(5));
+  options.sst_partitioner_factory = factory;
+  Reopen(options);
+
+  // CONTROL 2: request compaction on range with no partition boundary and no
+  // overlap with actual entries
+  Slice from("000017");
+  Slice to("000019");
+  ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));
+
+  // Check (no partitioning yet)
+  files.clear();
+  dbfull()->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(1, files.size());
+  ASSERT_EQ("A2", Get("000015"));
+  ASSERT_EQ("B2", Get("000025"));
+
+  // TEST: request compaction overlapping with partition boundary but no
+  // actual entries
+  // NOTE: `to` is INCLUSIVE
+  from = Slice("000019");
+  to = Slice("000020");
+  ASSERT_OK(dbfull()->CompactRange(compact_options, &from, &to));
+
+  // Check (must be partitioned)
+  files.clear();
+  dbfull()->GetLiveFilesMetaData(&files);
+  ASSERT_EQ(2, files.size());
+  ASSERT_EQ("A2", Get("000015"));
+  ASSERT_EQ("B2", Get("000025"));
+}
+
 TEST_F(DBCompactionTest, CompactionSstPartitionerNonTrivial) {
   Options options = CurrentOptions();
   options.compaction_style = kCompactionStyleLevel;
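
The new test drives the fixed-prefix SST partitioner through manual compaction. As a standalone illustration of the same option, here is a sketch of a regular application enabling it; the database path and keys are hypothetical:

    #include <rocksdb/db.h>
    #include <rocksdb/options.h>
    #include <rocksdb/sst_partitioner.h>

    int main() {
      rocksdb::Options options;
      options.create_if_missing = true;
      // Cut output files whenever the first 5 key bytes change, so keys
      // "00001..." and "00002..." can never share an SST file after
      // compaction.
      options.sst_partitioner_factory =
          rocksdb::NewSstPartitionerFixedPrefixFactory(5);

      rocksdb::DB* db = nullptr;
      rocksdb::Status s =
          rocksdb::DB::Open(options, "/tmp/partitioned_db", &db);
      if (s.ok()) {
        db->Put(rocksdb::WriteOptions(), "000015", "A");
        db->Put(rocksdb::WriteOptions(), "000025", "B");
        delete db;
      }
      return s.ok() ? 0 : 1;
    }
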
package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc

@@ -1087,6 +1087,22 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
   {
     SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
     Version* current_version = super_version->current;
+
+    // Might need to query the partitioner
+    SstPartitionerFactory* partitioner_factory =
+        current_version->cfd()->ioptions()->sst_partitioner_factory.get();
+    std::unique_ptr<SstPartitioner> partitioner;
+    if (partitioner_factory && begin != nullptr && end != nullptr) {
+      SstPartitioner::Context context;
+      context.is_full_compaction = false;
+      context.is_manual_compaction = true;
+      context.output_level = /*unknown*/ -1;
+      // Small lies about compaction range
+      context.smallest_user_key = *begin;
+      context.largest_user_key = *end;
+      partitioner = partitioner_factory->CreatePartitioner(context);
+    }
+
     ReadOptions ro;
     ro.total_order_seek = true;
     bool overlap;

@@ -1094,14 +1110,50 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
          level < current_version->storage_info()->num_non_empty_levels();
          level++) {
       overlap = true;
+
+      // Whether to look at specific keys within files for overlap with
+      // compaction range, other than largest and smallest keys of the file
+      // known in Version metadata.
+      bool check_overlap_within_file = false;
       if (begin != nullptr && end != nullptr) {
+        // Typically checking overlap within files in this case
+        check_overlap_within_file = true;
+        // WART: Not known why we don't check within file in one-sided bound
+        // cases
+        if (partitioner) {
+          // Especially if the partitioner is new, the manual compaction
+          // might be used to enforce the partitioning. Checking overlap
+          // within files might miss cases where compaction is needed to
+          // partition the files, as in this example:
+          // * File has two keys "001" and "111"
+          // * Compaction range is ["011", "101")
+          // * Partition boundary at "100"
+          // In cases like this, file-level overlap with the compaction
+          // range is sufficient to force any partitioning that is needed
+          // within the compaction range.
+          //
+          // But if there's no partitioning boundary within the compaction
+          // range, we can be sure there's no need to fix partitioning
+          // within that range, thus safe to check overlap within file.
+          //
+          // Use a hypothetical trivial move query to check for partition
+          // boundary in range. (NOTE: in defiance of all conventions,
+          // `begin` and `end` here are both INCLUSIVE bounds, which makes
+          // this analogy to CanDoTrivialMove() accurate even when `end` is
+          // the first key in a partition.)
+          if (!partitioner->CanDoTrivialMove(*begin, *end)) {
+            check_overlap_within_file = false;
+          }
+        }
+      }
+      if (check_overlap_within_file) {
         Status status = current_version->OverlapWithLevelIterator(
             ro, file_options_, *begin, *end, level, &overlap);
         if (!status.ok()) {
-          overlap = current_version->storage_info()->OverlapInLevel(
-              level, begin, end);
+          check_overlap_within_file = false;
         }
-      } else {
+      }
+      if (!check_overlap_within_file) {
         overlap = current_version->storage_info()->OverlapInLevel(level,
                                                                   begin, end);
       }
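
The gating above only trusts a within-file overlap check when no partition boundary lies inside [begin, end]. A sketch of what CanDoTrivialMove() amounts to for the fixed-prefix partitioner (assumed behavior for illustration, not the RocksDB implementation): a range can be trivially moved iff both endpoints fall in the same partition, i.e. share the same key prefix.

    #include <algorithm>
    #include <cstddef>
    #include <rocksdb/slice.h>

    bool SamePartitionFixedPrefix(const rocksdb::Slice& smallest,
                                  const rocksdb::Slice& largest,
                                  size_t prefix_len) {
      // Compare only the first prefix_len bytes of each endpoint; if they
      // match, no partition boundary separates the two keys.
      rocksdb::Slice a(smallest.data(), std::min(prefix_len, smallest.size()));
      rocksdb::Slice b(largest.data(), std::min(prefix_len, largest.size()));
      return a.compare(b) == 0;
    }
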
package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc

@@ -354,7 +354,7 @@ TEST_F(RangeTombstoneFragmenterTest,
 
   FragmentedRangeTombstoneList fragment_list(
       std::move(range_del_iter), bytewise_icmp, true /* for_compaction */,
-      {
+      {9, 20} /* snapshots */);
   FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp,
                                         kMaxSequenceNumber /* upper_bound */);
   VerifyFragmentedRangeDels(&iter, {{"a", "c", 10},
package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h

@@ -12,13 +12,15 @@
 #include "db_stress_tool/db_stress_common.h"
 
 namespace ROCKSDB_NAMESPACE {
-class DbStressEnvWrapper : public EnvWrapper {
+class DbStressFSWrapper : public FileSystemWrapper {
  public:
-  explicit DbStressEnvWrapper(Env* t) : EnvWrapper(t) {}
-  static const char* kClassName() { return "DbStressEnv"; }
+  explicit DbStressFSWrapper(const std::shared_ptr<FileSystem>& t)
+      : FileSystemWrapper(t) {}
+  static const char* kClassName() { return "DbStressFS"; }
   const char* Name() const override { return kClassName(); }
 
-  Status DeleteFile(const std::string& f) override {
+  IOStatus DeleteFile(const std::string& f, const IOOptions& opts,
+                      IODebugContext* dbg) override {
     // We determine whether it is a manifest file by searching a strong,
     // so that there will be false positive if the directory path contains the
     // keyword but it is unlikely.

@@ -28,11 +30,11 @@ class DbStressEnvWrapper : public EnvWrapper {
         f.find("checkpoint") != std::string::npos ||
         f.find(".backup") != std::string::npos ||
         f.find(".restore") != std::string::npos) {
-      return target()->DeleteFile(f);
+      return target()->DeleteFile(f, opts, dbg);
     }
     // Rename the file instead of deletion to keep the history, and
     // at the same time it is not visible to RocksDB.
-    return target()->RenameFile(f, f + "_renamed_");
+    return target()->RenameFile(f, f + "_renamed_", opts, dbg);
   }
 
   // If true, all manifest files will not be delted in DeleteFile().
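
This hunk migrates the stress tool's wrapper from the legacy EnvWrapper to FileSystemWrapper, whose overrides forward IOOptions and IODebugContext to the wrapped FileSystem. A minimal self-contained sketch of the same delegation pattern (an illustrative subclass, not part of RocksDB):

    #include <cstdio>
    #include <memory>
    #include <string>
    #include <rocksdb/file_system.h>

    class LoggingFS : public rocksdb::FileSystemWrapper {
     public:
      explicit LoggingFS(const std::shared_ptr<rocksdb::FileSystem>& t)
          : FileSystemWrapper(t) {}
      static const char* kClassName() { return "LoggingFS"; }
      const char* Name() const override { return kClassName(); }

      rocksdb::IOStatus DeleteFile(const std::string& f,
                                   const rocksdb::IOOptions& opts,
                                   rocksdb::IODebugContext* dbg) override {
        std::fprintf(stderr, "DeleteFile: %s\n", f.c_str());
        // Forward opts/dbg to the wrapped FileSystem, which the old
        // Env-based wrapper could not do.
        return target()->DeleteFile(f, opts, dbg);
      }
    };
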
package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc

@@ -1046,9 +1046,11 @@ void StressTest::OperateDb(ThreadState* thread) {
         TestIterateAgainstExpected(thread, read_opts, rand_column_families,
                                    rand_keys);
       } else {
-        int num_seeks = static_cast<int>(
-            std::min(static_cast<uint64_t>(thread->rand.Uniform(4)),
-                     FLAGS_ops_per_thread - i - 1));
+        int num_seeks = static_cast<int>(std::min(
+            std::max(static_cast<uint64_t>(thread->rand.Uniform(4)),
+                     static_cast<uint64_t>(1)),
+            std::max(static_cast<uint64_t>(FLAGS_ops_per_thread - i - 1),
+                     static_cast<uint64_t>(1))));
         rand_keys = GenerateNKeys(thread, num_seeks, i);
         i += num_seeks - 1;
         TestIterate(thread, read_opts, rand_column_families, rand_keys);

@@ -3025,7 +3027,7 @@ bool InitializeOptionsFromFile(Options& options) {
             FLAGS_options_file.c_str(), s.ToString().c_str());
     exit(1);
   }
-  db_options.env = new DbStressEnvWrapper(db_stress_env);
+  db_options.env = new CompositeEnvWrapper(db_stress_env);
   options = Options(db_options, cf_descriptors[0].options);
   return true;
 }
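
The min/max chain above keeps num_seeks in [1, remaining ops] while never letting the upper bound fall below 1, so the stress loop cannot pick zero seeks. An equivalent formulation with std::clamp (hypothetical helper, for illustration only):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    uint64_t NumSeeks(uint64_t uniform_draw, uint64_t remaining_ops) {
      // clamp(v, 1, max(remaining_ops, 1)) == min(max(v, 1), max(remaining_ops, 1))
      return std::clamp<uint64_t>(uniform_draw, 1,
                                  std::max<uint64_t>(remaining_ops, 1));
    }

    int main() {
      assert(NumSeeks(0, 10) == 1);  // never zero seeks
      assert(NumSeeks(3, 1) == 1);   // never exceeds remaining ops
      assert(NumSeeks(3, 10) == 3);
      return 0;
    }
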
package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc

@@ -29,8 +29,8 @@
 namespace ROCKSDB_NAMESPACE {
 namespace {
 static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
-static std::shared_ptr<ROCKSDB_NAMESPACE::
-static std::shared_ptr<ROCKSDB_NAMESPACE::
+static std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_wrapper_guard;
+static std::shared_ptr<ROCKSDB_NAMESPACE::CompositeEnvWrapper>
     dbsl_env_wrapper_guard;
 static std::shared_ptr<CompositeEnvWrapper> fault_env_guard;
 }  // namespace

@@ -77,7 +77,7 @@ int db_stress_tool(int argc, char** argv) {
             s.ToString().c_str());
     exit(1);
   }
-  dbsl_env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
+  dbsl_env_wrapper_guard = std::make_shared<CompositeEnvWrapper>(raw_env);
   db_stress_listener_env = dbsl_env_wrapper_guard.get();
 
   if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||

@@ -96,17 +96,16 @@ int db_stress_tool(int argc, char** argv) {
     raw_env = fault_env_guard.get();
   }
 
-  env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
-
-
-
-  //
-  //
-  //
-
-  // CompositeEnvWrapper of env and fault_fs.
-  db_stress_env = raw_env;
+  env_wrapper_guard = std::make_shared<CompositeEnvWrapper>(
+      raw_env, std::make_shared<DbStressFSWrapper>(raw_env->GetFileSystem()));
+  if (!env_opts) {
+    // If using the default Env (Posix), wrap DbStressEnvWrapper with the
+    // legacy EnvWrapper. This is a temporary fix for the ReadAsync interface
+    // not being properly supported with Posix and db_stress. The EnvWrapper
+    // has a default implementation of ReadAsync that redirects to Read.
+    env_wrapper_guard = std::make_shared<EnvWrapper>(env_wrapper_guard);
   }
+  db_stress_env = env_wrapper_guard.get();
 
   FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str());
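
The hunk above composes a custom FileSystem into an Env for the rest of the tool. RocksDB's public NewCompositeEnv helper expresses the same pattern; a sketch assuming it lives in rocksdb/env.h and using the default FileSystem in place of db_stress's internal DbStressFSWrapper:

    #include <memory>
    #include <rocksdb/env.h>
    #include <rocksdb/file_system.h>

    std::unique_ptr<rocksdb::Env> MakeWrappedEnv(
        const std::shared_ptr<rocksdb::FileSystem>& fs) {
      // The returned Env routes file I/O through `fs` and inherits threads,
      // clock, etc. from the default Env.
      return rocksdb::NewCompositeEnv(fs);
    }

    int main() {
      auto env = MakeWrappedEnv(rocksdb::FileSystem::Default());
      return env != nullptr ? 0 : 1;
    }
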
package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc

@@ -700,6 +700,11 @@ class NonBatchedOpsStressTest : public StressTest {
     uint64_t count = 0;
     Status s;
 
+    if (fault_fs_guard) {
+      fault_fs_guard->EnableErrorInjection();
+      SharedState::ignore_read_error = false;
+    }
+
     for (iter->Seek(prefix); iter->Valid() && iter->key().starts_with(prefix);
          iter->Next()) {
       ++count;

@@ -733,13 +738,20 @@ class NonBatchedOpsStressTest : public StressTest {
       s = iter->status();
     }
 
-    if (!s.ok()) {
+    uint64_t error_count = 0;
+    if (fault_fs_guard) {
+      error_count = fault_fs_guard->GetAndResetErrorCount();
+    }
+    if (!s.ok() && (!fault_fs_guard || (fault_fs_guard && !error_count))) {
       fprintf(stderr, "TestPrefixScan error: %s\n", s.ToString().c_str());
       thread->stats.AddErrors(1);
 
       return s;
     }
 
+    if (fault_fs_guard) {
+      fault_fs_guard->DisableErrorInjection();
+    }
     thread->stats.AddPrefixes(1, count);
 
     return Status::OK();
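
A toy model (not RocksDB code) of the bracketing pattern added above: enable error injection around the scan, then treat a failure as a real bug only if the injector did not fire.

    #include <cstdint>
    #include <cstdio>

    struct FakeFaultInjector {
      uint64_t error_count = 0;
      bool enabled = false;
      void EnableErrorInjection() { enabled = true; }
      void DisableErrorInjection() { enabled = false; }
      uint64_t GetAndResetErrorCount() {
        uint64_t c = error_count;
        error_count = 0;
        return c;
      }
    };

    int main() {
      FakeFaultInjector fi;
      fi.EnableErrorInjection();
      bool op_ok = false;  // pretend the scan failed...
      fi.error_count = 1;  // ...because we injected an error
      uint64_t injected = fi.GetAndResetErrorCount();
      fi.DisableErrorInjection();
      // Only an uninjected failure counts as a test error.
      if (!op_ok && injected == 0) {
        std::fprintf(stderr, "unexpected scan failure\n");
        return 1;
      }
      return 0;
    }
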
package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc

@@ -247,10 +247,14 @@ void FilePrefetchBuffer::AbortAllIOs() {
   // Release io_handles.
   if (bufs_[curr_].io_handle_ != nullptr && bufs_[curr_].del_fn_ != nullptr) {
     DestroyAndClearIOHandle(curr_);
+  } else {
+    bufs_[curr_].async_read_in_progress_ = false;
   }
 
   if (bufs_[second].io_handle_ != nullptr && bufs_[second].del_fn_ != nullptr) {
     DestroyAndClearIOHandle(second);
+  } else {
+    bufs_[second].async_read_in_progress_ = false;
   }
 }
 

@@ -325,7 +329,16 @@ Status FilePrefetchBuffer::HandleOverlappingData(
     uint64_t& tmp_offset, size_t& tmp_length) {
   Status s;
   size_t alignment = reader->file()->GetRequiredBufferAlignment();
-  uint32_t second = curr_ ^ 1;
+  uint32_t second;
+
+  // Check if the first buffer has the required offset and the async read is
+  // still in progress. This should only happen if a prefetch was initiated
+  // by Seek, but the next access is at another offset.
+  if (bufs_[curr_].async_read_in_progress_ &&
+      IsOffsetInBufferWithAsyncProgress(offset, curr_)) {
+    PollAndUpdateBuffersIfNeeded(offset);
+  }
+  second = curr_ ^ 1;
 
   // If data is overlapping over two buffers, copy the data from curr_ and
   // call ReadAsync on curr_.
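
FilePrefetchBuffer keeps two buffers and addresses them as curr_ and curr_ ^ 1; XOR-ing with 1 flips between indexes 0 and 1, which is why the refactor above can defer computing `second` until after the poll. A trivial standalone illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t curr = 0;
      assert((curr ^ 1) == 1);  // the "other" buffer of 0 is 1
      curr ^= 1;                // swap roles of the two buffers
      assert((curr ^ 1) == 0);  // and the other buffer of 1 is 0
      return 0;
    }
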
package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h

@@ -39,9 +39,13 @@ TBlockIter* BlockBasedTable::NewDataBlockIterator(
   if (rep_->uncompression_dict_reader && block_type == BlockType::kData) {
     CachableEntry<UncompressionDict> uncompression_dict;
     const bool no_io = (ro.read_tier == kBlockCacheTier);
+    // For async scans, don't use the prefetch buffer since an async prefetch
+    // might already be under way and this would invalidate it. Also, the
+    // uncompression dict is typically at the end of the file and would
+    // most likely break the sequentiality of the access pattern.
     s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
-        prefetch_buffer, no_io, ro.verify_checksums,
-        lookup_context, &uncompression_dict);
+        ro.async_io ? nullptr : prefetch_buffer, no_io, ro.verify_checksums,
+        get_context, lookup_context, &uncompression_dict);
     if (!s.ok()) {
       iter->Invalidate(s);
       return iter;
package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc

@@ -26,6 +26,7 @@
 #include "test_util/sync_point.h"
 #include "util/coding.h"
 #include "util/crc32c.h"
+#include "util/mutexlock.h"
 #include "util/random.h"
 #include "util/string_util.h"
 #include "util/xxhash.h"

@@ -412,6 +413,35 @@ IOStatus TestFSRandomAccessFile::Read(uint64_t offset, size_t n,
   return s;
 }
 
+IOStatus TestFSRandomAccessFile::ReadAsync(
+    FSReadRequest& req, const IOOptions& opts,
+    std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+    void** io_handle, IOHandleDeleter* del_fn, IODebugContext* /*dbg*/) {
+  IOStatus ret;
+  IOStatus s;
+  FSReadRequest res;
+  if (!fs_->IsFilesystemActive()) {
+    ret = fs_->GetError();
+  } else {
+    ret = fs_->InjectThreadSpecificReadError(
+        FaultInjectionTestFS::ErrorOperation::kRead, &res.result,
+        use_direct_io(), req.scratch, /*need_count_increase=*/true,
+        /*fault_injected=*/nullptr);
+  }
+  if (ret.ok()) {
+    if (fs_->ShouldInjectRandomReadError()) {
+      ret = IOStatus::IOError("Injected read error");
+    } else {
+      s = target_->ReadAsync(req, opts, cb, cb_arg, io_handle, del_fn, nullptr);
+    }
+  }
+  if (!ret.ok()) {
+    res.status = ret;
+    cb(res, cb_arg);
+  }
+  return s;
+}
+
 IOStatus TestFSRandomAccessFile::MultiRead(FSReadRequest* reqs, size_t num_reqs,
                                            const IOOptions& options,
                                            IODebugContext* dbg) {

@@ -803,6 +833,15 @@ IOStatus FaultInjectionTestFS::LinkFile(const std::string& s,
   return io_s;
 }
 
+IOStatus FaultInjectionTestFS::Poll(std::vector<void*>& io_handles,
+                                    size_t min_completions) {
+  return target()->Poll(io_handles, min_completions);
+}
+
+IOStatus FaultInjectionTestFS::AbortIO(std::vector<void*>& io_handles) {
+  return target()->AbortIO(io_handles);
+}
+
 void FaultInjectionTestFS::WritableFileClosed(const FSFileState& state) {
   MutexLock l(&mutex_);
   if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) {
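
A toy model (not the RocksDB types) of the contract the injected ReadAsync above follows: when an error is injected, the callback is invoked synchronously with a failed request, just as a completed async read would be reported later.

    #include <cstdio>
    #include <functional>
    #include <string>

    struct FakeReadRequest {
      std::string status;  // empty means OK
    };

    void ReadAsyncSketch(bool inject_error,
                         const std::function<void(const FakeReadRequest&)>& cb) {
      if (inject_error) {
        FakeReadRequest res;
        res.status = "Injected read error";
        cb(res);  // complete immediately with the injected failure
        return;
      }
      // Otherwise: hand the request to the real filesystem, which invokes
      // cb when the read finishes.
    }

    int main() {
      ReadAsyncSketch(true, [](const FakeReadRequest& r) {
        std::printf("callback saw: %s\n", r.status.c_str());
      });
      return 0;
    }
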
package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h

@@ -141,6 +141,10 @@ class TestFSRandomAccessFile : public FSRandomAccessFile {
   IOStatus Read(uint64_t offset, size_t n, const IOOptions& options,
                 Slice* result, char* scratch,
                 IODebugContext* dbg) const override;
+  IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
+                     std::function<void(const FSReadRequest&, void*)> cb,
+                     void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
+                     IODebugContext* dbg) override;
   IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
                      const IOOptions& options, IODebugContext* dbg) override;
   size_t GetRequiredBufferAlignment() const override {

@@ -266,6 +270,11 @@ class FaultInjectionTestFS : public FileSystemWrapper {
     return io_s;
   }
 
+  virtual IOStatus Poll(std::vector<void*>& io_handles,
+                        size_t min_completions) override;
+
+  virtual IOStatus AbortIO(std::vector<void*>& io_handles) override;
+
   void WritableFileClosed(const FSFileState& state);
 
   void WritableFileSynced(const FSFileState& state);
package/package.json (changed, +1 -1)

package/prebuilds/darwin-arm64/node.napi.node (binary file)

package/prebuilds/linux-x64/node.napi.node (binary file)