@nxtedition/rocksdb 8.0.4 → 8.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/BUILDING.md +2 -2
  2. package/binding.cc +7 -2
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -0
  4. package/deps/rocksdb/rocksdb/Makefile +13 -1
  5. package/deps/rocksdb/rocksdb/db/builder.cc +13 -4
  6. package/deps/rocksdb/rocksdb/db/builder.h +2 -1
  7. package/deps/rocksdb/rocksdb/db/c.cc +6 -0
  8. package/deps/rocksdb/rocksdb/db/column_family.cc +1 -0
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +18 -4
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +2 -0
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +22 -2
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +5 -1
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +14 -14
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1 -2
  16. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +2 -3
  17. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +225 -0
  18. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +8 -9
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -8
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +63 -23
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +12 -8
  23. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +115 -2
  24. package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
  25. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -0
  26. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +88 -12
  27. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +38 -1
  28. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +14 -110
  29. package/deps/rocksdb/rocksdb/db/flush_job.cc +2 -3
  30. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/repair.cc +2 -1
  32. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +41 -39
  33. package/deps/rocksdb/rocksdb/db/version_edit.cc +12 -0
  34. package/deps/rocksdb/rocksdb/db/version_edit.h +18 -6
  35. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +9 -9
  36. package/deps/rocksdb/rocksdb/db/version_set.cc +12 -6
  37. package/deps/rocksdb/rocksdb/db/version_set_test.cc +23 -9
  38. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  39. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -0
  40. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +5 -0
  41. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +4 -0
  42. package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -1
  43. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -1
  44. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +69 -9
  45. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +245 -74
  46. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +195 -4
  47. package/max_rev_operator.h +100 -0
  48. package/package.json +1 -1
  49. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  50. package/prebuilds/darwin-x64/node.napi.node +0 -0
  51. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -2960,7 +2960,7 @@ bool Version::MaybeInitializeFileMetaData(FileMetaData* file_meta) {
2960
2960
  file_meta->num_deletions = tp->num_deletions;
2961
2961
  file_meta->raw_value_size = tp->raw_value_size;
2962
2962
  file_meta->raw_key_size = tp->raw_key_size;
2963
-
2963
+ file_meta->num_range_deletions = tp->num_range_deletions;
2964
2964
  return true;
2965
2965
  }
2966
2966
 
@@ -3062,11 +3062,15 @@ void VersionStorageInfo::ComputeCompensatedSizes() {
3062
3062
  // size of deletion entries in a stable workload, the deletion
3063
3063
  // compensation logic might introduce unwanted effet which changes the
3064
3064
  // shape of LSM tree.
3065
- if (file_meta->num_deletions * 2 >= file_meta->num_entries) {
3065
+ if ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 >=
3066
+ file_meta->num_entries) {
3066
3067
  file_meta->compensated_file_size +=
3067
- (file_meta->num_deletions * 2 - file_meta->num_entries) *
3068
+ ((file_meta->num_deletions - file_meta->num_range_deletions) * 2 -
3069
+ file_meta->num_entries) *
3068
3070
  average_value_size * kDeletionWeightOnCompaction;
3069
3071
  }
3072
+ file_meta->compensated_file_size +=
3073
+ file_meta->compensated_range_deletion_size;
3070
3074
  }
3071
3075
  }
3072
3076
  }
@@ -6215,7 +6219,8 @@ Status VersionSet::WriteCurrentStateToManifest(
6215
6219
  f->marked_for_compaction, f->temperature,
6216
6220
  f->oldest_blob_file_number, f->oldest_ancester_time,
6217
6221
  f->file_creation_time, f->epoch_number, f->file_checksum,
6218
- f->file_checksum_func_name, f->unique_id);
6222
+ f->file_checksum_func_name, f->unique_id,
6223
+ f->compensated_range_deletion_size);
6219
6224
  }
6220
6225
  }
6221
6226
 
@@ -6293,8 +6298,9 @@ uint64_t VersionSet::ApproximateSize(const SizeApproximationOptions& options,
6293
6298
  const int num_non_empty_levels = vstorage->num_non_empty_levels();
6294
6299
  end_level = (end_level == -1) ? num_non_empty_levels
6295
6300
  : std::min(end_level, num_non_empty_levels);
6296
-
6297
- assert(start_level <= end_level);
6301
+ if (end_level <= start_level) {
6302
+ return 0;
6303
+ }
6298
6304
 
6299
6305
  // Outline of the optimization that uses options.files_size_error_margin.
6300
6306
  // When approximating the files total size that is used to store a keys range,
@@ -51,7 +51,7 @@ class GenerateLevelFilesBriefTest : public testing::Test {
51
51
  largest_seq, /* marked_for_compact */ false, Temperature::kUnknown,
52
52
  kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
53
53
  kUnknownFileCreationTime, kUnknownEpochNumber, kUnknownFileChecksum,
54
- kUnknownFileChecksumFuncName, kNullUniqueId64x2);
54
+ kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
55
55
  files_.push_back(f);
56
56
  }
57
57
 
@@ -143,16 +143,19 @@ class VersionStorageInfoTestBase : public testing::Test {
143
143
 
144
144
  void Add(int level, uint32_t file_number, const char* smallest,
145
145
  const char* largest, uint64_t file_size = 0,
146
- uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
146
+ uint64_t oldest_blob_file_number = kInvalidBlobFileNumber,
147
+ uint64_t compensated_range_deletion_size = 0) {
147
148
  constexpr SequenceNumber dummy_seq = 0;
148
149
 
149
150
  Add(level, file_number, GetInternalKey(smallest, dummy_seq),
150
- GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number);
151
+ GetInternalKey(largest, dummy_seq), file_size, oldest_blob_file_number,
152
+ compensated_range_deletion_size);
151
153
  }
152
154
 
153
155
  void Add(int level, uint32_t file_number, const InternalKey& smallest,
154
156
  const InternalKey& largest, uint64_t file_size = 0,
155
- uint64_t oldest_blob_file_number = kInvalidBlobFileNumber) {
157
+ uint64_t oldest_blob_file_number = kInvalidBlobFileNumber,
158
+ uint64_t compensated_range_deletion_size = 0) {
156
159
  assert(level < vstorage_.num_levels());
157
160
  FileMetaData* f = new FileMetaData(
158
161
  file_number, 0, file_size, smallest, largest, /* smallest_seq */ 0,
@@ -160,8 +163,7 @@ class VersionStorageInfoTestBase : public testing::Test {
160
163
  Temperature::kUnknown, oldest_blob_file_number,
161
164
  kUnknownOldestAncesterTime, kUnknownFileCreationTime,
162
165
  kUnknownEpochNumber, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
163
- kNullUniqueId64x2);
164
- f->compensated_file_size = file_size;
166
+ kNullUniqueId64x2, compensated_range_deletion_size);
165
167
  vstorage_.AddFile(level, f);
166
168
  }
167
169
 
@@ -2136,6 +2138,17 @@ TEST_F(VersionSetTest, AtomicGroupWithWalEdits) {
2136
2138
  }
2137
2139
  }
2138
2140
 
2141
+ TEST_F(VersionStorageInfoTest, AddRangeDeletionCompensatedFileSize) {
2142
+ // Tests that compensated range deletion size is added to compensated file
2143
+ // size.
2144
+ Add(4, 100U, "1", "2", 100U, kInvalidBlobFileNumber, 1000U);
2145
+
2146
+ UpdateVersionStorageInfo();
2147
+
2148
+ auto meta = vstorage_.GetFileMetaDataByNumber(100U);
2149
+ ASSERT_EQ(meta->compensated_file_size, 100U + 1000U);
2150
+ }
2151
+
2139
2152
  class VersionSetWithTimestampTest : public VersionSetTest {
2140
2153
  public:
2141
2154
  static const std::string kNewCfName;
@@ -3242,7 +3255,8 @@ class VersionSetTestMissingFiles : public VersionSetTestBase,
3242
3255
  file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey,
3243
3256
  ikey, 0, 0, false, Temperature::kUnknown, 0, 0,
3244
3257
  0, info.epoch_number, kUnknownFileChecksum,
3245
- kUnknownFileChecksumFuncName, kNullUniqueId64x2);
3258
+ kUnknownFileChecksumFuncName, kNullUniqueId64x2,
3259
+ 0);
3246
3260
  }
3247
3261
  }
3248
3262
 
@@ -3299,7 +3313,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestFarBehindSst) {
3299
3313
  file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
3300
3314
  largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
3301
3315
  file_num /* epoch_number */, kUnknownFileChecksum,
3302
- kUnknownFileChecksumFuncName, kNullUniqueId64x2);
3316
+ kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
3303
3317
  added_files.emplace_back(0, meta);
3304
3318
  }
3305
3319
  WriteFileAdditionAndDeletionToManifest(
@@ -3360,7 +3374,7 @@ TEST_F(VersionSetTestMissingFiles, ManifestAheadofSst) {
3360
3374
  file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
3361
3375
  largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
3362
3376
  file_num /* epoch_number */, kUnknownFileChecksum,
3363
- kUnknownFileChecksumFuncName, kNullUniqueId64x2);
3377
+ kUnknownFileChecksumFuncName, kNullUniqueId64x2, 0);
3364
3378
  added_files.emplace_back(0, meta);
3365
3379
  }
3366
3380
  WriteFileAdditionAndDeletionToManifest(
@@ -150,6 +150,7 @@ DECLARE_string(cache_type);
150
150
  DECLARE_uint64(subcompactions);
151
151
  DECLARE_uint64(periodic_compaction_seconds);
152
152
  DECLARE_uint64(compaction_ttl);
153
+ DECLARE_bool(fifo_allow_compaction);
153
154
  DECLARE_bool(allow_concurrent_memtable_write);
154
155
  DECLARE_double(experimental_mempurge_threshold);
155
156
  DECLARE_bool(enable_write_thread_adaptive_yield);
@@ -376,6 +376,10 @@ DEFINE_uint64(periodic_compaction_seconds, 1000,
376
376
  DEFINE_uint64(compaction_ttl, 1000,
377
377
  "Files older than TTL will be compacted to the next level.");
378
378
 
379
+ DEFINE_bool(fifo_allow_compaction, false,
380
+ "If true, set `Options::compaction_options_fifo.allow_compaction = "
381
+ "true`. It only take effect when FIFO compaction is used.");
382
+
379
383
  DEFINE_bool(allow_concurrent_memtable_write, false,
380
384
  "Allow multi-writers to update mem tables in parallel.");
381
385
 
@@ -3123,6 +3123,11 @@ void InitializeOptionsFromFlags(
3123
3123
  options.max_background_flushes = FLAGS_max_background_flushes;
3124
3124
  options.compaction_style =
3125
3125
  static_cast<ROCKSDB_NAMESPACE::CompactionStyle>(FLAGS_compaction_style);
3126
+ if (options.compaction_style ==
3127
+ ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleFIFO) {
3128
+ options.compaction_options_fifo.allow_compaction =
3129
+ FLAGS_fifo_allow_compaction;
3130
+ }
3126
3131
  options.compaction_pri =
3127
3132
  static_cast<ROCKSDB_NAMESPACE::CompactionPri>(FLAGS_compaction_pri);
3128
3133
  options.num_levels = FLAGS_num_levels;
@@ -986,6 +986,10 @@ rocksdb_block_based_options_set_partition_filters(
986
986
  rocksdb_block_based_table_options_t* options,
987
987
  unsigned char partition_filters);
988
988
  extern ROCKSDB_LIBRARY_API void
989
+ rocksdb_block_based_options_set_optimize_filters_for_memory(
990
+ rocksdb_block_based_table_options_t* options,
991
+ unsigned char optimize_filters_for_memory);
992
+ extern ROCKSDB_LIBRARY_API void
989
993
  rocksdb_block_based_options_set_use_delta_encoding(
990
994
  rocksdb_block_based_table_options_t* options,
991
995
  unsigned char use_delta_encoding);
@@ -140,7 +140,10 @@ enum class CompactionReason : int {
140
140
  // According to the comments in flush_job.cc, RocksDB treats flush as
141
141
  // a level 0 compaction in internal stats.
142
142
  kFlush,
143
- // Compaction caused by external sst file ingestion
143
+ // [InternalOnly] External sst file ingestion treated as a compaction
144
+ // with placeholder input level L0 as file ingestion
145
+ // technically does not have an input level like other compactions.
146
+ // Used only for internal stats and conflict checking with other compactions
144
147
  kExternalSstIngestion,
145
148
  // Compaction due to SST file being too old
146
149
  kPeriodicCompaction,
@@ -151,6 +154,9 @@ enum class CompactionReason : int {
151
154
  // A special TTL compaction for RoundRobin policy, which basically the same as
152
155
  // kLevelMaxLevelSize, but the goal is to compact TTLed files.
153
156
  kRoundRobinTtl,
157
+ // [InternalOnly] DBImpl::ReFitLevel treated as a compaction,
158
+ // Used only for internal conflict checking with other compactions
159
+ kRefitLevel,
154
160
  // total number of compaction reasons, new reasons must be added above this.
155
161
  kNumOfReasons,
156
162
  };
@@ -1933,7 +1933,8 @@ struct IngestExternalFileOptions {
1933
1933
  // that where created before the file was ingested.
1934
1934
  bool snapshot_consistency = true;
1935
1935
  // If set to false, IngestExternalFile() will fail if the file key range
1936
- // overlaps with existing keys or tombstones in the DB.
1936
+ // overlaps with existing keys or tombstones or output of ongoing compaction
1937
+ // during file ingestion in the DB.
1937
1938
  bool allow_global_seqno = true;
1938
1939
  // If set to false and the file key range overlaps with the memtable key range
1939
1940
  // (memtable flush required), IngestExternalFile will fail.
@@ -11,6 +11,7 @@
11
11
  #ifndef ROCKSDB_LITE
12
12
 
13
13
  #include <cstdint>
14
+ #include <forward_list>
14
15
  #include <functional>
15
16
  #include <map>
16
17
  #include <string>
@@ -23,6 +24,8 @@
23
24
  #include "rocksdb/status.h"
24
25
 
25
26
  namespace ROCKSDB_NAMESPACE {
27
+ class BackupEngineReadOnlyBase;
28
+ class BackupEngine;
26
29
 
27
30
  // The default DB file checksum function name.
28
31
  constexpr char kDbFileChecksumFuncName[] = "FileChecksumCrc32c";
@@ -270,6 +273,28 @@ inline BackupEngineOptions::ShareFilesNaming operator|(
270
273
  return static_cast<BackupEngineOptions::ShareFilesNaming>(l | r);
271
274
  }
272
275
 
276
+ // Identifying information about a backup shared file that is (or might be)
277
+ // excluded from a backup using exclude_files_callback.
278
+ struct BackupExcludedFileInfo {
279
+ explicit BackupExcludedFileInfo(const std::string& _relative_file)
280
+ : relative_file(_relative_file) {}
281
+
282
+ // File name and path relative to the backup dir.
283
+ std::string relative_file;
284
+ };
285
+
286
+ // An auxiliary structure for exclude_files_callback
287
+ struct MaybeExcludeBackupFile {
288
+ explicit MaybeExcludeBackupFile(BackupExcludedFileInfo&& _info)
289
+ : info(std::move(_info)) {}
290
+
291
+ // Identifying information about a backup shared file that could be excluded
292
+ const BackupExcludedFileInfo info;
293
+
294
+ // API user sets to true if the file should be excluded from this backup
295
+ bool exclude_decision = false;
296
+ };
297
+
273
298
  struct CreateBackupOptions {
274
299
  // Flush will always trigger if 2PC is enabled.
275
300
  // If write-ahead logs are disabled, set flush_before_backup=true to
@@ -278,10 +303,31 @@ struct CreateBackupOptions {
278
303
 
279
304
  // Callback for reporting progress, based on callback_trigger_interval_size.
280
305
  //
281
- // RocksDB callbacks are NOT exception-safe. A callback completing with an
282
- // exception can lead to undefined behavior in RocksDB, including data loss,
283
- // unreported corruption, deadlocks, and more.
284
- std::function<void()> progress_callback = []() {};
306
+ // An exception thrown from the callback will result in Status::Aborted from
307
+ // the operation.
308
+ std::function<void()> progress_callback = {};
309
+
310
+ // A callback that allows the API user to select files for exclusion, such
311
+ // as if the files are known to exist in an alternate backup directory.
312
+ // Only "shared" files can be excluded from backups. This is an advanced
313
+ // feature because the BackupEngine user is trusted to keep track of files
314
+ // such that the DB can be restored.
315
+ //
316
+ // Input to the callback is a [begin,end) range of sharable files live in
317
+ // the DB being backed up, and the callback implementation sets
318
+ // exclude_decision=true for files to exclude. A callback offers maximum
319
+ // flexibility, e.g. if remote files are unavailable at backup time but
320
+ // whose existence has been recorded somewhere. In case of an empty or
321
+ // no-op callback, all files are included in the backup .
322
+ //
323
+ // To restore the DB, RestoreOptions::alternate_dirs must be used to provide
324
+ // the excluded files.
325
+ //
326
+ // An exception thrown from the callback will result in Status::Aborted from
327
+ // the operation.
328
+ std::function<void(MaybeExcludeBackupFile* files_begin,
329
+ MaybeExcludeBackupFile* files_end)>
330
+ exclude_files_callback = {};
285
331
 
286
332
  // If false, background_thread_cpu_priority is ignored.
287
333
  // Otherwise, the cpu priority can be decreased,
@@ -300,6 +346,11 @@ struct RestoreOptions {
300
346
  // Default: false
301
347
  bool keep_log_files;
302
348
 
349
+ // For backups that were created using exclude_files_callback, this
350
+ // option enables restoring those backups by providing BackupEngines on
351
+ // directories known to contain the required files.
352
+ std::forward_list<BackupEngineReadOnlyBase*> alternate_dirs;
353
+
303
354
  explicit RestoreOptions(bool _keep_log_files = false)
304
355
  : keep_log_files(_keep_log_files) {}
305
356
  };
@@ -324,9 +375,15 @@ struct BackupInfo {
324
375
  // Backup API user metadata
325
376
  std::string app_metadata;
326
377
 
327
- // Backup file details, if requested with include_file_details=true
378
+ // Backup file details, if requested with include_file_details=true.
379
+ // Does not include excluded_files.
328
380
  std::vector<BackupFileInfo> file_details;
329
381
 
382
+ // Identifying information about shared files that were excluded from the
383
+ // created backup. See exclude_files_callback and alternate_dirs.
384
+ // This information is only provided if include_file_details=true.
385
+ std::vector<BackupExcludedFileInfo> excluded_files;
386
+
330
387
  // DB "name" (a directory in the backup_env) for opening this backup as a
331
388
  // read-only DB. This should also be used as the DBOptions::wal_dir, such
332
389
  // as by default setting wal_dir="". See also env_for_open.
@@ -348,8 +405,8 @@ struct BackupInfo {
348
405
 
349
406
  BackupInfo() {}
350
407
 
351
- BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size,
352
- uint32_t _number_files, const std::string& _app_metadata)
408
+ explicit BackupInfo(BackupID _backup_id, int64_t _timestamp, uint64_t _size,
409
+ uint32_t _number_files, const std::string& _app_metadata)
353
410
  : backup_id(_backup_id),
354
411
  timestamp(_timestamp),
355
412
  size(_size),
@@ -364,8 +421,8 @@ class BackupStatistics {
364
421
  number_fail_backup = 0;
365
422
  }
366
423
 
367
- BackupStatistics(uint32_t _number_success_backup,
368
- uint32_t _number_fail_backup)
424
+ explicit BackupStatistics(uint32_t _number_success_backup,
425
+ uint32_t _number_fail_backup)
369
426
  : number_success_backup(_number_success_backup),
370
427
  number_fail_backup(_number_fail_backup) {}
371
428
 
@@ -462,6 +519,9 @@ class BackupEngineReadOnlyBase {
462
519
  // Returns Status::OK() if all checks are good
463
520
  virtual IOStatus VerifyBackup(BackupID backup_id,
464
521
  bool verify_with_checksum = false) const = 0;
522
+
523
+ // Internal use only
524
+ virtual BackupEngine* AsBackupEngine() = 0;
465
525
  };
466
526
 
467
527
  // Append-only functions of a BackupEngine. See BackupEngine comment for