@nxtedition/rocksdb 7.0.27 → 7.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. package/binding.cc +170 -30
  2. package/chained-batch.js +1 -1
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -0
  4. package/deps/rocksdb/rocksdb/Makefile +3 -0
  5. package/deps/rocksdb/rocksdb/TARGETS +10 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +17 -7
  7. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  8. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -0
  9. package/deps/rocksdb/rocksdb/cache/charged_cache.cc +117 -0
  10. package/deps/rocksdb/rocksdb/cache/charged_cache.h +121 -0
  11. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +270 -180
  12. package/deps/rocksdb/rocksdb/cache/clock_cache.h +412 -124
  13. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +1 -0
  14. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +1 -1
  15. package/deps/rocksdb/rocksdb/cache/lru_cache.h +2 -2
  16. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
  17. package/deps/rocksdb/rocksdb/cache/sharded_cache.h +1 -1
  18. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +71 -9
  19. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +11 -2
  20. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +21 -14
  21. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +68 -7
  22. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +16 -0
  23. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +519 -12
  24. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +120 -0
  25. package/deps/rocksdb/rocksdb/db/builder.cc +15 -5
  26. package/deps/rocksdb/rocksdb/db/builder.h +3 -0
  27. package/deps/rocksdb/rocksdb/db/c.cc +18 -0
  28. package/deps/rocksdb/rocksdb/db/c_test.c +18 -0
  29. package/deps/rocksdb/rocksdb/db/column_family.h +2 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +3 -2
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +9 -4
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +15 -10
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +36 -34
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +50 -13
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +12 -0
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +2 -1
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +13 -17
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +26 -9
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +0 -11
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +93 -0
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +3 -8
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +8 -1
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +17 -5
  46. package/deps/rocksdb/rocksdb/db/db_test.cc +0 -3
  47. package/deps/rocksdb/rocksdb/db/db_test2.cc +39 -12
  48. package/deps/rocksdb/rocksdb/db/db_test_util.cc +9 -0
  49. package/deps/rocksdb/rocksdb/db/db_test_util.h +2 -0
  50. package/deps/rocksdb/rocksdb/db/dbformat.cc +0 -38
  51. package/deps/rocksdb/rocksdb/db/dbformat.h +14 -13
  52. package/deps/rocksdb/rocksdb/db/dbformat_test.cc +5 -2
  53. package/deps/rocksdb/rocksdb/db/event_helpers.cc +13 -1
  54. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +0 -10
  55. package/deps/rocksdb/rocksdb/db/flush_job.cc +19 -15
  56. package/deps/rocksdb/rocksdb/db/flush_job.h +7 -0
  57. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +21 -15
  58. package/deps/rocksdb/rocksdb/db/forward_iterator.h +4 -3
  59. package/deps/rocksdb/rocksdb/db/memtable_list.cc +9 -0
  60. package/deps/rocksdb/rocksdb/db/memtable_list.h +5 -0
  61. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +53 -12
  62. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +14 -2
  63. package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +10 -10
  64. package/deps/rocksdb/rocksdb/db/repair.cc +8 -6
  65. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +890 -0
  66. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +324 -0
  67. package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +186 -0
  68. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -0
  69. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -4
  70. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +23 -2
  71. package/deps/rocksdb/rocksdb/env/env_test.cc +74 -1
  72. package/deps/rocksdb/rocksdb/env/io_posix.cc +11 -8
  73. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +28 -0
  74. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +14 -1
  75. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +4 -4
  76. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +30 -23
  77. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +1 -1
  78. package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +3 -13
  79. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +5 -0
  80. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +1 -2
  81. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -0
  82. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  83. package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +26 -26
  84. package/deps/rocksdb/rocksdb/options/cf_options.cc +14 -1
  85. package/deps/rocksdb/rocksdb/options/cf_options.h +5 -0
  86. package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -56
  87. package/deps/rocksdb/rocksdb/options/db_options.cc +4 -5
  88. package/deps/rocksdb/rocksdb/options/options.cc +11 -1
  89. package/deps/rocksdb/rocksdb/options/options_helper.cc +8 -0
  90. package/deps/rocksdb/rocksdb/options/options_helper.h +4 -0
  91. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +4 -0
  92. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -0
  93. package/deps/rocksdb/rocksdb/src.mk +3 -0
  94. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +6 -1
  95. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +4 -0
  96. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +36 -3
  97. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +36 -1
  98. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +14 -3
  99. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  100. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +6 -0
  101. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +5 -0
  102. package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +3 -0
  103. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -7
  104. package/deps/rocksdb/rocksdb/table/table_builder.h +7 -3
  105. package/deps/rocksdb/rocksdb/table/table_properties.cc +9 -0
  106. package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +3 -2
  107. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +58 -30
  108. package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +1 -0
  109. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +20 -0
  110. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +29 -154
  111. package/deps/rocksdb/rocksdb/util/rate_limiter.h +16 -34
  112. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +0 -92
  113. package/deps/rocksdb/rocksdb/util/timer.h +6 -0
  114. package/deps/rocksdb/rocksdb/util/vector_iterator.h +4 -3
  115. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -45
  116. package/deps/rocksdb/rocksdb/utilities/debug.cc +40 -0
  117. package/deps/rocksdb/rocksdb.gyp +2 -0
  118. package/index.js +19 -2
  119. package/package.json +1 -1
  120. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  121. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -1401,7 +1401,7 @@ TEST_P(EnvPosixTestWithParam, MultiRead) {
1401
1401
  }
1402
1402
  });
1403
1403
 
1404
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1404
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1405
1405
  std::unique_ptr<RandomAccessFile> file;
1406
1406
  std::vector<ReadRequest> reqs(3);
1407
1407
  std::vector<std::unique_ptr<char, Deleter>> data;
@@ -1522,6 +1522,79 @@ TEST_F(EnvPosixTest, MultiReadNonAlignedLargeNum) {
1522
1522
  }
1523
1523
  }
1524
1524
 
1525
+ #ifndef ROCKSDB_LITE
1526
+ TEST_F(EnvPosixTest, NonAlignedDirectIOMultiReadBeyondFileSize) {
1527
+ EnvOptions soptions;
1528
+ soptions.use_direct_reads = true;
1529
+ soptions.use_direct_writes = false;
1530
+ std::string fname = test::PerThreadDBPath(env_, "testfile");
1531
+
1532
+ Random rnd(301);
1533
+ std::unique_ptr<WritableFile> wfile;
1534
+ size_t alignment = 0;
1535
+ // Create file.
1536
+ {
1537
+ ASSERT_OK(env_->NewWritableFile(fname, &wfile, soptions));
1538
+ auto data_ptr = NewAligned(4095, 'b');
1539
+ Slice data_b(data_ptr.get(), 4095);
1540
+ ASSERT_OK(wfile->PositionedAppend(data_b, 0U));
1541
+ ASSERT_OK(wfile->Close());
1542
+ }
1543
+
1544
+ #if !defined(OS_MACOSX) && !defined(OS_WIN) && !defined(OS_SOLARIS) && \
1545
+ !defined(OS_AIX) && !defined(OS_OPENBSD) && !defined(OS_FREEBSD)
1546
+ if (soptions.use_direct_reads) {
1547
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1548
+ "NewRandomAccessFile:O_DIRECT", [&](void* arg) {
1549
+ int* val = static_cast<int*>(arg);
1550
+ *val &= ~O_DIRECT;
1551
+ });
1552
+ }
1553
+ #endif
1554
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1555
+
1556
+ const int num_reads = 2;
1557
+ // Create requests
1558
+ std::vector<std::string> scratches;
1559
+ scratches.reserve(num_reads);
1560
+ std::vector<ReadRequest> reqs(num_reads);
1561
+
1562
+ std::unique_ptr<RandomAccessFile> file;
1563
+ ASSERT_OK(env_->NewRandomAccessFile(fname, &file, soptions));
1564
+ alignment = file->GetRequiredBufferAlignment();
1565
+ ASSERT_EQ(num_reads, reqs.size());
1566
+
1567
+ std::vector<std::unique_ptr<char, Deleter>> data;
1568
+
1569
+ std::vector<size_t> offsets = {0, 2047};
1570
+ std::vector<size_t> lens = {2047, 4096 - 2047};
1571
+
1572
+ for (size_t i = 0; i < num_reads; i++) {
1573
+ // Do alignment
1574
+ reqs[i].offset = static_cast<uint64_t>(TruncateToPageBoundary(
1575
+ alignment, static_cast<size_t>(/*offset=*/offsets[i])));
1576
+ reqs[i].len =
1577
+ Roundup(static_cast<size_t>(/*offset=*/offsets[i]) + /*length=*/lens[i],
1578
+ alignment) -
1579
+ reqs[i].offset;
1580
+
1581
+ size_t new_capacity = Roundup(reqs[i].len, alignment);
1582
+ data.emplace_back(NewAligned(new_capacity, 0));
1583
+ reqs[i].scratch = data.back().get();
1584
+ }
1585
+
1586
+ // Query the data
1587
+ ASSERT_OK(file->MultiRead(reqs.data(), reqs.size()));
1588
+
1589
+ // Validate results
1590
+ for (size_t i = 0; i < num_reads; ++i) {
1591
+ ASSERT_OK(reqs[i].status);
1592
+ }
1593
+
1594
+ ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1595
+ }
1596
+ #endif // ROCKSDB_LITE
1597
+
1525
1598
  #if defined(ROCKSDB_IOURING_PRESENT)
1526
1599
  void GenerateFilesAndRequest(Env* env, const std::string& fname,
1527
1600
  std::vector<ReadRequest>* ret_reqs,
@@ -750,14 +750,17 @@ IOStatus PosixRandomAccessFile::MultiRead(FSReadRequest* reqs,
750
750
  bytes_read, read_again);
751
751
  int32_t res = cqe->res;
752
752
  if (res >= 0) {
753
- if (bytes_read == 0 && read_again) {
754
- Slice tmp_slice;
755
- req->status =
756
- Read(req->offset + req_wrap->finished_len,
757
- req->len - req_wrap->finished_len, options, &tmp_slice,
758
- req->scratch + req_wrap->finished_len, dbg);
759
- req->result =
760
- Slice(req->scratch, req_wrap->finished_len + tmp_slice.size());
753
+ if (bytes_read == 0) {
754
+ if (read_again) {
755
+ Slice tmp_slice;
756
+ req->status =
757
+ Read(req->offset + req_wrap->finished_len,
758
+ req->len - req_wrap->finished_len, options, &tmp_slice,
759
+ req->scratch + req_wrap->finished_len, dbg);
760
+ req->result =
761
+ Slice(req->scratch, req_wrap->finished_len + tmp_slice.size());
762
+ }
763
+ // Otherwise this is EOF, so there is nothing more to do.
761
764
  } else if (bytes_read < req_wrap->iov.iov_len) {
762
765
  incomplete_rq_list.push_back(req_wrap);
763
766
  }
@@ -246,6 +246,11 @@ enum UpdateStatus { // Return status For inplace update callback
246
246
  UPDATED = 2, // No inplace update. Merged value set
247
247
  };
248
248
 
249
+ enum class PrepopulateBlobCache : uint8_t {
250
+ kDisable = 0x0, // Disable prepopulate blob cache
251
+ kFlushOnly = 0x1, // Prepopulate blobs during flush only
252
+ };
253
+
249
254
  struct AdvancedColumnFamilyOptions {
250
255
  // The maximum number of write buffers that are built up in memory.
251
256
  // The default and the minimum number is 2, so that when 1 write buffer
@@ -874,6 +879,15 @@ struct AdvancedColumnFamilyOptions {
874
879
  // Dynamically changeable through the SetOptions() API
875
880
  Temperature bottommost_temperature = Temperature::kUnknown;
876
881
 
882
+ // EXPERIMENTAL
883
+ // The feature is still in development and is incomplete.
884
+ // If this option is set, when data insert time is within this time range, it
885
+ // will be precluded from the last level.
886
+ // 0 means no key will be precluded from the last level.
887
+ //
888
+ // Default: 0 (disable the feature)
889
+ uint64_t preclude_last_level_data_seconds = 0;
890
+
877
891
  // When set, large values (blobs) are written to separate blob files, and
878
892
  // only pointers to them are stored in SST files. This can reduce write
879
893
  // amplification for large-value use cases at the cost of introducing a level
@@ -983,6 +997,20 @@ struct AdvancedColumnFamilyOptions {
983
997
  // Default: nullptr (disabled)
984
998
  std::shared_ptr<Cache> blob_cache = nullptr;
985
999
 
1000
+ // If enabled, prepopulate warm/hot blobs which are already in memory into
1001
+ // blob cache at the time of flush. On a flush, the blob that is in memory (in
1002
+ // memtables) gets flushed to the device. If using Direct IO, additional IO is
1003
+ // incurred to read this blob back into memory again, which is avoided by
1004
+ // enabling this option. This further helps if the workload exhibits high
1005
+ // temporal locality, where most of the reads go to recently written data.
1006
+ // This also helps in case of the remote file system since it involves network
1007
+ // traffic and higher latencies.
1008
+ //
1009
+ // Default: disabled
1010
+ //
1011
+ // Dynamically changeable through the SetOptions() API
1012
+ PrepopulateBlobCache prepopulate_blob_cache = PrepopulateBlobCache::kDisable;
1013
+
986
1014
  // Create ColumnFamilyOptions with default values for all fields
987
1015
  AdvancedColumnFamilyOptions();
988
1016
  // Create ColumnFamilyOptions from Options
@@ -1302,6 +1302,17 @@ extern ROCKSDB_LIBRARY_API int rocksdb_options_get_blob_file_starting_level(
1302
1302
  extern ROCKSDB_LIBRARY_API void rocksdb_options_set_blob_cache(
1303
1303
  rocksdb_options_t* opt, rocksdb_cache_t* blob_cache);
1304
1304
 
1305
+ enum {
1306
+ rocksdb_prepopulate_blob_disable = 0,
1307
+ rocksdb_prepopulate_blob_flush_only = 1
1308
+ };
1309
+
1310
+ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prepopulate_blob_cache(
1311
+ rocksdb_options_t* opt, int val);
1312
+
1313
+ extern ROCKSDB_LIBRARY_API int rocksdb_options_get_prepopulate_blob_cache(
1314
+ rocksdb_options_t* opt);
1315
+
1305
1316
  /* returns a pointer to a malloc()-ed, null terminated string */
1306
1317
  extern ROCKSDB_LIBRARY_API char* rocksdb_options_statistics_get_string(
1307
1318
  rocksdb_options_t* opt);
@@ -2061,13 +2072,15 @@ extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_delete(
2061
2072
  extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_delete_with_ts(
2062
2073
  rocksdb_sstfilewriter_t* writer, const char* key, size_t keylen,
2063
2074
  const char* ts, size_t tslen, char** errptr);
2075
+ extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_delete_range(
2076
+ rocksdb_sstfilewriter_t* writer, const char* begin_key, size_t begin_keylen,
2077
+ const char* end_key, size_t end_keylen, char** errptr);
2064
2078
  extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_finish(
2065
2079
  rocksdb_sstfilewriter_t* writer, char** errptr);
2066
2080
  extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_file_size(
2067
2081
  rocksdb_sstfilewriter_t* writer, uint64_t* file_size);
2068
2082
  extern ROCKSDB_LIBRARY_API void rocksdb_sstfilewriter_destroy(
2069
2083
  rocksdb_sstfilewriter_t* writer);
2070
-
2071
2084
  extern ROCKSDB_LIBRARY_API rocksdb_ingestexternalfileoptions_t*
2072
2085
  rocksdb_ingestexternalfileoptions_create(void);
2073
2086
  extern ROCKSDB_LIBRARY_API void
@@ -571,12 +571,12 @@ enum class CacheEntryRole {
571
571
  // Filter's charge to account for
572
572
  // (new) bloom and ribbon filter construction's memory usage
573
573
  kFilterConstruction,
574
- // BlockBasedTableReader's charge to account for
575
- // its memory usage
574
+ // BlockBasedTableReader's charge to account for its memory usage
576
575
  kBlockBasedTableReader,
577
- // FileMetadata's charge to account for
578
- // its memory usage
576
+ // FileMetadata's charge to account for its memory usage
579
577
  kFileMetadata,
578
+ // Blob cache's charge to account for its memory usage
579
+ kBlobCache,
580
580
  // Default bucket, for miscellaneous cache entries. Do not use for
581
581
  // entries that could potentially add up to large usage.
582
582
  kMisc,
@@ -17,6 +17,22 @@ namespace ROCKSDB_NAMESPACE {
17
17
 
18
18
  class Slice;
19
19
 
20
+ // The general interface for comparing two Slices is defined for both
21
+ // Comparator and some internal data structures.
22
+ class CompareInterface {
23
+ public:
24
+ virtual ~CompareInterface() {}
25
+
26
+ // Three-way comparison. Returns value:
27
+ // < 0 iff "a" < "b",
28
+ // == 0 iff "a" == "b",
29
+ // > 0 iff "a" > "b"
30
+ // Note that Compare(a, b) also compares timestamp if timestamp size is
31
+ // non-zero. For the same user key with different timestamps, larger (newer)
32
+ // timestamp comes first.
33
+ virtual int Compare(const Slice& a, const Slice& b) const = 0;
34
+ };
35
+
20
36
  // A Comparator object provides a total order across slices that are
21
37
  // used as keys in an sstable or a database. A Comparator implementation
22
38
  // must be thread-safe since rocksdb may invoke its methods concurrently
@@ -25,7 +41,7 @@ class Slice;
25
41
  // Exceptions MUST NOT propagate out of overridden functions into RocksDB,
26
42
  // because RocksDB is not exception-safe. This could cause undefined behavior
27
43
  // including data loss, unreported corruption, deadlocks, and more.
28
- class Comparator : public Customizable {
44
+ class Comparator : public Customizable, public CompareInterface {
29
45
  public:
30
46
  Comparator() : timestamp_size_(0) {}
31
47
 
@@ -47,24 +63,6 @@ class Comparator : public Customizable {
47
63
  const Comparator** comp);
48
64
  static const char* Type() { return "Comparator"; }
49
65
 
50
- // Three-way comparison. Returns value:
51
- // < 0 iff "a" < "b",
52
- // == 0 iff "a" == "b",
53
- // > 0 iff "a" > "b"
54
- // Note that Compare(a, b) also compares timestamp if timestamp size is
55
- // non-zero. For the same user key with different timestamps, larger (newer)
56
- // timestamp comes first.
57
- virtual int Compare(const Slice& a, const Slice& b) const = 0;
58
-
59
- // Compares two slices for equality. The following invariant should always
60
- // hold (and is the default implementation):
61
- // Equal(a, b) iff Compare(a, b) == 0
62
- // Overwrite only if equality comparisons can be done more efficiently than
63
- // three-way comparisons.
64
- virtual bool Equal(const Slice& a, const Slice& b) const {
65
- return Compare(a, b) == 0;
66
- }
67
-
68
66
  // The name of the comparator. Used to check for comparator
69
67
  // mismatches (i.e., a DB created with one comparator is
70
68
  // accessed using a different comparator.
@@ -77,6 +75,15 @@ class Comparator : public Customizable {
77
75
  // by any clients of this package.
78
76
  const char* Name() const override = 0;
79
77
 
78
+ // Compares two slices for equality. The following invariant should always
79
+ // hold (and is the default implementation):
80
+ // Equal(a, b) iff Compare(a, b) == 0
81
+ // Overwrite only if equality comparisons can be done more efficiently than
82
+ // three-way comparisons.
83
+ virtual bool Equal(const Slice& a, const Slice& b) const {
84
+ return Compare(a, b) == 0;
85
+ }
86
+
80
87
  // Advanced functions: these are used to reduce the space requirements
81
88
  // for internal data structures like index blocks.
82
89
 
@@ -91,10 +98,6 @@ class Comparator : public Customizable {
91
98
  // i.e., an implementation of this method that does nothing is correct.
92
99
  virtual void FindShortSuccessor(std::string* key) const = 0;
93
100
 
94
- // if it is a wrapped comparator, may return the root one.
95
- // return itself it is not wrapped.
96
- virtual const Comparator* GetRootComparator() const { return this; }
97
-
98
101
  // given two keys, determine if t is the successor of s
99
102
  // BUG: only return true if no other keys starting with `t` are ordered
100
103
  // before `t`. Otherwise, the auto_prefix_mode can omit entries within
@@ -111,6 +114,10 @@ class Comparator : public Customizable {
111
114
  // with the customized comparator.
112
115
  virtual bool CanKeysWithDifferentByteContentsBeEqual() const { return true; }
113
116
 
117
+ // if it is a wrapped comparator, may return the root one.
118
+ // return itself if it is not wrapped.
119
+ virtual const Comparator* GetRootComparator() const { return this; }
120
+
114
121
  inline size_t timestamp_size() const { return timestamp_size_; }
115
122
 
116
123
  int CompareWithoutTimestamp(const Slice& a, const Slice& b) const {
@@ -410,7 +410,7 @@ class DB {
410
410
  // UNDER CONSTRUCTION -- DO NOT USE
411
411
  virtual Status PutEntity(const WriteOptions& options,
412
412
  ColumnFamilyHandle* column_family, const Slice& key,
413
- const WideColumns& columns) = 0;
413
+ const WideColumns& columns);
414
414
 
415
415
  // Remove the database entry (if any) for "key". Returns OK on
416
416
  // success, and a non-OK status on error. It is not an error if "key"
@@ -9,7 +9,6 @@
9
9
 
10
10
  #pragma once
11
11
 
12
- #include "rocksdb/customizable.h"
13
12
  #include "rocksdb/env.h"
14
13
  #include "rocksdb/statistics.h"
15
14
  #include "rocksdb/status.h"
@@ -19,7 +18,7 @@ namespace ROCKSDB_NAMESPACE {
19
18
  // Exceptions MUST NOT propagate out of overridden functions into RocksDB,
20
19
  // because RocksDB is not exception-safe. This could cause undefined behavior
21
20
  // including data loss, unreported corruption, deadlocks, and more.
22
- class RateLimiter : public Customizable {
21
+ class RateLimiter {
23
22
  public:
24
23
  enum class OpType {
25
24
  kRead,
@@ -32,20 +31,11 @@ class RateLimiter : public Customizable {
32
31
  kAllIo,
33
32
  };
34
33
 
35
- static const char* Type() { return "RateLimiter"; }
36
- static Status CreateFromString(const ConfigOptions& options,
37
- const std::string& value,
38
- std::shared_ptr<RateLimiter>* result);
39
-
40
34
  // For API compatibility, default to rate-limiting writes only.
41
- explicit RateLimiter(Mode mode = Mode::kWritesOnly);
35
+ explicit RateLimiter(Mode mode = Mode::kWritesOnly) : mode_(mode) {}
42
36
 
43
37
  virtual ~RateLimiter() {}
44
38
 
45
- // Deprecated. Will be removed in a major release. Derived classes
46
- // should implement this method.
47
- virtual const char* Name() const override { return ""; }
48
-
49
39
  // This API allows user to dynamically change rate limiter's bytes per second.
50
40
  // REQUIRED: bytes_per_second > 0
51
41
  virtual void SetBytesPerSecond(int64_t bytes_per_second) = 0;
@@ -135,7 +125,7 @@ class RateLimiter : public Customizable {
135
125
  Mode GetMode() { return mode_; }
136
126
 
137
127
  private:
138
- Mode mode_;
128
+ const Mode mode_;
139
129
  };
140
130
 
141
131
  // Create a RateLimiter object, which can be shared among RocksDB instances to
@@ -69,6 +69,7 @@ struct TablePropertiesNames {
69
69
  static const std::string kFileCreationTime;
70
70
  static const std::string kSlowCompressionEstimatedDataSize;
71
71
  static const std::string kFastCompressionEstimatedDataSize;
72
+ static const std::string kSequenceNumberTimeMapping;
72
73
  };
73
74
 
74
75
  // `TablePropertiesCollector` provides the mechanism for users to collect
@@ -220,6 +221,7 @@ struct TableProperties {
220
221
  // TODO(sagar0): Should be changed to latest_key_time ... but don't know the
221
222
  // full implications of backward compatibility. Hence retaining for now.
222
223
  uint64_t creation_time = 0;
224
+
223
225
  // Timestamp of the earliest key. 0 means unknown.
224
226
  uint64_t oldest_key_time = 0;
225
227
  // Actual SST file creation time. 0 means unknown.
@@ -284,6 +286,9 @@ struct TableProperties {
284
286
  // Compression options used to compress the SST files.
285
287
  std::string compression_options;
286
288
 
289
+ // Sequence number to time mapping, delta encoded.
290
+ std::string seqno_to_time_mapping;
291
+
287
292
  // user collected properties
288
293
  UserCollectedProperties user_collected_properties;
289
294
  UserCollectedProperties readable_properties;
@@ -25,9 +25,8 @@ struct KeyVersion {
25
25
  std::string user_key;
26
26
  std::string value;
27
27
  SequenceNumber sequence;
28
- // TODO(ajkr): we should provide a helper function that converts the int to a
29
- // string describing the type for easier debugging.
30
28
  int type;
29
+ std::string GetTypeName() const;
31
30
  };
32
31
 
33
32
  // Returns listing of all versions of keys in the provided user key range.
@@ -70,6 +70,7 @@ class LDBCommand {
70
70
  static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD;
71
71
  static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE;
72
72
  static const std::string ARG_BLOB_FILE_STARTING_LEVEL;
73
+ static const std::string ARG_PREPOPULATE_BLOB_CACHE;
73
74
  static const std::string ARG_DECODE_BLOB_INDEX;
74
75
  static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS;
75
76
 
@@ -12,7 +12,7 @@
12
12
  // NOTE: in 'main' development branch, this should be the *next*
13
13
  // minor or major version number planned for release.
14
14
  #define ROCKSDB_MAJOR 7
15
- #define ROCKSDB_MINOR 5
15
+ #define ROCKSDB_MINOR 6
16
16
  #define ROCKSDB_PATCH 0
17
17
 
18
18
  // Do not use these. We made the mistake of declaring macros starting with
@@ -67,10 +67,10 @@ TEST_F(StatsHistoryTest, RunStatsDumpPeriodSec) {
67
67
 
68
68
  // Wait for the first stats persist to finish, as the initial delay could be
69
69
  // different.
70
- dbfull()->TEST_WaitForStatsDumpRun(
70
+ dbfull()->TEST_WaitForPeridicWorkerRun(
71
71
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
72
72
 
73
- dbfull()->TEST_WaitForStatsDumpRun(
73
+ dbfull()->TEST_WaitForPeridicWorkerRun(
74
74
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
75
75
  ASSERT_GE(counter, 1);
76
76
 
@@ -99,17 +99,17 @@ TEST_F(StatsHistoryTest, StatsPersistScheduling) {
99
99
 
100
100
  // Wait for the first stats persist to finish, as the initial delay could be
101
101
  // different.
102
- dbfull()->TEST_WaitForStatsDumpRun(
102
+ dbfull()->TEST_WaitForPeridicWorkerRun(
103
103
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
104
104
 
105
- dbfull()->TEST_WaitForStatsDumpRun(
105
+ dbfull()->TEST_WaitForPeridicWorkerRun(
106
106
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
107
107
  ASSERT_GE(counter, 1);
108
108
 
109
109
  // Test cancel job through SetOptions
110
110
  ASSERT_OK(dbfull()->SetDBOptions({{"stats_persist_period_sec", "0"}}));
111
111
  int old_val = counter;
112
- dbfull()->TEST_WaitForStatsDumpRun(
112
+ dbfull()->TEST_WaitForPeridicWorkerRun(
113
113
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec * 2); });
114
114
  ASSERT_EQ(counter, old_val);
115
115
 
@@ -131,7 +131,7 @@ TEST_F(StatsHistoryTest, PersistentStatsFreshInstall) {
131
131
  {{"stats_persist_period_sec", std::to_string(kPeriodSec)}}));
132
132
  ASSERT_EQ(kPeriodSec, dbfull()->GetDBOptions().stats_persist_period_sec);
133
133
 
134
- dbfull()->TEST_WaitForStatsDumpRun(
134
+ dbfull()->TEST_WaitForPeridicWorkerRun(
135
135
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
136
136
  ASSERT_GE(counter, 1);
137
137
  Close();
@@ -150,11 +150,11 @@ TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) {
150
150
  ReopenWithColumnFamilies({"default", "pikachu"}, options);
151
151
 
152
152
  // make sure the first stats persist to finish
153
- dbfull()->TEST_WaitForStatsDumpRun(
153
+ dbfull()->TEST_WaitForPeridicWorkerRun(
154
154
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
155
155
 
156
156
  // Wait for stats persist to finish
157
- dbfull()->TEST_WaitForStatsDumpRun(
157
+ dbfull()->TEST_WaitForPeridicWorkerRun(
158
158
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
159
159
 
160
160
  std::unique_ptr<StatsHistoryIterator> stats_iter;
@@ -172,7 +172,7 @@ TEST_F(StatsHistoryTest, GetStatsHistoryInMemory) {
172
172
  ASSERT_GT(stats_count, 0);
173
173
  // Wait a bit and verify no more stats are found
174
174
  for (int i = 0; i < 10; ++i) {
175
- dbfull()->TEST_WaitForStatsDumpRun(
175
+ dbfull()->TEST_WaitForPeridicWorkerRun(
176
176
  [&] { mock_clock_->MockSleepForSeconds(1); });
177
177
  }
178
178
  ASSERT_OK(db_->GetStatsHistory(0, mock_clock_->NowSeconds(), &stats_iter));
@@ -227,7 +227,7 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) {
227
227
 
228
228
  const int kIterations = 10;
229
229
  for (int i = 0; i < kIterations; ++i) {
230
- dbfull()->TEST_WaitForStatsDumpRun(
230
+ dbfull()->TEST_WaitForPeridicWorkerRun(
231
231
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
232
232
  }
233
233
 
@@ -251,7 +251,7 @@ TEST_F(StatsHistoryTest, InMemoryStatsHistoryPurging) {
251
251
 
252
252
  // Wait for stats persist to finish
253
253
  for (int i = 0; i < kIterations; ++i) {
254
- dbfull()->TEST_WaitForStatsDumpRun(
254
+ dbfull()->TEST_WaitForPeridicWorkerRun(
255
255
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
256
256
  }
257
257
 
@@ -300,11 +300,11 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
300
300
 
301
301
  // Wait for the first stats persist to finish, as the initial delay could be
302
302
  // different.
303
- dbfull()->TEST_WaitForStatsDumpRun(
303
+ dbfull()->TEST_WaitForPeridicWorkerRun(
304
304
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
305
305
 
306
306
  // Wait for stats persist to finish
307
- dbfull()->TEST_WaitForStatsDumpRun(
307
+ dbfull()->TEST_WaitForPeridicWorkerRun(
308
308
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
309
309
 
310
310
  auto iter =
@@ -312,14 +312,14 @@ TEST_F(StatsHistoryTest, GetStatsHistoryFromDisk) {
312
312
  int key_count1 = countkeys(iter);
313
313
  delete iter;
314
314
 
315
- dbfull()->TEST_WaitForStatsDumpRun(
315
+ dbfull()->TEST_WaitForPeridicWorkerRun(
316
316
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
317
317
  iter =
318
318
  db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
319
319
  int key_count2 = countkeys(iter);
320
320
  delete iter;
321
321
 
322
- dbfull()->TEST_WaitForStatsDumpRun(
322
+ dbfull()->TEST_WaitForPeridicWorkerRun(
323
323
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
324
324
  iter =
325
325
  db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
@@ -393,32 +393,32 @@ TEST_F(StatsHistoryTest, PersitentStatsVerifyValue) {
393
393
 
394
394
  // Wait for the first stats persist to finish, as the initial delay could be
395
395
  // different.
396
- dbfull()->TEST_WaitForStatsDumpRun(
396
+ dbfull()->TEST_WaitForPeridicWorkerRun(
397
397
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
398
398
 
399
399
  // Wait for stats persist to finish
400
- dbfull()->TEST_WaitForStatsDumpRun(
400
+ dbfull()->TEST_WaitForPeridicWorkerRun(
401
401
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
402
402
  auto iter =
403
403
  db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
404
404
  countkeys(iter);
405
405
  delete iter;
406
406
 
407
- dbfull()->TEST_WaitForStatsDumpRun(
407
+ dbfull()->TEST_WaitForPeridicWorkerRun(
408
408
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
409
409
  iter =
410
410
  db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
411
411
  countkeys(iter);
412
412
  delete iter;
413
413
 
414
- dbfull()->TEST_WaitForStatsDumpRun(
414
+ dbfull()->TEST_WaitForPeridicWorkerRun(
415
415
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
416
416
  iter =
417
417
  db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
418
418
  countkeys(iter);
419
419
  delete iter;
420
420
 
421
- dbfull()->TEST_WaitForStatsDumpRun(
421
+ dbfull()->TEST_WaitForPeridicWorkerRun(
422
422
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
423
423
 
424
424
  std::map<std::string, uint64_t> stats_map_after;
@@ -482,10 +482,10 @@ TEST_F(StatsHistoryTest, PersistentStatsCreateColumnFamilies) {
482
482
  ASSERT_EQ(Get(2, "foo"), "bar");
483
483
 
484
484
  // Make sure the first stats persist has finished
485
- dbfull()->TEST_WaitForStatsDumpRun(
485
+ dbfull()->TEST_WaitForPeridicWorkerRun(
486
486
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
487
487
 
488
- dbfull()->TEST_WaitForStatsDumpRun(
488
+ dbfull()->TEST_WaitForPeridicWorkerRun(
489
489
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
490
490
  auto iter =
491
491
  db_->NewIterator(ReadOptions(), dbfull()->PersistentStatsColumnFamily());
@@ -582,7 +582,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
582
582
 
583
583
  // Wait for the first stats persist to finish, as the initial delay could be
584
584
  // different.
585
- dbfull()->TEST_WaitForStatsDumpRun(
585
+ dbfull()->TEST_WaitForPeridicWorkerRun(
586
586
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec - 1); });
587
587
 
588
588
  ColumnFamilyData* cfd_default =
@@ -601,7 +601,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
601
601
  ASSERT_OK(Put(1, "Eevee", "v0"));
602
602
  ASSERT_EQ("v0", Get(1, "Eevee"));
603
603
 
604
- dbfull()->TEST_WaitForStatsDumpRun(
604
+ dbfull()->TEST_WaitForPeridicWorkerRun(
605
605
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
606
606
  // writing to all three cf, flush default cf
607
607
  // LogNumbers: default: 16, stats: 10, pikachu: 5
@@ -630,7 +630,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
630
630
  ASSERT_EQ("v2", Get("bar2"));
631
631
  ASSERT_EQ("v2", Get("foo2"));
632
632
 
633
- dbfull()->TEST_WaitForStatsDumpRun(
633
+ dbfull()->TEST_WaitForPeridicWorkerRun(
634
634
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
635
635
  // writing to default and stats cf, flushing default cf
636
636
  // LogNumbers: default: 19, stats: 19, pikachu: 19
@@ -645,7 +645,7 @@ TEST_F(StatsHistoryTest, ForceManualFlushStatsCF) {
645
645
  ASSERT_OK(Put(1, "Jolteon", "v3"));
646
646
  ASSERT_EQ("v3", Get(1, "Jolteon"));
647
647
 
648
- dbfull()->TEST_WaitForStatsDumpRun(
648
+ dbfull()->TEST_WaitForPeridicWorkerRun(
649
649
  [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
650
650
  // writing to all three cf, flushing test cf
651
651
  // LogNumbers: default: 19, stats: 19, pikachu: 22
@@ -451,6 +451,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
451
451
  {offsetof(struct MutableCFOptions, blob_file_starting_level),
452
452
  OptionType::kInt, OptionVerificationType::kNormal,
453
453
  OptionTypeFlags::kMutable}},
454
+ {"prepopulate_blob_cache",
455
+ OptionTypeInfo::Enum<PrepopulateBlobCache>(
456
+ offsetof(struct MutableCFOptions, prepopulate_blob_cache),
457
+ &prepopulate_blob_cache_string_map, OptionTypeFlags::kMutable)},
454
458
  {"sample_for_compression",
455
459
  {offsetof(struct MutableCFOptions, sample_for_compression),
456
460
  OptionType::kUInt64T, OptionVerificationType::kNormal,
@@ -547,6 +551,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
547
551
  {offsetof(struct ImmutableCFOptions, force_consistency_checks),
548
552
  OptionType::kBoolean, OptionVerificationType::kNormal,
549
553
  OptionTypeFlags::kNone}},
554
+ {"preclude_last_level_data_seconds",
555
+ {offsetof(struct ImmutableCFOptions, preclude_last_level_data_seconds),
556
+ OptionType::kUInt64T, OptionVerificationType::kNormal,
557
+ OptionTypeFlags::kNone}},
550
558
  // Need to keep this around to be able to read old OPTIONS files.
551
559
  {"max_mem_compaction_level",
552
560
  {0, OptionType::kInt, OptionVerificationType::kDeprecated,
@@ -880,6 +888,8 @@ ImmutableCFOptions::ImmutableCFOptions(const ColumnFamilyOptions& cf_options)
880
888
  num_levels(cf_options.num_levels),
881
889
  optimize_filters_for_hits(cf_options.optimize_filters_for_hits),
882
890
  force_consistency_checks(cf_options.force_consistency_checks),
891
+ preclude_last_level_data_seconds(
892
+ cf_options.preclude_last_level_data_seconds),
883
893
  memtable_insert_with_hint_prefix_extractor(
884
894
  cf_options.memtable_insert_with_hint_prefix_extractor),
885
895
  cf_paths(cf_options.cf_paths),
@@ -1091,7 +1101,10 @@ void MutableCFOptions::Dump(Logger* log) const {
1091
1101
  blob_compaction_readahead_size);
1092
1102
  ROCKS_LOG_INFO(log, " blob_file_starting_level: %d",
1093
1103
  blob_file_starting_level);
1094
-
1104
+ ROCKS_LOG_INFO(log, " prepopulate_blob_cache: %s",
1105
+ prepopulate_blob_cache == PrepopulateBlobCache::kFlushOnly
1106
+ ? "flush only"
1107
+ : "disable");
1095
1108
  ROCKS_LOG_INFO(log, " bottommost_temperature: %d",
1096
1109
  static_cast<int>(bottommost_temperature));
1097
1110
  }