@nxtedition/rocksdb 13.5.9 → 13.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/deps/rocksdb/rocksdb/BUCK +2 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
  3. package/deps/rocksdb/rocksdb/Makefile +1 -1
  4. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -5
  5. package/deps/rocksdb/rocksdb/db/c.cc +13 -0
  6. package/deps/rocksdb/rocksdb/db/c_test.c +0 -12
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -8
  8. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +2 -3
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +5 -4
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -10
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +11 -6
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +10 -16
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +2 -4
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -17
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +164 -0
  17. package/deps/rocksdb/rocksdb/db/corruption_test.cc +74 -3
  18. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +39 -4
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -83
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -4
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -11
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +0 -3
  23. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -9
  24. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +16 -54
  25. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +0 -6
  26. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +186 -0
  27. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +3 -40
  28. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -54
  29. package/deps/rocksdb/rocksdb/db/db_test.cc +0 -292
  30. package/deps/rocksdb/rocksdb/db/db_test2.cc +0 -1235
  31. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -0
  32. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +11 -4
  33. package/deps/rocksdb/rocksdb/db/log_reader.cc +11 -11
  34. package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
  35. package/deps/rocksdb/rocksdb/db/multi_scan.cc +70 -0
  36. package/deps/rocksdb/rocksdb/db/version_set.cc +15 -8
  37. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +4 -0
  38. package/deps/rocksdb/rocksdb/env/composite_env.cc +4 -0
  39. package/deps/rocksdb/rocksdb/env/env.cc +4 -0
  40. package/deps/rocksdb/rocksdb/env/env_encryption.cc +38 -3
  41. package/deps/rocksdb/rocksdb/env/env_test.cc +36 -1
  42. package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -4
  43. package/deps/rocksdb/rocksdb/env/io_posix.cc +16 -0
  44. package/deps/rocksdb/rocksdb/env/io_posix.h +3 -0
  45. package/deps/rocksdb/rocksdb/env/mock_env.cc +5 -0
  46. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +4 -0
  47. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -6
  48. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +5 -0
  49. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +25 -1
  50. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +10 -0
  51. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -0
  52. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +12 -0
  53. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +12 -8
  54. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +29 -28
  55. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +26 -6
  56. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +9 -0
  57. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +3 -0
  58. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +142 -0
  59. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +2 -0
  60. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +2 -2
  61. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +2 -0
  62. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  63. package/deps/rocksdb/rocksdb/options/options_helper.h +3 -0
  64. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
  65. package/deps/rocksdb/rocksdb/port/win/io_win.cc +20 -0
  66. package/deps/rocksdb/rocksdb/port/win/io_win.h +4 -0
  67. package/deps/rocksdb/rocksdb/src.mk +2 -1
  68. package/deps/rocksdb/rocksdb/table/block_based/block.cc +31 -34
  69. package/deps/rocksdb/rocksdb/table/block_based/block.h +2 -4
  70. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +43 -7
  71. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  72. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +367 -2
  73. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +69 -23
  74. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +54 -6
  75. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +27 -5
  76. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +167 -3
  77. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +6 -2
  78. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +6 -0
  79. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +12 -0
  80. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +1 -0
  81. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -3
  82. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +10 -7
  83. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +244 -0
  84. package/deps/rocksdb/rocksdb/table/external_table.cc +1 -1
  85. package/deps/rocksdb/rocksdb/table/format.cc +51 -33
  86. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +13 -8
  88. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -3
  89. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +5 -1
  90. package/deps/rocksdb/rocksdb/table/table_test.cc +629 -1
  91. package/deps/rocksdb/rocksdb/test_util/testutil.cc +0 -1
  92. package/deps/rocksdb/rocksdb/test_util/testutil.h +5 -0
  93. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +183 -94
  94. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +71 -0
  95. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +37 -22
  96. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +308 -0
  97. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +189 -0
  98. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -11
  99. package/deps/rocksdb/rocksdb/util/coding.h +4 -3
  100. package/deps/rocksdb/rocksdb/util/compression.cc +2 -0
  101. package/deps/rocksdb/rocksdb/util/compression.h +16 -6
  102. package/deps/rocksdb/rocksdb/util/compression_test.cc +1679 -15
  103. package/deps/rocksdb/rocksdb/util/stop_watch.h +17 -7
  104. package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +17 -3
  105. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +10 -0
  106. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +5 -0
  107. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +2 -0
  108. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +18 -2
  109. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +22 -3
  110. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +5 -0
  111. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +22 -2
  112. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +15 -4
  113. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +61 -0
  114. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +18 -0
  115. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +3 -0
  116. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +3 -0
  117. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +9 -3
  118. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +9 -0
  119. package/deps/rocksdb/rocksdb.gyp +15 -1
  120. package/package.json +1 -1
  121. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  122. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  123. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +0 -131
  124. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +0 -90
@@ -102,6 +102,7 @@ class StopWatch {
102
102
  };
103
103
 
104
104
  // a nano second precision stopwatch
105
+ template <bool use_cpu_time = false>
105
106
  class StopWatchNano {
106
107
  public:
107
108
  explicit StopWatchNano(SystemClock* clock, bool auto_start = false)
@@ -110,27 +111,36 @@ class StopWatchNano {
110
111
  Start();
111
112
  }
112
113
  }
113
-
114
- void Start() { start_ = clock_->NowNanos(); }
115
-
114
+ void Start() {
115
+ if constexpr (use_cpu_time) {
116
+ start_ = clock_->CPUNanos();
117
+ } else {
118
+ start_ = clock_->NowNanos();
119
+ }
120
+ }
116
121
  uint64_t ElapsedNanos(bool reset = false) {
117
- auto now = clock_->NowNanos();
122
+ uint64_t now = 0;
123
+ if constexpr (use_cpu_time) {
124
+ now = clock_->CPUNanos();
125
+ } else {
126
+ now = clock_->NowNanos();
127
+ }
118
128
  auto elapsed = now - start_;
119
129
  if (reset) {
120
130
  start_ = now;
121
131
  }
122
132
  return elapsed;
123
133
  }
124
-
125
134
  uint64_t ElapsedNanosSafe(bool reset = false) {
126
135
  return (clock_ != nullptr) ? ElapsedNanos(reset) : 0U;
127
136
  }
128
-
129
137
  bool IsStarted() { return start_ != 0; }
138
+ uint64_t ElapsedMicros(bool reset = false) {
139
+ return ElapsedNanos(reset) / 1000;
140
+ }
130
141
 
131
142
  private:
132
143
  SystemClock* clock_;
133
144
  uint64_t start_;
134
145
  };
135
-
136
146
  } // namespace ROCKSDB_NAMESPACE
@@ -28,6 +28,10 @@
28
28
 
29
29
  #include <future>
30
30
 
31
+ #include "test_util/testharness.h"
32
+
33
+ namespace ROCKSDB_NAMESPACE {
34
+
31
35
  namespace Timing {
32
36
 
33
37
  using Clock = std::chrono::high_resolution_clock;
@@ -39,7 +43,9 @@ double now() {
39
43
 
40
44
  } // namespace Timing
41
45
 
42
- int main() {
46
+ class TimerQueueTest : public testing::Test {};
47
+
48
+ TEST_F(TimerQueueTest, BasicFunctionality) {
43
49
  TimerQueue q;
44
50
 
45
51
  double tnow = Timing::now();
@@ -68,6 +74,14 @@ int main() {
68
74
  // assert(ret == 1);
69
75
  // q.cancelAll();
70
76
 
71
- return 0;
77
+ // Test passes if we can create and add timers without crashing
78
+ ASSERT_TRUE(true);
79
+ }
80
+
81
+ } // namespace ROCKSDB_NAMESPACE
82
+
83
+ int main(int argc, char** argv) {
84
+ ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
85
+ ::testing::InitGoogleTest(&argc, argv);
86
+ return RUN_ALL_TESTS();
72
87
  }
73
- //////////////////////////////////////////
@@ -94,6 +94,16 @@ class RandomAccessFileMirror : public RandomAccessFile {
94
94
  // NOTE: not verified
95
95
  return a_->GetUniqueId(id, max_size);
96
96
  }
97
+
98
+ Status GetFileSize(uint64_t* file_size) override {
99
+ uint64_t asize = 0, bsize = 0;
100
+ Status as = a_->GetFileSize(&asize);
101
+ Status bs = b_->GetFileSize(&bsize);
102
+ assert(as == bs);
103
+ assert(asize == bsize);
104
+ *file_size = asize;
105
+ return as;
106
+ }
97
107
  };
98
108
 
99
109
  class WritableFileMirror : public WritableFile {
@@ -159,6 +159,11 @@ Status TestRandomAccessFile::MultiRead(ReadRequest* reqs, size_t num_reqs) {
159
159
  return target_->MultiRead(reqs, num_reqs);
160
160
  }
161
161
 
162
+ Status TestRandomAccessFile::GetFileSize(uint64_t* file_size) {
163
+ assert(target_);
164
+ return target_->GetFileSize(file_size);
165
+ }
166
+
162
167
  TestWritableFile::TestWritableFile(const std::string& fname,
163
168
  std::unique_ptr<WritableFile>&& f,
164
169
  FaultInjectionTestEnv* env)
@@ -59,6 +59,8 @@ class TestRandomAccessFile : public RandomAccessFile {
59
59
 
60
60
  Status MultiRead(ReadRequest* reqs, size_t num_reqs) override;
61
61
 
62
+ Status GetFileSize(uint64_t* file_size) override;
63
+
62
64
  private:
63
65
  std::unique_ptr<RandomAccessFile> target_;
64
66
  FaultInjectionTestEnv* env_;
@@ -399,10 +399,10 @@ IOStatus TestFSWritableFile::RangeSync(uint64_t offset, uint64_t nbytes,
399
399
  return io_s;
400
400
  }
401
401
 
402
- TestFSRandomRWFile::TestFSRandomRWFile(const std::string& /*fname*/,
402
+ TestFSRandomRWFile::TestFSRandomRWFile(const std::string& fname,
403
403
  std::unique_ptr<FSRandomRWFile>&& f,
404
404
  FaultInjectionTestFS* fs)
405
- : target_(std::move(f)), file_opened_(true), fs_(fs) {
405
+ : fname_(fname), target_(std::move(f)), file_opened_(true), fs_(fs) {
406
406
  assert(target_ != nullptr);
407
407
  }
408
408
 
@@ -433,6 +433,7 @@ IOStatus TestFSRandomRWFile::Read(uint64_t offset, size_t n,
433
433
 
434
434
  IOStatus TestFSRandomRWFile::Close(const IOOptions& options,
435
435
  IODebugContext* dbg) {
436
+ fs_->RandomRWFileClosed(fname_);
436
437
  if (!fs_->IsFilesystemActive()) {
437
438
  return fs_->GetError();
438
439
  }
@@ -562,6 +563,14 @@ size_t TestFSRandomAccessFile::GetUniqueId(char* id, size_t max_size) const {
562
563
  }
563
564
  }
564
565
 
566
+ IOStatus TestFSRandomAccessFile::GetFileSize(uint64_t* file_size) {
567
+ if (fs_->ShouldFailGetFileSize()) {
568
+ return IOStatus::IOError("GetFileSize failed");
569
+ } else {
570
+ return target_->GetFileSize(file_size);
571
+ }
572
+ }
573
+
565
574
  namespace {
566
575
  // Modifies `result` to start at the beginning of `scratch` if not already,
567
576
  // copying data there if needed.
@@ -1265,6 +1274,13 @@ IOStatus FaultInjectionTestFS::AbortIO(std::vector<void*>& io_handles) {
1265
1274
  return target()->AbortIO(io_handles);
1266
1275
  }
1267
1276
 
1277
+ void FaultInjectionTestFS::RandomRWFileClosed(const std::string& fname) {
1278
+ MutexLock l(&mutex_);
1279
+ if (open_managed_files_.find(fname) != open_managed_files_.end()) {
1280
+ open_managed_files_.erase(fname);
1281
+ }
1282
+ }
1283
+
1268
1284
  void FaultInjectionTestFS::WritableFileClosed(const FSFileState& state) {
1269
1285
  MutexLock l(&mutex_);
1270
1286
  if (open_managed_files_.find(state.filename_) != open_managed_files_.end()) {
@@ -106,8 +106,8 @@ class TestFSWritableFile : public FSWritableFile {
106
106
  const bool unsync_data_loss_;
107
107
  };
108
108
 
109
- // A wrapper around WritableFileWriter* file
110
- // is written to or sync'ed.
109
+ // A wrapper around FSRandomRWFile* file
110
+ // is read from/write to or sync'ed.
111
111
  class TestFSRandomRWFile : public FSRandomRWFile {
112
112
  public:
113
113
  explicit TestFSRandomRWFile(const std::string& fname,
@@ -128,6 +128,9 @@ class TestFSRandomRWFile : public FSRandomRWFile {
128
128
  bool use_direct_io() const override { return target_->use_direct_io(); }
129
129
 
130
130
  private:
131
+ // keep a copy of file name, so we can untrack it in File system, when it is
132
+ // closed
133
+ std::string fname_;
131
134
  std::unique_ptr<FSRandomRWFile> target_;
132
135
  bool file_opened_;
133
136
  FaultInjectionTestFS* fs_;
@@ -155,6 +158,8 @@ class TestFSRandomAccessFile : public FSRandomAccessFile {
155
158
 
156
159
  size_t GetUniqueId(char* id, size_t max_size) const override;
157
160
 
161
+ IOStatus GetFileSize(uint64_t* file_size) override;
162
+
158
163
  private:
159
164
  std::unique_ptr<FSRandomAccessFile> target_;
160
165
  FaultInjectionTestFS* fs_;
@@ -218,7 +223,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
218
223
  DeleteThreadLocalErrorContext),
219
224
  ingest_data_corruption_before_write_(false),
220
225
  checksum_handoff_func_type_(kCRC32c),
221
- fail_get_file_unique_id_(false) {}
226
+ fail_get_file_unique_id_(false),
227
+ fail_get_file_size_(false) {}
222
228
  virtual ~FaultInjectionTestFS() override { fs_error_.PermitUncheckedError(); }
223
229
 
224
230
  static const char* kClassName() { return "FaultInjectionTestFS"; }
@@ -338,6 +344,8 @@ class FaultInjectionTestFS : public FileSystemWrapper {
338
344
 
339
345
  void WritableFileAppended(const FSFileState& state);
340
346
 
347
+ void RandomRWFileClosed(const std::string& fname);
348
+
341
349
  IOStatus DropUnsyncedFileData();
342
350
 
343
351
  IOStatus DropRandomUnsyncedFileData(Random* rnd);
@@ -477,6 +485,16 @@ class FaultInjectionTestFS : public FileSystemWrapper {
477
485
  return fail_get_file_unique_id_;
478
486
  }
479
487
 
488
+ void SetFailGetFileSize(bool flag) {
489
+ MutexLock l(&mutex_);
490
+ fail_get_file_size_ = flag;
491
+ }
492
+
493
+ bool ShouldFailGetFileSize() {
494
+ MutexLock l(&mutex_);
495
+ return fail_get_file_size_;
496
+ }
497
+
480
498
  // Specify what the operation, so we can inject the right type of error
481
499
  enum ErrorOperation : char {
482
500
  kRead = 0,
@@ -636,6 +654,7 @@ class FaultInjectionTestFS : public FileSystemWrapper {
636
654
  bool ingest_data_corruption_before_write_;
637
655
  ChecksumType checksum_handoff_func_type_;
638
656
  bool fail_get_file_unique_id_;
657
+ bool fail_get_file_size_;
639
658
 
640
659
  // Inject an error. For a READ operation, a status of IOError(), a
641
660
  // corruption in the contents of scratch, or truncation of slice
@@ -359,6 +359,11 @@ Status PointLockManager::AcquireWithTimeout(
359
359
 
360
360
  stripe->stripe_mutex->UnLock();
361
361
 
362
+ // On timeout, persist the lock information so we can debug the contention
363
+ if (result.IsTimedOut()) {
364
+ txn->SetWaitingTxn(wait_ids, column_family_id, &key, true);
365
+ }
366
+
362
367
  return result;
363
368
  }
364
369
 
@@ -898,10 +898,30 @@ Status WriteCommittedTxn::CommitInternal() {
898
898
  if (!needs_ts) {
899
899
  if (commit_bypass_memtable_threshold_ &&
900
900
  wb_count >= commit_bypass_memtable_threshold_) {
901
- bypass_memtable = true;
901
+ if (wbwi->GetWBWIOpCount() != wb_count) {
902
+ ROCKS_LOG_WARN(
903
+ db_impl_->immutable_db_options().info_log,
904
+ "Transaction %s qualifies for commit optimization due to update "
905
+ "count. However, it will commit normally due to wbwi and wb record "
906
+ "count mismatch. Some updates were added directly to the "
907
+ "transaction's underlying write batch.",
908
+ GetName().c_str());
909
+ } else {
910
+ bypass_memtable = true;
911
+ }
902
912
  } else if (commit_bypass_memtable_byte_threshold_ &&
903
913
  wb->GetDataSize() >= commit_bypass_memtable_byte_threshold_) {
904
- bypass_memtable = true;
914
+ if (wbwi->GetWBWIOpCount() != wb_count) {
915
+ ROCKS_LOG_WARN(
916
+ db_impl_->immutable_db_options().info_log,
917
+ "Transaction %s qualifies for commit optimization due to write "
918
+ "batch size. However, it will commit normally due to wbwi and wb "
919
+ "record count mismatch. Some updates were added directly to the "
920
+ "transaction's underlying write batch.",
921
+ GetName().c_str());
922
+ } else {
923
+ bypass_memtable = true;
924
+ }
905
925
  }
906
926
  }
907
927
  if (!bypass_memtable) {
@@ -71,18 +71,26 @@ class PessimisticTransaction : public TransactionBaseImpl {
71
71
  std::string* key) const override {
72
72
  std::lock_guard<std::mutex> lock(wait_mutex_);
73
73
  std::vector<TransactionID> ids(waiting_txn_ids_.size());
74
- if (key) *key = waiting_key_ ? *waiting_key_ : "";
74
+ if (timed_out_key_.has_value()) {
75
+ if (key) *key = timed_out_key_.value();
76
+ } else {
77
+ if (key) *key = waiting_key_ ? *waiting_key_ : "";
78
+ }
75
79
  if (column_family_id) *column_family_id = waiting_cf_id_;
76
80
  std::copy(waiting_txn_ids_.begin(), waiting_txn_ids_.end(), ids.begin());
77
81
  return ids;
78
82
  }
79
83
 
80
84
  void SetWaitingTxn(autovector<TransactionID> ids, uint32_t column_family_id,
81
- const std::string* key) {
85
+ const std::string* key, bool is_timed_out = false) {
82
86
  std::lock_guard<std::mutex> lock(wait_mutex_);
83
87
  waiting_txn_ids_ = ids;
84
88
  waiting_cf_id_ = column_family_id;
85
- waiting_key_ = key;
89
+ if (is_timed_out) {
90
+ timed_out_key_ = key ? *key : "";
91
+ } else {
92
+ waiting_key_ = key;
93
+ }
86
94
  }
87
95
 
88
96
  void ClearWaitingTxn() {
@@ -182,7 +190,7 @@ class PessimisticTransaction : public TransactionBaseImpl {
182
190
 
183
191
  // IDs for the transactions that are blocking the current transaction.
184
192
  //
185
- // empty if current transaction is not waiting.
193
+ // empty if current transaction is not waiting or has timed out
186
194
  autovector<TransactionID> waiting_txn_ids_;
187
195
 
188
196
  // The following two represents the (cf, key) that a transaction is waiting
@@ -196,6 +204,9 @@ class PessimisticTransaction : public TransactionBaseImpl {
196
204
  uint32_t waiting_cf_id_;
197
205
  const std::string* waiting_key_;
198
206
 
207
+ // Waiting key with lifetime of the txn so it can be accessed after timeouts
208
+ std::optional<std::string> timed_out_key_;
209
+
199
210
  // Mutex protecting waiting_txn_ids_, waiting_cf_id_ and waiting_key_.
200
211
  mutable std::mutex wait_mutex_;
201
212
 
@@ -561,6 +561,16 @@ TEST_P(TransactionTest, WaitingTxn) {
561
561
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
562
562
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
563
563
 
564
+ // We expect GetWaitingTxns still returns the waiting values as it would
565
+ // normally before timeout
566
+ std::string key;
567
+ uint32_t cf_id;
568
+ std::vector<TransactionID> wait = txn2->GetWaitingTxns(&cf_id, &key);
569
+ ASSERT_EQ(key, "foo");
570
+ ASSERT_EQ(wait.size(), 1);
571
+ ASSERT_EQ(wait[0], id1);
572
+ ASSERT_EQ(cf_id, 0U);
573
+
564
574
  delete cfa;
565
575
  delete txn1;
566
576
  delete txn2;
@@ -9929,6 +9939,57 @@ TEST_P(CommitBypassMemtableTest,
9929
9939
 
9930
9940
  delete txn_cf;
9931
9941
  }
9942
+
9943
+ TEST_P(CommitBypassMemtableTest, WBWIOpCountMismatchWBCount) {
9944
+ // Tests that large txn optimization checks op count in WBWI vs WB. When an
9945
+ // update is written directly to a transaction's underlying write batch, the
9946
+ // optimization should not apply.
9947
+ SetUpTransactionDB();
9948
+ bool commit_bypass_memtable = false;
9949
+ SyncPoint::GetInstance()->SetCallBack(
9950
+ "WriteCommittedTxn::CommitInternal:bypass_memtable",
9951
+ [&](void* arg) { commit_bypass_memtable = *(static_cast<bool*>(arg)); });
9952
+ SyncPoint::GetInstance()->EnableProcessing();
9953
+
9954
+ Random rnd(301);
9955
+ {
9956
+ WriteOptions wopts;
9957
+ TransactionOptions txn_opts;
9958
+ txn_opts.large_txn_commit_optimize_byte_threshold = 100;
9959
+ auto txn = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
9960
+ ASSERT_OK(txn->SetName("xid0"));
9961
+ ASSERT_OK(txn->Put("k1", rnd.RandomString(1000)));
9962
+ // This update is written directly to the underlying write batch, so the
9963
+ // optimization should not apply.
9964
+ ASSERT_OK(txn->GetWriteBatch()->GetWriteBatch()->Put("meta", "1"));
9965
+ ASSERT_OK(txn->Prepare());
9966
+ ASSERT_OK(txn->Commit());
9967
+ ASSERT_FALSE(commit_bypass_memtable);
9968
+
9969
+ ASSERT_EQ(Get("meta"), "1");
9970
+ delete txn;
9971
+ }
9972
+
9973
+ {
9974
+ WriteOptions wopts;
9975
+ TransactionOptions txn_opts;
9976
+ txn_opts.large_txn_commit_optimize_threshold = 10;
9977
+ auto txn = txn_db->BeginTransaction(wopts, txn_opts, nullptr);
9978
+ ASSERT_OK(txn->SetName("xid0"));
9979
+ for (int i = 0; i < 10; ++i) {
9980
+ ASSERT_OK(txn->Put(Key(i), rnd.RandomString(10)));
9981
+ }
9982
+ // This update is written directly to the underlying write batch, so the
9983
+ // optimization should not apply.
9984
+ ASSERT_OK(txn->GetWriteBatch()->GetWriteBatch()->Put("meta", "2"));
9985
+ ASSERT_OK(txn->Prepare());
9986
+ ASSERT_OK(txn->Commit());
9987
+ ASSERT_FALSE(commit_bypass_memtable);
9988
+
9989
+ ASSERT_EQ(Get("meta"), "2");
9990
+ delete txn;
9991
+ }
9992
+ }
9932
9993
  } // namespace ROCKSDB_NAMESPACE
9933
9994
 
9934
9995
  int main(int argc, char** argv) {
@@ -635,4 +635,22 @@ void DBWithTTLImpl::SetTtl(ColumnFamilyHandle* h, int32_t ttl) {
635
635
  filter->SetTtl(ttl);
636
636
  }
637
637
 
638
+ Status DBWithTTLImpl::GetTtl(ColumnFamilyHandle* h, int32_t* ttl) {
639
+ if (h == nullptr || ttl == nullptr) {
640
+ return Status::InvalidArgument(
641
+ "column family handle or ttl cannot be null");
642
+ }
643
+ std::shared_ptr<TtlCompactionFilterFactory> filter;
644
+ Options opts;
645
+ opts = GetOptions(h);
646
+ filter = std::static_pointer_cast<TtlCompactionFilterFactory>(
647
+ opts.compaction_filter_factory);
648
+ if (!filter) {
649
+ return Status::InvalidArgument(
650
+ "TTLCompactionFilterFactory is not set for TTLDB");
651
+ }
652
+ *ttl = filter->GetTtl();
653
+ return Status::OK();
654
+ }
655
+
638
656
  } // namespace ROCKSDB_NAMESPACE
@@ -100,6 +100,8 @@ class DBWithTTLImpl : public DBWithTTL {
100
100
 
101
101
  void SetTtl(ColumnFamilyHandle* h, int32_t ttl) override;
102
102
 
103
+ Status GetTtl(ColumnFamilyHandle* h, int32_t* ttl) override;
104
+
103
105
  private:
104
106
  // remember whether the Close completes or not
105
107
  bool closed_;
@@ -184,6 +186,7 @@ class TtlCompactionFilterFactory : public CompactionFilterFactory {
184
186
  std::unique_ptr<CompactionFilter> CreateCompactionFilter(
185
187
  const CompactionFilter::Context& context) override;
186
188
  void SetTtl(int32_t ttl) { ttl_ = ttl; }
189
+ int32_t GetTtl() { return ttl_; }
187
190
 
188
191
  const char* Name() const override { return kClassName(); }
189
192
  static const char* kClassName() { return "TtlCompactionFilterFactory"; }
@@ -720,6 +720,9 @@ TEST_F(TtlTest, ChangeTtlOnOpenDb) {
720
720
 
721
721
  OpenTtl(1); // T=0:Open the db with ttl = 2
722
722
  SetTtl(3);
723
+ int32_t ttl = 0;
724
+ ASSERT_OK(db_ttl_->GetTtl(db_ttl_->DefaultColumnFamily(), &ttl));
725
+ ASSERT_EQ(ttl, 3);
723
726
  PutValues(0, kSampleSize_); // T=0:Insert Set1. Delete at t=2
724
727
  SleepCompactCheck(2, 0, kSampleSize_, true); // T=2:Set1 should be there
725
728
  CloseTtl();
@@ -32,7 +32,8 @@ struct WriteBatchWithIndex::Rep {
32
32
  skip_list(comparator, &arena),
33
33
  last_sub_batch_offset(0),
34
34
  sub_batch_cnt(1),
35
- overwrite_key(_overwrite_key) {}
35
+ overwrite_key(_overwrite_key),
36
+ op_count(0) {}
36
37
  ReadableWriteBatch write_batch;
37
38
  WriteBatchEntryComparator comparator;
38
39
  Arena arena;
@@ -48,7 +49,8 @@ struct WriteBatchWithIndex::Rep {
48
49
  // Tracks ids of CFs that have updates in this WBWI, number of updates and
49
50
  // number of overwritten single deletions per cf. Useful for WBWIMemTable
50
51
  // when this WBWI is ingested into a DB.
51
- std::unordered_map<uint32_t, CFStat> cf_id_to_stat;
52
+ std::unordered_map<uint32_t, WriteBatchWithIndex::CFStat> cf_id_to_stat;
53
+ size_t op_count;
52
54
 
53
55
  // In overwrite mode, find the existing entry for the same key and update it
54
56
  // to point to the current entry if this is not a Merge operation.
@@ -154,6 +156,7 @@ bool WriteBatchWithIndex::Rep::UpdateExistingEntryWithCfId(
154
156
  void WriteBatchWithIndex::Rep::AddOrUpdateIndexWithCfId(
155
157
  uint32_t cf_id, const Slice& key, WriteType type, size_t last_entry_offset,
156
158
  const Comparator* cf_cmp) {
159
+ op_count++;
157
160
  uint32_t update_count = 0;
158
161
  if (!UpdateExistingEntryWithCfId(cf_id, key, type, last_entry_offset,
159
162
  &update_count)) {
@@ -201,7 +204,6 @@ void WriteBatchWithIndex::Rep::AddNewEntry(uint32_t column_family_id,
201
204
 
202
205
  void WriteBatchWithIndex::Rep::Clear() {
203
206
  write_batch.Clear();
204
- cf_id_to_stat.clear();
205
207
  ClearIndex();
206
208
  }
207
209
 
@@ -212,6 +214,8 @@ void WriteBatchWithIndex::Rep::ClearIndex() {
212
214
  new (&skip_list) WriteBatchEntrySkipList(comparator, &arena);
213
215
  last_sub_batch_offset = 0;
214
216
  sub_batch_cnt = 1;
217
+ cf_id_to_stat.clear();
218
+ op_count = 0;
215
219
  }
216
220
 
217
221
  Status WriteBatchWithIndex::Rep::ReBuildIndex() {
@@ -1173,5 +1177,7 @@ WriteBatchWithIndex::GetCFStats() const {
1173
1177
  return rep->cf_id_to_stat;
1174
1178
  }
1175
1179
 
1180
+ size_t WriteBatchWithIndex::GetWBWIOpCount() const { return rep->op_count; }
1181
+
1176
1182
  bool WriteBatchWithIndex::GetOverwriteKey() const { return rep->overwrite_key; }
1177
1183
  } // namespace ROCKSDB_NAMESPACE
@@ -342,6 +342,10 @@ void AssertIterEqual(WBWIIteratorImpl* wbwii,
342
342
  }
343
343
  ASSERT_FALSE(wbwii->Valid());
344
344
  }
345
+
346
+ void AssertWBWICountEQWBCount(WriteBatchWithIndex& wbwi) {
347
+ ASSERT_EQ(wbwi.GetWBWIOpCount(), wbwi.GetWriteBatch()->Count());
348
+ }
345
349
  } // namespace
346
350
 
347
351
  class WBWIBaseTest : public testing::Test {
@@ -356,6 +360,8 @@ class WBWIBaseTest : public testing::Test {
356
360
  }
357
361
 
358
362
  virtual ~WBWIBaseTest() {
363
+ AssertWBWICountEQWBCount(*batch_);
364
+
359
365
  if (db_ != nullptr) {
360
366
  ReleaseSnapshot();
361
367
  delete db_;
@@ -715,6 +721,7 @@ TEST_P(WriteBatchWithIndexTest, TestValueAsSecondaryIndex) {
715
721
  batch_.reset(new WriteBatchWithIndex(nullptr, 20, GetParam()));
716
722
 
717
723
  TestValueAsSecondaryIndexHelper(entries_list, batch_.get(), GetParam());
724
+ AssertWBWICountEQWBCount(*batch_);
718
725
 
719
726
  // Clear batch and re-run test with new values
720
727
  batch_->Clear();
@@ -729,6 +736,7 @@ TEST_P(WriteBatchWithIndexTest, TestValueAsSecondaryIndex) {
729
736
  entries_list = std::vector<Entry>(new_entries, new_entries + 8);
730
737
 
731
738
  TestValueAsSecondaryIndexHelper(entries_list, batch_.get(), GetParam());
739
+ AssertWBWICountEQWBCount(*batch_);
732
740
  }
733
741
 
734
742
  TEST_P(WriteBatchWithIndexTest, WBWIIteratorImpl) {
@@ -3816,6 +3824,7 @@ TEST_F(WBWIMemTableTest, ReadFromWBWIMemtable) {
3816
3824
  // See comment for WBWIMemTable for sequence number assignment method.
3817
3825
  expected_seqno[idx]++;
3818
3826
  }
3827
+ AssertWBWICountEQWBCount(*wbwi);
3819
3828
  // Get a non-existing key
3820
3829
  found_final_value = false;
3821
3830
  ASSERT_EQ("NOT_FOUND", Get("foo", wbwi_mem, visible_seq, &found_final_value));
@@ -231,6 +231,7 @@
231
231
  "rocksdb/db/memtable_list.cc",
232
232
  "rocksdb/db/merge_helper.cc",
233
233
  "rocksdb/db/merge_operator.cc",
234
+ "rocksdb/db/multi_scan.cc",
234
235
  "rocksdb/db/output_validator.cc",
235
236
  "rocksdb/db/periodic_task_scheduler.cc",
236
237
  "rocksdb/db/range_del_aggregator.cc",
@@ -384,7 +385,7 @@
384
385
  "rocksdb/trace_replay/trace_record.cc",
385
386
  "rocksdb/trace_replay/trace_replay.cc",
386
387
  "rocksdb/util/async_file_reader.cc",
387
- "rocksdb/util/auto_skip_compressor.cc",
388
+ "rocksdb/util/auto_tune_compressor.cc",
388
389
  "rocksdb/util/cleanable.cc",
389
390
  "rocksdb/util/coding.cc",
390
391
  "rocksdb/util/compaction_job_stats_impl.cc",
@@ -483,6 +484,19 @@
483
484
  "rocksdb/utilities/wal_filter.cc",
484
485
  "rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc",
485
486
  "rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc",
487
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc",
488
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/keyrange.cc",
489
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/lock_request.cc",
490
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc",
491
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/manager.cc",
492
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/range_buffer.cc",
493
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/treenode.cc",
494
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/txnid_set.cc",
495
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/wfg.cc",
496
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc",
497
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc",
498
+ "rocksdb/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc",
499
+
486
500
  "build_version.cc",
487
501
  ]
488
502
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@nxtedition/rocksdb",
3
- "version": "13.5.9",
3
+ "version": "13.5.10",
4
4
  "description": "A low-level Node.js RocksDB binding",
5
5
  "license": "MIT",
6
6
  "main": "index.js",