@nxtedition/rocksdb 7.0.4 → 7.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/binding.cc +320 -324
  2. package/chained-batch.js +6 -1
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +8 -3
  4. package/deps/rocksdb/rocksdb/Makefile +10 -4
  5. package/deps/rocksdb/rocksdb/TARGETS +6 -4
  6. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +9 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +14 -0
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +8 -8
  9. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +272 -174
  10. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +201 -57
  11. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +19 -19
  12. package/deps/rocksdb/rocksdb/cache/lru_cache.h +2 -1
  13. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +170 -0
  14. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +95 -0
  15. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +298 -0
  16. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +172 -0
  17. package/deps/rocksdb/rocksdb/db/column_family.cc +8 -3
  18. package/deps/rocksdb/rocksdb/db/column_family.h +6 -3
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +10 -0
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +6 -6
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +22 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +38 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -5
  24. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +4 -7
  25. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -71
  26. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +70 -1
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +13 -12
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +36 -0
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -4
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +139 -91
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +48 -14
  33. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +90 -55
  34. package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +9 -4
  35. package/deps/rocksdb/rocksdb/db/db_test.cc +3 -1
  36. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -7
  37. package/deps/rocksdb/rocksdb/db/db_write_test.cc +35 -0
  38. package/deps/rocksdb/rocksdb/db/dbformat.cc +3 -1
  39. package/deps/rocksdb/rocksdb/db/dbformat.h +5 -3
  40. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +1 -1
  41. package/deps/rocksdb/rocksdb/db/memtable.cc +1 -0
  42. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
  43. package/deps/rocksdb/rocksdb/db/repair.cc +1 -1
  44. package/deps/rocksdb/rocksdb/db/version_builder.cc +43 -1
  45. package/deps/rocksdb/rocksdb/db/version_edit.cc +13 -5
  46. package/deps/rocksdb/rocksdb/db/version_edit.h +22 -1
  47. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +4 -5
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +109 -41
  49. package/deps/rocksdb/rocksdb/db/version_set.h +36 -3
  50. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -4
  51. package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -10
  52. package/deps/rocksdb/rocksdb/db/version_util.h +1 -1
  53. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +1 -1
  54. package/deps/rocksdb/rocksdb/db/write_batch.cc +34 -10
  55. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +2 -0
  56. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -0
  57. package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +2 -0
  58. package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -1
  60. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +7 -5
  61. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +5 -10
  62. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -7
  63. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +2 -0
  64. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +24 -3
  65. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
  66. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +10 -0
  67. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +5 -0
  68. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +4 -4
  69. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +9 -5
  70. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +5 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +1 -0
  72. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +1 -1
  73. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
  74. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -3
  75. package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +8 -6
  76. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +3 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -2
  78. package/deps/rocksdb/rocksdb/options/options_test.cc +1 -11
  79. package/deps/rocksdb/rocksdb/port/port_posix.h +7 -0
  80. package/deps/rocksdb/rocksdb/port/win/port_win.h +11 -3
  81. package/deps/rocksdb/rocksdb/src.mk +6 -2
  82. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -33
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +3 -3
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -118
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +6 -8
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +10 -13
  87. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +4 -9
  88. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +0 -1
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +10 -28
  90. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -3
  91. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -91
  92. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -30
  93. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -27
  94. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +11 -13
  95. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -40
  96. package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +0 -1
  97. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +22 -43
  98. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +11 -22
  99. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +24 -25
  100. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +0 -1
  101. package/deps/rocksdb/rocksdb/table/get_context.h +0 -1
  102. package/deps/rocksdb/rocksdb/table/table_test.cc +3 -18
  103. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +3 -16
  104. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +3 -3
  105. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +1 -1
  106. package/deps/rocksdb/rocksdb/util/bloom_test.cc +0 -201
  107. package/deps/rocksdb/rocksdb/util/distributed_mutex.h +48 -0
  108. package/deps/rocksdb/rocksdb/util/filter_bench.cc +5 -11
  109. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3 -0
  110. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +7 -21
  111. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  112. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +45 -0
  113. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +21 -14
  114. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +10 -1
  115. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +3 -1
  116. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +9 -0
  117. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +3 -2
  118. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -1
  119. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +5 -4
  120. package/deps/rocksdb/rocksdb.gyp +1 -1
  121. package/index.js +36 -14
  122. package/package-lock.json +2 -2
  123. package/package.json +1 -1
  124. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  125. package/prebuilds/linux-x64/node.napi.node +0 -0
  126. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +0 -358
  127. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +0 -127
  128. package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +0 -219
@@ -190,6 +190,8 @@ class BatchedOpsStressTest : public StressTest {
190
190
  std::vector<Status> statuses(num_prefixes);
191
191
  ReadOptions readoptionscopy = readoptions;
192
192
  readoptionscopy.snapshot = db_->GetSnapshot();
193
+ readoptionscopy.rate_limiter_priority =
194
+ FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
193
195
  std::vector<std::string> key_str;
194
196
  key_str.reserve(num_prefixes);
195
197
  key_slices.reserve(num_prefixes);
@@ -214,12 +214,15 @@ class CfConsistencyStressTest : public StressTest {
214
214
  std::vector<PinnableSlice> values(num_keys);
215
215
  std::vector<Status> statuses(num_keys);
216
216
  ColumnFamilyHandle* cfh = column_families_[rand_column_families[0]];
217
+ ReadOptions readoptionscopy = read_opts;
218
+ readoptionscopy.rate_limiter_priority =
219
+ FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
217
220
 
218
221
  for (size_t i = 0; i < num_keys; ++i) {
219
222
  key_str.emplace_back(Key(rand_keys[i]));
220
223
  keys.emplace_back(key_str.back());
221
224
  }
222
- db_->MultiGet(read_opts, cfh, num_keys, keys.data(), values.data(),
225
+ db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(),
223
226
  statuses.data());
224
227
  for (auto s : statuses) {
225
228
  if (s.ok()) {
@@ -152,12 +152,12 @@ DECLARE_double(experimental_mempurge_threshold);
152
152
  DECLARE_bool(enable_write_thread_adaptive_yield);
153
153
  DECLARE_int32(reopen);
154
154
  DECLARE_double(bloom_bits);
155
- DECLARE_bool(use_block_based_filter);
156
155
  DECLARE_int32(ribbon_starting_level);
157
156
  DECLARE_bool(partition_filters);
158
157
  DECLARE_bool(optimize_filters_for_memory);
159
158
  DECLARE_bool(detect_filter_construct_corruption);
160
159
  DECLARE_int32(index_type);
160
+ DECLARE_int32(data_block_index_type);
161
161
  DECLARE_string(db);
162
162
  DECLARE_string(secondaries_base);
163
163
  DECLARE_bool(test_secondary);
@@ -469,10 +469,6 @@ DEFINE_double(bloom_bits, 10,
469
469
  "Bloom filter bits per key. "
470
470
  "Negative means use default settings.");
471
471
 
472
- DEFINE_bool(use_block_based_filter, false,
473
- "use block based filter"
474
- "instead of full filter for block based table");
475
-
476
472
  DEFINE_int32(
477
473
  ribbon_starting_level, 999,
478
474
  "Use Bloom filter on levels below specified and Ribbon beginning on level "
@@ -497,9 +493,15 @@ DEFINE_bool(
497
493
  DEFINE_int32(
498
494
  index_type,
499
495
  static_cast<int32_t>(
500
- ROCKSDB_NAMESPACE::BlockBasedTableOptions::kBinarySearch),
496
+ ROCKSDB_NAMESPACE::BlockBasedTableOptions().index_type),
501
497
  "Type of block-based table index (see `enum IndexType` in table.h)");
502
498
 
499
+ DEFINE_int32(
500
+ data_block_index_type,
501
+ static_cast<int32_t>(
502
+ ROCKSDB_NAMESPACE::BlockBasedTableOptions().data_block_index_type),
503
+ "Index type for data blocks (see `enum DataBlockIndexType` in table.h)");
504
+
503
505
  DEFINE_string(db, "", "Use the db with the following name.");
504
506
 
505
507
  DEFINE_string(secondaries_base, "",
@@ -36,16 +36,7 @@ std::shared_ptr<const FilterPolicy> CreateFilterPolicy() {
36
36
  return BlockBasedTableOptions().filter_policy;
37
37
  }
38
38
  const FilterPolicy* new_policy;
39
- if (FLAGS_use_block_based_filter) {
40
- if (FLAGS_ribbon_starting_level < 999) {
41
- fprintf(
42
- stderr,
43
- "Cannot combine use_block_based_filter and ribbon_starting_level\n");
44
- exit(1);
45
- } else {
46
- new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, true);
47
- }
48
- } else if (FLAGS_ribbon_starting_level >= 999) {
39
+ if (FLAGS_ribbon_starting_level >= 999) {
49
40
  // Use Bloom API
50
41
  new_policy = NewBloomFilterPolicy(FLAGS_bloom_bits, false);
51
42
  } else {
@@ -626,6 +617,7 @@ void StressTest::OperateDb(ThreadState* thread) {
626
617
  write_opts.sync = true;
627
618
  }
628
619
  write_opts.disableWAL = FLAGS_disable_wal;
620
+ write_opts.protection_bytes_per_key = FLAGS_batch_protection_bytes_per_key;
629
621
  const int prefix_bound = static_cast<int>(FLAGS_readpercent) +
630
622
  static_cast<int>(FLAGS_prefixpercent);
631
623
  const int write_bound = prefix_bound + static_cast<int>(FLAGS_writepercent);
@@ -2783,6 +2775,9 @@ void InitializeOptionsFromFlags(
2783
2775
  FLAGS_detect_filter_construct_corruption;
2784
2776
  block_based_options.index_type =
2785
2777
  static_cast<BlockBasedTableOptions::IndexType>(FLAGS_index_type);
2778
+ block_based_options.data_block_index_type =
2779
+ static_cast<BlockBasedTableOptions::DataBlockIndexType>(
2780
+ FLAGS_data_block_index_type);
2786
2781
  block_based_options.prepopulate_block_cache =
2787
2782
  static_cast<BlockBasedTableOptions::PrepopulateBlockCache>(
2788
2783
  FLAGS_prepopulate_block_cache);
@@ -265,13 +265,6 @@ int db_stress_tool(int argc, char** argv) {
265
265
  "test_batches_snapshots must all be 0 when using compaction filter\n");
266
266
  exit(1);
267
267
  }
268
- if (FLAGS_batch_protection_bytes_per_key > 0 &&
269
- !FLAGS_test_batches_snapshots) {
270
- fprintf(stderr,
271
- "Error: test_batches_snapshots must be enabled when "
272
- "batch_protection_bytes_per_key > 0\n");
273
- exit(1);
274
- }
275
268
  if (FLAGS_test_multi_ops_txns) {
276
269
  CheckAndSetOptionsForMultiOpsTxnStressTest();
277
270
  }
@@ -391,6 +391,8 @@ class NonBatchedOpsStressTest : public StressTest {
391
391
  if (do_consistency_check) {
392
392
  readoptionscopy.snapshot = db_->GetSnapshot();
393
393
  }
394
+ readoptionscopy.rate_limiter_priority =
395
+ FLAGS_rate_limit_user_ops ? Env::IO_USER : Env::IO_TOTAL;
394
396
 
395
397
  // To appease clang analyzer
396
398
  const bool use_txn = FLAGS_use_txn;
@@ -270,9 +270,6 @@ bool TryMerge(FSReadRequest* dest, const FSReadRequest& src) {
270
270
  IOStatus RandomAccessFileReader::MultiRead(
271
271
  const IOOptions& opts, FSReadRequest* read_reqs, size_t num_reqs,
272
272
  AlignedBuf* aligned_buf, Env::IOPriority rate_limiter_priority) const {
273
- if (rate_limiter_priority != Env::IO_TOTAL) {
274
- return IOStatus::NotSupported("Unable to rate limit MultiRead()");
275
- }
276
273
  (void)aligned_buf; // suppress warning of unused variable in LITE mode
277
274
  assert(num_reqs > 0);
278
275
 
@@ -359,6 +356,30 @@ IOStatus RandomAccessFileReader::MultiRead(
359
356
 
360
357
  {
361
358
  IOSTATS_CPU_TIMER_GUARD(cpu_read_nanos, clock_);
359
+ if (rate_limiter_priority != Env::IO_TOTAL && rate_limiter_ != nullptr) {
360
+ // TODO: ideally we should call `RateLimiter::RequestToken()` for
361
+ // allowed bytes to multi-read and then consume those bytes by
362
+ // satisfying as many requests in `MultiRead()` as possible, instead of
363
+ // what we do here, which can cause burst when the
364
+ // `total_multi_read_size` is big.
365
+ size_t total_multi_read_size = 0;
366
+ assert(fs_reqs != nullptr);
367
+ for (size_t i = 0; i < num_fs_reqs; ++i) {
368
+ FSReadRequest& req = fs_reqs[i];
369
+ total_multi_read_size += req.len;
370
+ }
371
+ size_t remaining_bytes = total_multi_read_size;
372
+ size_t request_bytes = 0;
373
+ while (remaining_bytes > 0) {
374
+ request_bytes = std::min(
375
+ static_cast<size_t>(rate_limiter_->GetSingleBurstBytes()),
376
+ remaining_bytes);
377
+ rate_limiter_->Request(request_bytes, rate_limiter_priority,
378
+ nullptr /* stats */,
379
+ RateLimiter::OpType::kRead);
380
+ remaining_bytes -= request_bytes;
381
+ }
382
+ }
362
383
  io_s = file_->MultiRead(fs_reqs, num_fs_reqs, opts, nullptr);
363
384
  }
364
385
 
@@ -585,6 +585,8 @@ IOStatus WritableFileWriter::WriteBuffered(
585
585
 
586
586
  left -= allowed;
587
587
  src += allowed;
588
+ uint64_t cur_size = flushed_size_.load(std::memory_order_acquire);
589
+ flushed_size_.store(cur_size + allowed, std::memory_order_release);
588
590
  }
589
591
  buf_.Size(0);
590
592
  buffered_data_crc32c_checksum_ = 0;
@@ -675,6 +677,8 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
675
677
  // the corresponding checksum value
676
678
  buf_.Size(0);
677
679
  buffered_data_crc32c_checksum_ = 0;
680
+ uint64_t cur_size = flushed_size_.load(std::memory_order_acquire);
681
+ flushed_size_.store(cur_size + left, std::memory_order_release);
678
682
  return s;
679
683
  }
680
684
 
@@ -782,6 +786,8 @@ IOStatus WritableFileWriter::WriteDirect(
782
786
  left -= size;
783
787
  src += size;
784
788
  write_offset += size;
789
+ uint64_t cur_size = flushed_size_.load(std::memory_order_acquire);
790
+ flushed_size_.store(cur_size + size, std::memory_order_release);
785
791
  assert((next_write_offset_ % alignment) == 0);
786
792
  }
787
793
 
@@ -884,6 +890,8 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
884
890
 
885
891
  IOSTATS_ADD(bytes_written, left);
886
892
  assert((next_write_offset_ % alignment) == 0);
893
+ uint64_t cur_size = flushed_size_.load(std::memory_order_acquire);
894
+ flushed_size_.store(cur_size + left, std::memory_order_release);
887
895
 
888
896
  if (s.ok()) {
889
897
  // Move the tail to the beginning of the buffer
@@ -143,6 +143,7 @@ class WritableFileWriter {
143
143
  // Actually written data size can be used for truncate
144
144
  // not counting padding data
145
145
  std::atomic<uint64_t> filesize_;
146
+ std::atomic<uint64_t> flushed_size_;
146
147
  #ifndef ROCKSDB_LITE
147
148
  // This is necessary when we use unbuffered access
148
149
  // and writes must happen on aligned offsets
@@ -180,6 +181,7 @@ class WritableFileWriter {
180
181
  buf_(),
181
182
  max_buffer_size_(options.writable_file_max_buffer_size),
182
183
  filesize_(0),
184
+ flushed_size_(0),
183
185
  #ifndef ROCKSDB_LITE
184
186
  next_write_offset_(0),
185
187
  #endif // ROCKSDB_LITE
@@ -259,6 +261,14 @@ class WritableFileWriter {
259
261
  return filesize_.load(std::memory_order_acquire);
260
262
  }
261
263
 
264
+ // Returns the size of data flushed to the underlying `FSWritableFile`.
265
+ // Expected to match `writable_file()->GetFileSize()`.
266
+ // The return value can serve as a lower-bound for the amount of data synced
267
+ // by a future call to `SyncWithoutFlush()`.
268
+ uint64_t GetFlushedSize() const {
269
+ return flushed_size_.load(std::memory_order_acquire);
270
+ }
271
+
262
272
  IOStatus InvalidateCache(size_t offset, size_t length) {
263
273
  return writable_file_->InvalidateCache(offset, length);
264
274
  }
@@ -55,6 +55,11 @@ enum CompactionPri : char {
55
55
  // and its size is the smallest. It in many cases can optimize write
56
56
  // amplification.
57
57
  kMinOverlappingRatio = 0x3,
58
+ // Keeps a cursor(s) of the successor of the file (key range) was/were
59
+ // compacted before, and always picks the next files (key range) in that
60
+ // level. The file picking process will cycle through all the files in a
61
+ // round-robin manner.
62
+ kRoundRobin = 0x4,
58
63
  };
59
64
 
60
65
  struct CompactionOptionsFIFO {
@@ -290,7 +290,7 @@ class Cache {
290
290
  virtual const char* Name() const = 0;
291
291
 
292
292
  // Insert a mapping from key->value into the volatile cache only
293
- // and assign it // the specified charge against the total cache capacity.
293
+ // and assign it with the specified charge against the total cache capacity.
294
294
  // If strict_capacity_limit is true and cache reaches its full capacity,
295
295
  // return Status::Incomplete.
296
296
  //
@@ -394,8 +394,8 @@ class Cache {
394
394
  // memory - call this only if you're shutting down the process.
395
395
  // Any attempts of using cache after this call will fail terribly.
396
396
  // Always delete the DB object before calling this method!
397
- virtual void DisownData(){
398
- // default implementation is noop
397
+ virtual void DisownData() {
398
+ // default implementation is noop
399
399
  }
400
400
 
401
401
  struct ApplyToAllEntriesOptions {
@@ -553,7 +553,7 @@ enum class CacheEntryRole {
553
553
  kFilterBlock,
554
554
  // Block-based table metadata block for partitioned filter
555
555
  kFilterMetaBlock,
556
- // Block-based table deprecated filter block (old "block-based" filter)
556
+ // OBSOLETE / DEPRECATED: old/removed block-based filter
557
557
  kDeprecatedFilterBlock,
558
558
  // Block-based table index block
559
559
  kIndexBlock,
@@ -1641,10 +1641,6 @@ struct ReadOptions {
1641
1641
  // is a `PlainTableFactory`) and cuckoo tables (these can exist when
1642
1642
  // `ColumnFamilyOptions::table_factory` is a `CuckooTableFactory`).
1643
1643
  //
1644
- // The new `DB::MultiGet()` APIs (i.e., the ones returning `void`) will return
1645
- // `Status::NotSupported` when that operation requires file read(s) and
1646
- // `rate_limiter_priority != Env::IO_TOTAL`.
1647
- //
1648
1644
  // The bytes charged to rate limiter may not exactly match the file read bytes
1649
1645
  // since there are some seemingly insignificant reads, like for file
1650
1646
  // headers/footers, that we currently do not charge to rate limiter.
@@ -1737,6 +1733,13 @@ struct WriteOptions {
1737
1733
  // Default: `Env::IO_TOTAL`
1738
1734
  Env::IOPriority rate_limiter_priority;
1739
1735
 
1736
+ // `protection_bytes_per_key` is the number of bytes used to store
1737
+ // protection information for each key entry. Currently supported values are
1738
+ // zero (disabled) and eight.
1739
+ //
1740
+ // Default: zero (disabled).
1741
+ size_t protection_bytes_per_key;
1742
+
1740
1743
  WriteOptions()
1741
1744
  : sync(false),
1742
1745
  disableWAL(false),
@@ -1744,7 +1747,8 @@ struct WriteOptions {
1744
1747
  no_slowdown(false),
1745
1748
  low_pri(false),
1746
1749
  memtable_insert_hint_per_batch(false),
1747
- rate_limiter_priority(Env::IO_TOTAL) {}
1750
+ rate_limiter_priority(Env::IO_TOTAL),
1751
+ protection_bytes_per_key(0) {}
1748
1752
  };
1749
1753
 
1750
1754
  // Options that control flush operations
@@ -432,6 +432,7 @@ enum Tickers : uint32_t {
432
432
  NON_LAST_LEVEL_READ_COUNT,
433
433
 
434
434
  BLOCK_CHECKSUM_COMPUTE_COUNT,
435
+ MULTIGET_COROUTINE_COUNT,
435
436
 
436
437
  TICKER_ENUM_MAX
437
438
  };
@@ -529,6 +530,7 @@ enum Histograms : uint32_t {
529
530
  // Num of index and filter blocks read from file system per level.
530
531
  NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
531
532
  // Num of data blocks read from file system per level.
533
+ // Obsolete
532
534
  NUM_DATA_BLOCKS_READ_PER_LEVEL,
533
535
  // Num of sst files read from file system per level.
534
536
  NUM_SST_READ_PER_LEVEL,
@@ -546,6 +548,9 @@ enum Histograms : uint32_t {
546
548
  // Number of IOs issued in parallel in a MultiGet batch
547
549
  MULTIGET_IO_BATCH_SIZE,
548
550
 
551
+ // Number of levels requiring IO for MultiGet
552
+ NUM_LEVEL_READ_PER_MULTIGET,
553
+
549
554
  HISTOGRAM_ENUM_MAX,
550
555
  };
551
556
 
@@ -58,6 +58,7 @@ enum EntryType {
58
58
  kEntryRangeDeletion,
59
59
  kEntryBlobIndex,
60
60
  kEntryDeleteWithTimestamp,
61
+ kEntryWideColumnEntity,
61
62
  kEntryOther,
62
63
  };
63
64
 
@@ -98,7 +98,7 @@ class WriteBatchWithIndex : public WriteBatchBase {
98
98
  explicit WriteBatchWithIndex(
99
99
  const Comparator* backup_index_comparator = BytewiseComparator(),
100
100
  size_t reserved_bytes = 0, bool overwrite_key = false,
101
- size_t max_bytes = 0);
101
+ size_t max_bytes = 0, size_t protection_bytes_per_key = 0);
102
102
 
103
103
  ~WriteBatchWithIndex() override;
104
104
  WriteBatchWithIndex(WriteBatchWithIndex&&);
@@ -12,7 +12,7 @@
12
12
  // NOTE: in 'main' development branch, this should be the *next*
13
13
  // minor or major version number planned for release.
14
14
  #define ROCKSDB_MAJOR 7
15
- #define ROCKSDB_MINOR 4
15
+ #define ROCKSDB_MINOR 5
16
16
  #define ROCKSDB_PATCH 0
17
17
 
18
18
  // Do not use these. We made the mistake of declaring macros starting with
@@ -419,9 +419,6 @@ class WriteBatch : public WriteBatchBase {
419
419
  struct ProtectionInfo;
420
420
  size_t GetProtectionBytesPerKey() const;
421
421
 
422
- // Clears prot_info_ if there are no entries.
423
- void ClearProtectionInfoIfEmpty();
424
-
425
422
  private:
426
423
  friend class WriteBatchInternal;
427
424
  friend class LocalSavePoint;
@@ -52,7 +52,9 @@ struct KeyMaker {
52
52
  // 2. average data key length
53
53
  // 3. data entry number
54
54
  static void CustomArguments(benchmark::internal::Benchmark *b) {
55
- for (int filter_impl : {0, 2, 3}) {
55
+ const auto kImplCount =
56
+ static_cast<int>(BloomLikeFilterPolicy::GetAllFixedImpls().size());
57
+ for (int filter_impl = 0; filter_impl < kImplCount; ++filter_impl) {
56
58
  for (int bits_per_key : {10, 20}) {
57
59
  for (int key_len_avg : {10, 100}) {
58
60
  for (int64_t entry_num : {1 << 10, 1 << 20}) {
@@ -69,7 +71,7 @@ static void FilterBuild(benchmark::State &state) {
69
71
  auto filter = BloomLikeFilterPolicy::Create(
70
72
  BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
71
73
  static_cast<double>(state.range(1)));
72
- auto tester = new mock::MockBlockBasedTableTester(filter);
74
+ auto tester = std::make_unique<mock::MockBlockBasedTableTester>(filter);
73
75
  KeyMaker km(state.range(2));
74
76
  std::unique_ptr<const char[]> owner;
75
77
  const int64_t kEntryNum = state.range(3);
@@ -92,7 +94,7 @@ static void FilterQueryPositive(benchmark::State &state) {
92
94
  auto filter = BloomLikeFilterPolicy::Create(
93
95
  BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
94
96
  static_cast<double>(state.range(1)));
95
- auto tester = new mock::MockBlockBasedTableTester(filter);
97
+ auto tester = std::make_unique<mock::MockBlockBasedTableTester>(filter);
96
98
  KeyMaker km(state.range(2));
97
99
  std::unique_ptr<const char[]> owner;
98
100
  const int64_t kEntryNum = state.range(3);
@@ -103,7 +105,7 @@ static void FilterQueryPositive(benchmark::State &state) {
103
105
  builder->AddKey(km.Get(filter_num, i));
104
106
  }
105
107
  auto data = builder->Finish(&owner);
106
- auto reader = filter->GetFilterBitsReader(data);
108
+ std::unique_ptr<FilterBitsReader> reader{filter->GetFilterBitsReader(data)};
107
109
 
108
110
  // run test
109
111
  uint32_t i = 0;
@@ -120,7 +122,7 @@ static void FilterQueryNegative(benchmark::State &state) {
120
122
  auto filter = BloomLikeFilterPolicy::Create(
121
123
  BloomLikeFilterPolicy::GetAllFixedImpls().at(state.range(0)),
122
124
  static_cast<double>(state.range(1)));
123
- auto tester = new mock::MockBlockBasedTableTester(filter);
125
+ auto tester = std::make_unique<mock::MockBlockBasedTableTester>(filter);
124
126
  KeyMaker km(state.range(2));
125
127
  std::unique_ptr<const char[]> owner;
126
128
  const int64_t kEntryNum = state.range(3);
@@ -131,7 +133,7 @@ static void FilterQueryNegative(benchmark::State &state) {
131
133
  builder->AddKey(km.Get(filter_num, i));
132
134
  }
133
135
  auto data = builder->Finish(&owner);
134
- auto reader = filter->GetFilterBitsReader(data);
136
+ std::unique_ptr<FilterBitsReader> reader{filter->GetFilterBitsReader(data)};
135
137
 
136
138
  // run test
137
139
  uint32_t i = 0;
@@ -226,7 +226,8 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
226
226
  {LAST_LEVEL_READ_COUNT, "rocksdb.last.level.read.count"},
227
227
  {NON_LAST_LEVEL_READ_BYTES, "rocksdb.non.last.level.read.bytes"},
228
228
  {NON_LAST_LEVEL_READ_COUNT, "rocksdb.non.last.level.read.count"},
229
- {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"}};
229
+ {BLOCK_CHECKSUM_COMPUTE_COUNT, "rocksdb.block.checksum.compute.count"},
230
+ {MULTIGET_COROUTINE_COUNT, "rocksdb.multiget.coroutine.count"}};
230
231
 
231
232
  const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
232
233
  {DB_GET, "rocksdb.db.get.micros"},
@@ -287,6 +288,7 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
287
288
  {POLL_WAIT_MICROS, "rocksdb.poll.wait.micros"},
288
289
  {PREFETCHED_BYTES_DISCARDED, "rocksdb.prefetched.bytes.discarded"},
289
290
  {MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
291
+ {NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"},
290
292
  };
291
293
 
292
294
  std::shared_ptr<Statistics> CreateDBStatistics() {
@@ -320,7 +320,8 @@ std::map<CompactionPri, std::string> OptionsHelper::compaction_pri_to_string = {
320
320
  {kByCompensatedSize, "kByCompensatedSize"},
321
321
  {kOldestLargestSeqFirst, "kOldestLargestSeqFirst"},
322
322
  {kOldestSmallestSeqFirst, "kOldestSmallestSeqFirst"},
323
- {kMinOverlappingRatio, "kMinOverlappingRatio"}};
323
+ {kMinOverlappingRatio, "kMinOverlappingRatio"},
324
+ {kRoundRobin, "kRoundRobin"}};
324
325
 
325
326
  std::map<CompactionStopStyle, std::string>
326
327
  OptionsHelper::compaction_stop_style_to_string = {
@@ -830,7 +831,8 @@ std::unordered_map<std::string, CompactionPri>
830
831
  {"kByCompensatedSize", kByCompensatedSize},
831
832
  {"kOldestLargestSeqFirst", kOldestLargestSeqFirst},
832
833
  {"kOldestSmallestSeqFirst", kOldestSmallestSeqFirst},
833
- {"kMinOverlappingRatio", kMinOverlappingRatio}};
834
+ {"kMinOverlappingRatio", kMinOverlappingRatio},
835
+ {"kRoundRobin", kRoundRobin}};
834
836
 
835
837
  std::unordered_map<std::string, CompactionStopStyle>
836
838
  OptionsHelper::compaction_stop_style_string_map = {
@@ -999,21 +999,11 @@ TEST_F(OptionsTest, GetBlockBasedTableOptionsFromString) {
999
999
  EXPECT_EQ(bfp->GetMillibitsPerKey(), 4000);
1000
1000
  EXPECT_EQ(bfp->GetWholeBitsPerKey(), 4);
1001
1001
 
1002
- // Back door way of enabling deprecated block-based Bloom
1003
- ASSERT_OK(GetBlockBasedTableOptionsFromString(
1004
- config_options, table_opt,
1005
- "filter_policy=rocksdb.internal.DeprecatedBlockBasedBloomFilter:4",
1006
- &new_opt));
1007
- auto builtin =
1008
- dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
1009
- EXPECT_EQ(builtin->GetId(),
1010
- "rocksdb.internal.DeprecatedBlockBasedBloomFilter:4");
1011
-
1012
1002
  // Test configuring using other internal names
1013
1003
  ASSERT_OK(GetBlockBasedTableOptionsFromString(
1014
1004
  config_options, table_opt,
1015
1005
  "filter_policy=rocksdb.internal.LegacyBloomFilter:3", &new_opt));
1016
- builtin =
1006
+ auto builtin =
1017
1007
  dynamic_cast<const BuiltinFilterPolicy*>(new_opt.filter_policy.get());
1018
1008
  EXPECT_EQ(builtin->GetId(), "rocksdb.internal.LegacyBloomFilter:3");
1019
1009
 
@@ -95,6 +95,8 @@ class CondVar;
95
95
 
96
96
  class Mutex {
97
97
  public:
98
+ static const char* kName() { return "pthread_mutex_t"; }
99
+
98
100
  explicit Mutex(bool adaptive = kDefaultToAdaptiveMutex);
99
101
  // No copying
100
102
  Mutex(const Mutex&) = delete;
@@ -111,6 +113,11 @@ class Mutex {
111
113
  // it does NOT verify that mutex is held by a calling thread
112
114
  void AssertHeld();
113
115
 
116
+ // Also implement std Lockable
117
+ inline void lock() { Lock(); }
118
+ inline void unlock() { Unlock(); }
119
+ inline bool try_lock() { return TryLock(); }
120
+
114
121
  private:
115
122
  friend class CondVar;
116
123
  pthread_mutex_t mu_;
@@ -79,12 +79,15 @@ class CondVar;
79
79
 
80
80
  class Mutex {
81
81
  public:
82
+ static const char* kName() { return "std::mutex"; }
82
83
 
83
- /* implicit */ Mutex(bool adaptive = kDefaultToAdaptiveMutex)
84
+ explicit Mutex(bool IGNORED_adaptive = kDefaultToAdaptiveMutex)
84
85
  #ifndef NDEBUG
85
- : locked_(false)
86
+ : locked_(false)
86
87
  #endif
87
- { }
88
+ {
89
+ (void)IGNORED_adaptive;
90
+ }
88
91
 
89
92
  ~Mutex();
90
93
 
@@ -120,6 +123,11 @@ class Mutex {
120
123
  #endif
121
124
  }
122
125
 
126
+ // Also implement std Lockable
127
+ inline void lock() { Lock(); }
128
+ inline void unlock() { Unlock(); }
129
+ inline bool try_lock() { return TryLock(); }
130
+
123
131
  // Mutex is move only with lock ownership transfer
124
132
  Mutex(const Mutex&) = delete;
125
133
  void operator=(const Mutex&) = delete;
@@ -21,6 +21,7 @@ LIB_SOURCES = \
21
21
  db/blob/blob_log_format.cc \
22
22
  db/blob/blob_log_sequential_reader.cc \
23
23
  db/blob/blob_log_writer.cc \
24
+ db/blob/blob_source.cc \
24
25
  db/blob/prefetch_buffer_collection.cc \
25
26
  db/builder.cc \
26
27
  db/c.cc \
@@ -156,7 +157,6 @@ LIB_SOURCES = \
156
157
  table/adaptive/adaptive_table_factory.cc \
157
158
  table/block_based/binary_search_index_reader.cc \
158
159
  table/block_based/block.cc \
159
- table/block_based/block_based_filter_block.cc \
160
160
  table/block_based/block_based_table_builder.cc \
161
161
  table/block_based/block_based_table_factory.cc \
162
162
  table/block_based/block_based_table_iterator.cc \
@@ -375,9 +375,13 @@ TEST_LIB_SOURCES = \
375
375
 
376
376
  FOLLY_SOURCES = \
377
377
  $(FOLLY_DIR)/folly/container/detail/F14Table.cpp \
378
+ $(FOLLY_DIR)/folly/detail/Futex.cpp \
378
379
  $(FOLLY_DIR)/folly/lang/SafeAssert.cpp \
379
380
  $(FOLLY_DIR)/folly/lang/ToAscii.cpp \
380
381
  $(FOLLY_DIR)/folly/ScopeGuard.cpp \
382
+ $(FOLLY_DIR)/folly/synchronization/AtomicNotification.cpp \
383
+ $(FOLLY_DIR)/folly/synchronization/DistributedMutex.cpp \
384
+ $(FOLLY_DIR)/folly/synchronization/ParkingLot.cpp \
381
385
 
382
386
  TOOLS_MAIN_SOURCES = \
383
387
  db_stress_tool/db_stress.cc \
@@ -416,6 +420,7 @@ TEST_MAIN_SOURCES = \
416
420
  db/blob/blob_file_garbage_test.cc \
417
421
  db/blob/blob_file_reader_test.cc \
418
422
  db/blob/blob_garbage_meter_test.cc \
423
+ db/blob/blob_source_test.cc \
419
424
  db/blob/db_blob_basic_test.cc \
420
425
  db/blob/db_blob_compaction_test.cc \
421
426
  db/blob/db_blob_corruption_test.cc \
@@ -525,7 +530,6 @@ TEST_MAIN_SOURCES = \
525
530
  options/customizable_test.cc \
526
531
  options/options_settable_test.cc \
527
532
  options/options_test.cc \
528
- table/block_based/block_based_filter_block_test.cc \
529
533
  table/block_based/block_based_table_reader_test.cc \
530
534
  table/block_based/block_test.cc \
531
535
  table/block_based/data_block_hash_index_test.cc \