@nxtedition/rocksdb 7.0.39 → 7.0.42

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/binding.cc +59 -30
  2. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +27 -11
  3. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +310 -337
  4. package/deps/rocksdb/rocksdb/cache/clock_cache.h +394 -352
  5. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +1 -1
  6. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -2
  7. package/deps/rocksdb/rocksdb/db/column_family_test.cc +1 -1
  8. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -3
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +273 -134
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +33 -2
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -3
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +2 -1
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +2 -2
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +133 -5
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +130 -1
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -4
  17. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +11 -9
  18. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +209 -12
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +54 -39
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +102 -19
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +30 -11
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
  23. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +28 -25
  24. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -14
  25. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +63 -54
  26. package/deps/rocksdb/rocksdb/db/db_test.cc +6 -6
  27. package/deps/rocksdb/rocksdb/db/error_handler.cc +7 -0
  28. package/deps/rocksdb/rocksdb/db/error_handler.h +10 -9
  29. package/deps/rocksdb/rocksdb/db/log_test.cc +13 -6
  30. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +1 -1
  31. package/deps/rocksdb/rocksdb/db/table_cache.cc +21 -0
  32. package/deps/rocksdb/rocksdb/db/table_cache.h +5 -0
  33. package/deps/rocksdb/rocksdb/db/version_set.cc +3 -2
  34. package/deps/rocksdb/rocksdb/db/version_set.h +6 -4
  35. package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -6
  36. package/deps/rocksdb/rocksdb/db/wal_edit.cc +22 -15
  37. package/deps/rocksdb/rocksdb/db/wal_edit.h +10 -0
  38. package/deps/rocksdb/rocksdb/db/wal_edit_test.cc +4 -5
  39. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +0 -36
  40. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +1 -12
  41. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +23 -29
  42. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +0 -5
  43. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +7 -0
  44. package/deps/rocksdb/rocksdb/env/env_test.cc +0 -5
  45. package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -7
  46. package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +100 -78
  47. package/deps/rocksdb/rocksdb/options/options_test.cc +16 -0
  48. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +51 -0
  49. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +3 -0
  50. package/deps/rocksdb/rocksdb/table/table_reader.h +14 -0
  51. package/deps/rocksdb/rocksdb/table/table_test.cc +52 -0
  52. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +8 -38
  53. package/deps/rocksdb/rocksdb/util/rate_limiter.cc +27 -21
  54. package/deps/rocksdb/rocksdb/util/rate_limiter.h +12 -10
  55. package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +11 -8
  56. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +2 -1
  57. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +59 -0
  58. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +12 -0
  59. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +31 -0
  60. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -3
  61. package/max_rev_operator.h +101 -0
  62. package/package.json +1 -1
  63. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  64. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -209,10 +209,16 @@ class HashLinkListRep : public MemTableRep {
209
209
 
210
210
  bool LinkListContains(Node* head, const Slice& key) const;
211
211
 
212
- SkipListBucketHeader* GetSkipListBucketHeader(Pointer* first_next_pointer)
213
- const;
212
+ bool IsEmptyBucket(Pointer& bucket_pointer) const {
213
+ return bucket_pointer.load(std::memory_order_acquire) == nullptr;
214
+ }
215
+
216
+ // Precondition: GetLinkListFirstNode() must have been called first and return
217
+ // null so that it must be a skip list bucket
218
+ SkipListBucketHeader* GetSkipListBucketHeader(Pointer& bucket_pointer) const;
214
219
 
215
- Node* GetLinkListFirstNode(Pointer* first_next_pointer) const;
220
+ // Returning nullptr indicates it is a skip list bucket.
221
+ Node* GetLinkListFirstNode(Pointer& bucket_pointer) const;
216
222
 
217
223
  Slice GetPrefix(const Slice& internal_key) const {
218
224
  return transform_->Transform(ExtractUserKey(internal_key));
@@ -222,11 +228,9 @@ class HashLinkListRep : public MemTableRep {
222
228
  return GetSliceRangedNPHash(slice, bucket_size_);
223
229
  }
224
230
 
225
- Pointer* GetBucket(size_t i) const {
226
- return static_cast<Pointer*>(buckets_[i].load(std::memory_order_acquire));
227
- }
231
+ Pointer& GetBucket(size_t i) const { return buckets_[i]; }
228
232
 
229
- Pointer* GetBucket(const Slice& slice) const {
233
+ Pointer& GetBucket(const Slice& slice) const {
230
234
  return GetBucket(GetHash(slice));
231
235
  }
232
236
 
@@ -414,30 +418,39 @@ class HashLinkListRep : public MemTableRep {
414
418
  // Advance to the first entry with a key >= target
415
419
  void Seek(const Slice& k, const char* memtable_key) override {
416
420
  auto transformed = memtable_rep_.GetPrefix(k);
417
- auto* bucket = memtable_rep_.GetBucket(transformed);
418
-
419
- SkipListBucketHeader* skip_list_header =
420
- memtable_rep_.GetSkipListBucketHeader(bucket);
421
- if (skip_list_header != nullptr) {
422
- // The bucket is organized as a skip list
423
- if (!skip_list_iter_) {
424
- skip_list_iter_.reset(
425
- new MemtableSkipList::Iterator(&skip_list_header->skip_list));
426
- } else {
427
- skip_list_iter_->SetList(&skip_list_header->skip_list);
428
- }
429
- if (memtable_key != nullptr) {
430
- skip_list_iter_->Seek(memtable_key);
421
+ Pointer& bucket = memtable_rep_.GetBucket(transformed);
422
+
423
+ if (memtable_rep_.IsEmptyBucket(bucket)) {
424
+ skip_list_iter_.reset();
425
+ Reset(nullptr);
426
+ } else {
427
+ Node* first_linked_list_node =
428
+ memtable_rep_.GetLinkListFirstNode(bucket);
429
+ if (first_linked_list_node != nullptr) {
430
+ // The bucket is organized as a linked list
431
+ skip_list_iter_.reset();
432
+ Reset(first_linked_list_node);
433
+ HashLinkListRep::LinkListIterator::Seek(k, memtable_key);
434
+
431
435
  } else {
432
- IterKey encoded_key;
433
- encoded_key.EncodeLengthPrefixedKey(k);
434
- skip_list_iter_->Seek(encoded_key.GetUserKey().data());
436
+ SkipListBucketHeader* skip_list_header =
437
+ memtable_rep_.GetSkipListBucketHeader(bucket);
438
+ assert(skip_list_header != nullptr);
439
+ // The bucket is organized as a skip list
440
+ if (!skip_list_iter_) {
441
+ skip_list_iter_.reset(
442
+ new MemtableSkipList::Iterator(&skip_list_header->skip_list));
443
+ } else {
444
+ skip_list_iter_->SetList(&skip_list_header->skip_list);
445
+ }
446
+ if (memtable_key != nullptr) {
447
+ skip_list_iter_->Seek(memtable_key);
448
+ } else {
449
+ IterKey encoded_key;
450
+ encoded_key.EncodeLengthPrefixedKey(k);
451
+ skip_list_iter_->Seek(encoded_key.GetUserKey().data());
452
+ }
435
453
  }
436
- } else {
437
- // The bucket is organized as a linked list
438
- skip_list_iter_.reset();
439
- Reset(memtable_rep_.GetLinkListFirstNode(bucket));
440
- HashLinkListRep::LinkListIterator::Seek(k, memtable_key);
441
454
  }
442
455
  }
443
456
 
@@ -528,36 +541,38 @@ KeyHandle HashLinkListRep::Allocate(const size_t len, char** buf) {
528
541
  }
529
542
 
530
543
  SkipListBucketHeader* HashLinkListRep::GetSkipListBucketHeader(
531
- Pointer* first_next_pointer) const {
532
- if (first_next_pointer == nullptr) {
533
- return nullptr;
534
- }
535
- if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) {
536
- // Single entry bucket
537
- return nullptr;
538
- }
544
+ Pointer& bucket_pointer) const {
545
+ Pointer* first_next_pointer =
546
+ static_cast<Pointer*>(bucket_pointer.load(std::memory_order_acquire));
547
+ assert(first_next_pointer != nullptr);
548
+ assert(first_next_pointer->load(std::memory_order_relaxed) != nullptr);
549
+
539
550
  // Counting header
540
551
  BucketHeader* header = reinterpret_cast<BucketHeader*>(first_next_pointer);
541
- if (header->IsSkipListBucket()) {
542
- assert(header->GetNumEntries() > threshold_use_skiplist_);
543
- auto* skip_list_bucket_header =
544
- reinterpret_cast<SkipListBucketHeader*>(header);
545
- assert(skip_list_bucket_header->Counting_header.next.load(
546
- std::memory_order_relaxed) == header);
547
- return skip_list_bucket_header;
548
- }
549
- assert(header->GetNumEntries() <= threshold_use_skiplist_);
550
- return nullptr;
552
+ assert(header->IsSkipListBucket());
553
+ assert(header->GetNumEntries() > threshold_use_skiplist_);
554
+ auto* skip_list_bucket_header =
555
+ reinterpret_cast<SkipListBucketHeader*>(header);
556
+ assert(skip_list_bucket_header->Counting_header.next.load(
557
+ std::memory_order_relaxed) == header);
558
+ return skip_list_bucket_header;
551
559
  }
552
560
 
553
- Node* HashLinkListRep::GetLinkListFirstNode(Pointer* first_next_pointer) const {
554
- if (first_next_pointer == nullptr) {
555
- return nullptr;
556
- }
561
+ Node* HashLinkListRep::GetLinkListFirstNode(Pointer& bucket_pointer) const {
562
+ Pointer* first_next_pointer =
563
+ static_cast<Pointer*>(bucket_pointer.load(std::memory_order_acquire));
564
+ assert(first_next_pointer != nullptr);
557
565
  if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) {
558
566
  // Single entry bucket
559
567
  return reinterpret_cast<Node*>(first_next_pointer);
560
568
  }
569
+
570
+ // It is possible that after we fetch first_next_pointer it is modified
571
+ // and the next is not null anymore. In this case, the bucket should have been
572
+ // modified to a counting header, so we should reload the first_next_pointer
573
+ // to make sure we see the update.
574
+ first_next_pointer =
575
+ static_cast<Pointer*>(bucket_pointer.load(std::memory_order_acquire));
561
576
  // Counting header
562
577
  BucketHeader* header = reinterpret_cast<BucketHeader*>(first_next_pointer);
563
578
  if (!header->IsSkipListBucket()) {
@@ -695,17 +710,21 @@ bool HashLinkListRep::Contains(const char* key) const {
695
710
  Slice internal_key = GetLengthPrefixedSlice(key);
696
711
 
697
712
  auto transformed = GetPrefix(internal_key);
698
- auto bucket = GetBucket(transformed);
699
- if (bucket == nullptr) {
713
+ Pointer& bucket = GetBucket(transformed);
714
+ if (IsEmptyBucket(bucket)) {
700
715
  return false;
701
716
  }
702
717
 
718
+ Node* linked_list_node = GetLinkListFirstNode(bucket);
719
+ if (linked_list_node != nullptr) {
720
+ return LinkListContains(linked_list_node, internal_key);
721
+ }
722
+
703
723
  SkipListBucketHeader* skip_list_header = GetSkipListBucketHeader(bucket);
704
724
  if (skip_list_header != nullptr) {
705
725
  return skip_list_header->skip_list.Contains(key);
706
- } else {
707
- return LinkListContains(GetLinkListFirstNode(bucket), internal_key);
708
726
  }
727
+ return false;
709
728
  }
710
729
 
711
730
  size_t HashLinkListRep::ApproximateMemoryUsage() {
@@ -716,21 +735,25 @@ size_t HashLinkListRep::ApproximateMemoryUsage() {
716
735
  void HashLinkListRep::Get(const LookupKey& k, void* callback_args,
717
736
  bool (*callback_func)(void* arg, const char* entry)) {
718
737
  auto transformed = transform_->Transform(k.user_key());
719
- auto bucket = GetBucket(transformed);
738
+ Pointer& bucket = GetBucket(transformed);
720
739
 
721
- auto* skip_list_header = GetSkipListBucketHeader(bucket);
722
- if (skip_list_header != nullptr) {
723
- // Is a skip list
724
- MemtableSkipList::Iterator iter(&skip_list_header->skip_list);
725
- for (iter.Seek(k.memtable_key().data());
740
+ if (IsEmptyBucket(bucket)) {
741
+ return;
742
+ }
743
+
744
+ auto* link_list_head = GetLinkListFirstNode(bucket);
745
+ if (link_list_head != nullptr) {
746
+ LinkListIterator iter(this, link_list_head);
747
+ for (iter.Seek(k.internal_key(), nullptr);
726
748
  iter.Valid() && callback_func(callback_args, iter.key());
727
749
  iter.Next()) {
728
750
  }
729
751
  } else {
730
- auto* link_list_head = GetLinkListFirstNode(bucket);
731
- if (link_list_head != nullptr) {
732
- LinkListIterator iter(this, link_list_head);
733
- for (iter.Seek(k.internal_key(), nullptr);
752
+ auto* skip_list_header = GetSkipListBucketHeader(bucket);
753
+ if (skip_list_header != nullptr) {
754
+ // Is a skip list
755
+ MemtableSkipList::Iterator iter(&skip_list_header->skip_list);
756
+ for (iter.Seek(k.memtable_key().data());
734
757
  iter.Valid() && callback_func(callback_args, iter.key());
735
758
  iter.Next()) {
736
759
  }
@@ -746,25 +769,24 @@ MemTableRep::Iterator* HashLinkListRep::GetIterator(Arena* alloc_arena) {
746
769
 
747
770
  for (size_t i = 0; i < bucket_size_; ++i) {
748
771
  int count = 0;
749
- auto* bucket = GetBucket(i);
750
- if (bucket != nullptr) {
751
- auto* skip_list_header = GetSkipListBucketHeader(bucket);
752
- if (skip_list_header != nullptr) {
772
+ Pointer& bucket = GetBucket(i);
773
+ if (!IsEmptyBucket(bucket)) {
774
+ auto* link_list_head = GetLinkListFirstNode(bucket);
775
+ if (link_list_head != nullptr) {
776
+ LinkListIterator itr(this, link_list_head);
777
+ for (itr.SeekToHead(); itr.Valid(); itr.Next()) {
778
+ list->Insert(itr.key());
779
+ count++;
780
+ }
781
+ } else {
782
+ auto* skip_list_header = GetSkipListBucketHeader(bucket);
783
+ assert(skip_list_header != nullptr);
753
784
  // Is a skip list
754
785
  MemtableSkipList::Iterator itr(&skip_list_header->skip_list);
755
786
  for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
756
787
  list->Insert(itr.key());
757
788
  count++;
758
- }
759
- } else {
760
- auto* link_list_head = GetLinkListFirstNode(bucket);
761
- if (link_list_head != nullptr) {
762
- LinkListIterator itr(this, link_list_head);
763
- for (itr.SeekToHead(); itr.Valid(); itr.Next()) {
764
- list->Insert(itr.key());
765
- count++;
766
789
  }
767
- }
768
790
  }
769
791
  }
770
792
  if (if_log_bucket_dist_when_flash_) {
@@ -4958,6 +4958,22 @@ TEST_F(ConfigOptionsTest, MergeOperatorFromString) {
4958
4958
  ASSERT_EQ(*delimiter, "&&");
4959
4959
  }
4960
4960
 
4961
+ TEST_F(ConfigOptionsTest, ConfiguringOptionsDoesNotRevertRateLimiterBandwidth) {
4962
+ // Regression test for bug where rate limiter's dynamically set bandwidth
4963
+ // could be silently reverted when configuring an options structure with an
4964
+ // existing `rate_limiter`.
4965
+ Options base_options;
4966
+ base_options.rate_limiter.reset(
4967
+ NewGenericRateLimiter(1 << 20 /* rate_bytes_per_sec */));
4968
+ Options copy_options(base_options);
4969
+
4970
+ base_options.rate_limiter->SetBytesPerSecond(2 << 20);
4971
+ ASSERT_EQ(2 << 20, base_options.rate_limiter->GetBytesPerSecond());
4972
+
4973
+ ASSERT_OK(GetOptionsFromString(base_options, "", &copy_options));
4974
+ ASSERT_EQ(2 << 20, base_options.rate_limiter->GetBytesPerSecond());
4975
+ }
4976
+
4961
4977
  INSTANTIATE_TEST_CASE_P(OptionsSanityCheckTest, OptionsSanityCheckTest,
4962
4978
  ::testing::Bool());
4963
4979
  #endif // !ROCKSDB_LITE
@@ -2045,6 +2045,57 @@ void BlockBasedTable::FullFilterKeysMayMatch(
2045
2045
  }
2046
2046
  }
2047
2047
 
2048
+ Status BlockBasedTable::ApproximateKeyAnchors(const ReadOptions& read_options,
2049
+ std::vector<Anchor>& anchors) {
2050
+ // We iterator the whole index block here. More efficient implementation
2051
+ // is possible if we push this operation into IndexReader. For example, we
2052
+ // can directly sample from restart block entries in the index block and
2053
+ // only read keys needed. Here we take a simple solution. Performance is
2054
+ // likely not to be a problem. We are compacting the whole file, so all
2055
+ // keys will be read out anyway. An extra read to index block might be
2056
+ // a small share of the overhead. We can try to optimize if needed.
2057
+ IndexBlockIter iiter_on_stack;
2058
+ auto iiter = NewIndexIterator(
2059
+ read_options, /*disable_prefix_seek=*/false, &iiter_on_stack,
2060
+ /*get_context=*/nullptr, /*lookup_context=*/nullptr);
2061
+ std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
2062
+ if (iiter != &iiter_on_stack) {
2063
+ iiter_unique_ptr.reset(iiter);
2064
+ }
2065
+
2066
+ // If needed the threshold could be more adaptive. For example, it can be
2067
+ // based on size, so that a larger will be sampled to more partitions than a
2068
+ // smaller file. The size might also need to be passed in by the caller based
2069
+ // on total compaction size.
2070
+ const uint64_t kMaxNumAnchors = uint64_t{128};
2071
+ uint64_t num_blocks = this->GetTableProperties()->num_data_blocks;
2072
+ uint64_t num_blocks_per_anchor = num_blocks / kMaxNumAnchors;
2073
+ if (num_blocks_per_anchor == 0) {
2074
+ num_blocks_per_anchor = 1;
2075
+ }
2076
+
2077
+ uint64_t count = 0;
2078
+ std::string last_key;
2079
+ uint64_t range_size = 0;
2080
+ uint64_t prev_offset = 0;
2081
+ for (iiter->SeekToFirst(); iiter->Valid(); iiter->Next()) {
2082
+ const BlockHandle& bh = iiter->value().handle;
2083
+ range_size += bh.offset() + bh.size() - prev_offset;
2084
+ prev_offset = bh.offset() + bh.size();
2085
+ if (++count % num_blocks_per_anchor == 0) {
2086
+ count = 0;
2087
+ anchors.emplace_back(iiter->user_key(), range_size);
2088
+ range_size = 0;
2089
+ } else {
2090
+ last_key = iiter->user_key().ToString();
2091
+ }
2092
+ }
2093
+ if (count != 0) {
2094
+ anchors.emplace_back(last_key, range_size);
2095
+ }
2096
+ return Status::OK();
2097
+ }
2098
+
2048
2099
  Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2049
2100
  GetContext* get_context,
2050
2101
  const SliceTransform* prefix_extractor,
@@ -168,6 +168,9 @@ class BlockBasedTable : public TableReader {
168
168
  uint64_t ApproximateSize(const Slice& start, const Slice& end,
169
169
  TableReaderCaller caller) override;
170
170
 
171
+ Status ApproximateKeyAnchors(const ReadOptions& read_options,
172
+ std::vector<Anchor>& anchors) override;
173
+
171
174
  bool TEST_BlockInCache(const BlockHandle& handle) const;
172
175
 
173
176
  // Returns true if the block for the specified key is in cache.
@@ -86,6 +86,20 @@ class TableReader {
86
86
  virtual uint64_t ApproximateSize(const Slice& start, const Slice& end,
87
87
  TableReaderCaller caller) = 0;
88
88
 
89
+ struct Anchor {
90
+ Anchor(const Slice& _user_key, size_t _range_size)
91
+ : user_key(_user_key.ToStringView()), range_size(_range_size) {}
92
+ std::string user_key;
93
+ size_t range_size;
94
+ };
95
+
96
+ // Now try to return approximately 128 anchor keys.
97
+ // The last one tends to be the largest key.
98
+ virtual Status ApproximateKeyAnchors(const ReadOptions& /*read_options*/,
99
+ std::vector<Anchor>& /*anchors*/) {
100
+ return Status::NotSupported("ApproximateKeyAnchors() not supported.");
101
+ }
102
+
89
103
  // Set up the table for Compaction. Might change some parameters with
90
104
  // posix_fadvise
91
105
  virtual void SetupForCompaction() = 0;
@@ -41,6 +41,7 @@
41
41
  #include "rocksdb/filter_policy.h"
42
42
  #include "rocksdb/iterator.h"
43
43
  #include "rocksdb/memtablerep.h"
44
+ #include "rocksdb/options.h"
44
45
  #include "rocksdb/perf_context.h"
45
46
  #include "rocksdb/slice_transform.h"
46
47
  #include "rocksdb/statistics.h"
@@ -4047,6 +4048,57 @@ TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
4047
4048
  }
4048
4049
  }
4049
4050
 
4051
+ TEST_F(GeneralTableTest, ApproximateKeyAnchors) {
4052
+ Random rnd(301);
4053
+ TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
4054
+ std::string tmp;
4055
+ for (int i = 1000; i < 9000; i++) {
4056
+ c.Add(std::to_string(i), rnd.RandomString(2000));
4057
+ }
4058
+ std::vector<std::string> keys;
4059
+ stl_wrappers::KVMap kvmap;
4060
+ Options options;
4061
+ InternalKeyComparator ikc(options.comparator);
4062
+ options.compression = kNoCompression;
4063
+ BlockBasedTableOptions table_options;
4064
+ table_options.block_size = 4096;
4065
+ const ImmutableOptions ioptions(options);
4066
+ const MutableCFOptions moptions(options);
4067
+ c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
4068
+
4069
+ std::vector<TableReader::Anchor> anchors;
4070
+ ASSERT_OK(c.GetTableReader()->ApproximateKeyAnchors(ReadOptions(), anchors));
4071
+ // The target is 128 anchors. But in reality it can be slightly more or fewer.
4072
+ ASSERT_GT(anchors.size(), 120);
4073
+ ASSERT_LT(anchors.size(), 140);
4074
+
4075
+ // We have around 8000 keys. With 128 anchors, in average 62.5 keys per
4076
+ // anchor. Here we take a rough range and estimate the distance between
4077
+ // anchors is between 50 and 100.
4078
+ // Total data size is about 18,000,000, so each anchor range is about
4079
+ // 140,625. We also take a rough range.
4080
+ int prev_num = 1000;
4081
+ // Non-last anchor
4082
+ for (size_t i = 0; i + 1 < anchors.size(); i++) {
4083
+ auto& anchor = anchors[i];
4084
+ ASSERT_GT(anchor.range_size, 100000);
4085
+ ASSERT_LT(anchor.range_size, 200000);
4086
+
4087
+ // Key might be shortened, so fill 0 in the end if it is the case.
4088
+ std::string key_cpy = anchor.user_key;
4089
+ key_cpy.append(4 - key_cpy.size(), '0');
4090
+ int num = std::stoi(key_cpy);
4091
+ ASSERT_GT(num - prev_num, 50);
4092
+ ASSERT_LT(num - prev_num, 100);
4093
+ prev_num = num;
4094
+ }
4095
+
4096
+ ASSERT_EQ("8999", anchors.back().user_key);
4097
+ ASSERT_LT(anchors.back().range_size, 200000);
4098
+
4099
+ c.ResetTableReader();
4100
+ }
4101
+
4050
4102
  #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
4051
4103
  TEST_P(ParameterizedHarnessTest, RandomizedHarnessTest) {
4052
4104
  Random rnd(test::RandomSeed() + 5);
@@ -758,6 +758,9 @@ DEFINE_bool(show_table_properties, false,
758
758
 
759
759
  DEFINE_string(db, "", "Use the db with the following name.");
760
760
 
761
+ DEFINE_bool(progress_reports, true,
762
+ "If true, db_bench will report number of finished operations.");
763
+
761
764
  // Read cache flags
762
765
 
763
766
  DEFINE_string(read_cache_path, "",
@@ -2252,7 +2255,7 @@ class Stats {
2252
2255
  }
2253
2256
 
2254
2257
  done_ += num_ops;
2255
- if (done_ >= next_report_) {
2258
+ if (done_ >= next_report_ && FLAGS_progress_reports) {
2256
2259
  if (!FLAGS_stats_interval) {
2257
2260
  if (next_report_ < 1000) next_report_ += 100;
2258
2261
  else if (next_report_ < 5000) next_report_ += 500;
@@ -3814,6 +3817,10 @@ class Benchmark {
3814
3817
  perf_context.EnablePerLevelPerfContext();
3815
3818
  thread->stats.Start(thread->tid);
3816
3819
  (arg->bm->*(arg->method))(thread);
3820
+ if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
3821
+ thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
3822
+ get_perf_context()->ToString());
3823
+ }
3817
3824
  thread->stats.Stop();
3818
3825
 
3819
3826
  {
@@ -5743,10 +5750,6 @@ class Benchmark {
5743
5750
 
5744
5751
  delete iter;
5745
5752
  thread->stats.AddBytes(bytes);
5746
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
5747
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
5748
- get_perf_context()->ToString());
5749
- }
5750
5753
  }
5751
5754
 
5752
5755
  void ReadToRowCache(ThreadState* thread) {
@@ -5800,11 +5803,6 @@ class Benchmark {
5800
5803
 
5801
5804
  thread->stats.AddBytes(bytes);
5802
5805
  thread->stats.AddMessage(msg);
5803
-
5804
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
5805
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
5806
- get_perf_context()->ToString());
5807
- }
5808
5806
  }
5809
5807
 
5810
5808
  void ReadReverse(ThreadState* thread) {
@@ -5896,11 +5894,6 @@ class Benchmark {
5896
5894
  found, read, nonexist);
5897
5895
 
5898
5896
  thread->stats.AddMessage(msg);
5899
-
5900
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
5901
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
5902
- get_perf_context()->ToString());
5903
- }
5904
5897
  }
5905
5898
 
5906
5899
  int64_t GetRandomKey(Random64* rand) {
@@ -6036,11 +6029,6 @@ class Benchmark {
6036
6029
 
6037
6030
  thread->stats.AddBytes(bytes);
6038
6031
  thread->stats.AddMessage(msg);
6039
-
6040
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
6041
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
6042
- get_perf_context()->ToString());
6043
- }
6044
6032
  }
6045
6033
 
6046
6034
  // Calls MultiGet over a list of keys from a random distribution.
@@ -6602,11 +6590,6 @@ class Benchmark {
6602
6590
 
6603
6591
  thread->stats.AddBytes(bytes);
6604
6592
  thread->stats.AddMessage(msg);
6605
-
6606
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
6607
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
6608
- get_perf_context()->ToString());
6609
- }
6610
6593
  }
6611
6594
 
6612
6595
  void IteratorCreation(ThreadState* thread) {
@@ -6756,10 +6739,6 @@ class Benchmark {
6756
6739
  found, read);
6757
6740
  thread->stats.AddBytes(bytes);
6758
6741
  thread->stats.AddMessage(msg);
6759
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
6760
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
6761
- get_perf_context()->ToString());
6762
- }
6763
6742
  }
6764
6743
 
6765
6744
  void SeekRandomWhileWriting(ThreadState* thread) {
@@ -7782,11 +7761,6 @@ class Benchmark {
7782
7761
  snprintf(msg, sizeof(msg), "( batches:%" PRIu64 " )", transactions_done);
7783
7762
  }
7784
7763
  thread->stats.AddMessage(msg);
7785
-
7786
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
7787
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
7788
- get_perf_context()->ToString());
7789
- }
7790
7764
  thread->stats.AddBytes(static_cast<int64_t>(inserter.GetBytesInserted()));
7791
7765
  }
7792
7766
 
@@ -7965,10 +7939,6 @@ class Benchmark {
7965
7939
  read);
7966
7940
  thread->stats.AddBytes(bytes);
7967
7941
  thread->stats.AddMessage(msg);
7968
- if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
7969
- thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
7970
- get_perf_context()->ToString());
7971
- }
7972
7942
  }
7973
7943
 
7974
7944
  void TimeSeriesWrite(ThreadState* thread) {