@nxtedition/rocksdb 7.0.39 → 7.0.42
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +59 -30
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +27 -11
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +310 -337
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +394 -352
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +2 -2
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +13 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +273 -134
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +33 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +133 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +130 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +11 -9
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +209 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +54 -39
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +102 -19
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +30 -11
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +28 -25
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +63 -54
- package/deps/rocksdb/rocksdb/db/db_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/error_handler.cc +7 -0
- package/deps/rocksdb/rocksdb/db/error_handler.h +10 -9
- package/deps/rocksdb/rocksdb/db/log_test.cc +13 -6
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +21 -0
- package/deps/rocksdb/rocksdb/db/table_cache.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +3 -2
- package/deps/rocksdb/rocksdb/db/version_set.h +6 -4
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +8 -6
- package/deps/rocksdb/rocksdb/db/wal_edit.cc +22 -15
- package/deps/rocksdb/rocksdb/db/wal_edit.h +10 -0
- package/deps/rocksdb/rocksdb/db/wal_edit_test.cc +4 -5
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +0 -36
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +1 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +23 -29
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +0 -5
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +7 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +0 -5
- package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -7
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +100 -78
- package/deps/rocksdb/rocksdb/options/options_test.cc +16 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +51 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +3 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +14 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +52 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +8 -38
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +27 -21
- package/deps/rocksdb/rocksdb/util/rate_limiter.h +12 -10
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +11 -8
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +59 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +12 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +31 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +0 -3
- package/max_rev_operator.h +101 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -209,10 +209,16 @@ class HashLinkListRep : public MemTableRep {
|
|
|
209
209
|
|
|
210
210
|
bool LinkListContains(Node* head, const Slice& key) const;
|
|
211
211
|
|
|
212
|
-
|
|
213
|
-
|
|
212
|
+
bool IsEmptyBucket(Pointer& bucket_pointer) const {
|
|
213
|
+
return bucket_pointer.load(std::memory_order_acquire) == nullptr;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
// Precondition: GetLinkListFirstNode() must have been called first and returned
|
|
217
|
+
// null so that it must be a skip list bucket
|
|
218
|
+
SkipListBucketHeader* GetSkipListBucketHeader(Pointer& bucket_pointer) const;
|
|
214
219
|
|
|
215
|
-
|
|
220
|
+
// Returning nullptr indicates it is a skip list bucket.
|
|
221
|
+
Node* GetLinkListFirstNode(Pointer& bucket_pointer) const;
|
|
216
222
|
|
|
217
223
|
Slice GetPrefix(const Slice& internal_key) const {
|
|
218
224
|
return transform_->Transform(ExtractUserKey(internal_key));
|
|
@@ -222,11 +228,9 @@ class HashLinkListRep : public MemTableRep {
|
|
|
222
228
|
return GetSliceRangedNPHash(slice, bucket_size_);
|
|
223
229
|
}
|
|
224
230
|
|
|
225
|
-
Pointer
|
|
226
|
-
return static_cast<Pointer*>(buckets_[i].load(std::memory_order_acquire));
|
|
227
|
-
}
|
|
231
|
+
Pointer& GetBucket(size_t i) const { return buckets_[i]; }
|
|
228
232
|
|
|
229
|
-
Pointer
|
|
233
|
+
Pointer& GetBucket(const Slice& slice) const {
|
|
230
234
|
return GetBucket(GetHash(slice));
|
|
231
235
|
}
|
|
232
236
|
|
|
@@ -414,30 +418,39 @@ class HashLinkListRep : public MemTableRep {
|
|
|
414
418
|
// Advance to the first entry with a key >= target
|
|
415
419
|
void Seek(const Slice& k, const char* memtable_key) override {
|
|
416
420
|
auto transformed = memtable_rep_.GetPrefix(k);
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
skip_list_iter_
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
421
|
+
Pointer& bucket = memtable_rep_.GetBucket(transformed);
|
|
422
|
+
|
|
423
|
+
if (memtable_rep_.IsEmptyBucket(bucket)) {
|
|
424
|
+
skip_list_iter_.reset();
|
|
425
|
+
Reset(nullptr);
|
|
426
|
+
} else {
|
|
427
|
+
Node* first_linked_list_node =
|
|
428
|
+
memtable_rep_.GetLinkListFirstNode(bucket);
|
|
429
|
+
if (first_linked_list_node != nullptr) {
|
|
430
|
+
// The bucket is organized as a linked list
|
|
431
|
+
skip_list_iter_.reset();
|
|
432
|
+
Reset(first_linked_list_node);
|
|
433
|
+
HashLinkListRep::LinkListIterator::Seek(k, memtable_key);
|
|
434
|
+
|
|
431
435
|
} else {
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
436
|
+
SkipListBucketHeader* skip_list_header =
|
|
437
|
+
memtable_rep_.GetSkipListBucketHeader(bucket);
|
|
438
|
+
assert(skip_list_header != nullptr);
|
|
439
|
+
// The bucket is organized as a skip list
|
|
440
|
+
if (!skip_list_iter_) {
|
|
441
|
+
skip_list_iter_.reset(
|
|
442
|
+
new MemtableSkipList::Iterator(&skip_list_header->skip_list));
|
|
443
|
+
} else {
|
|
444
|
+
skip_list_iter_->SetList(&skip_list_header->skip_list);
|
|
445
|
+
}
|
|
446
|
+
if (memtable_key != nullptr) {
|
|
447
|
+
skip_list_iter_->Seek(memtable_key);
|
|
448
|
+
} else {
|
|
449
|
+
IterKey encoded_key;
|
|
450
|
+
encoded_key.EncodeLengthPrefixedKey(k);
|
|
451
|
+
skip_list_iter_->Seek(encoded_key.GetUserKey().data());
|
|
452
|
+
}
|
|
435
453
|
}
|
|
436
|
-
} else {
|
|
437
|
-
// The bucket is organized as a linked list
|
|
438
|
-
skip_list_iter_.reset();
|
|
439
|
-
Reset(memtable_rep_.GetLinkListFirstNode(bucket));
|
|
440
|
-
HashLinkListRep::LinkListIterator::Seek(k, memtable_key);
|
|
441
454
|
}
|
|
442
455
|
}
|
|
443
456
|
|
|
@@ -528,36 +541,38 @@ KeyHandle HashLinkListRep::Allocate(const size_t len, char** buf) {
|
|
|
528
541
|
}
|
|
529
542
|
|
|
530
543
|
SkipListBucketHeader* HashLinkListRep::GetSkipListBucketHeader(
|
|
531
|
-
Pointer
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
return nullptr;
|
|
538
|
-
}
|
|
544
|
+
Pointer& bucket_pointer) const {
|
|
545
|
+
Pointer* first_next_pointer =
|
|
546
|
+
static_cast<Pointer*>(bucket_pointer.load(std::memory_order_acquire));
|
|
547
|
+
assert(first_next_pointer != nullptr);
|
|
548
|
+
assert(first_next_pointer->load(std::memory_order_relaxed) != nullptr);
|
|
549
|
+
|
|
539
550
|
// Counting header
|
|
540
551
|
BucketHeader* header = reinterpret_cast<BucketHeader*>(first_next_pointer);
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
}
|
|
549
|
-
assert(header->GetNumEntries() <= threshold_use_skiplist_);
|
|
550
|
-
return nullptr;
|
|
552
|
+
assert(header->IsSkipListBucket());
|
|
553
|
+
assert(header->GetNumEntries() > threshold_use_skiplist_);
|
|
554
|
+
auto* skip_list_bucket_header =
|
|
555
|
+
reinterpret_cast<SkipListBucketHeader*>(header);
|
|
556
|
+
assert(skip_list_bucket_header->Counting_header.next.load(
|
|
557
|
+
std::memory_order_relaxed) == header);
|
|
558
|
+
return skip_list_bucket_header;
|
|
551
559
|
}
|
|
552
560
|
|
|
553
|
-
Node* HashLinkListRep::GetLinkListFirstNode(Pointer
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
561
|
+
Node* HashLinkListRep::GetLinkListFirstNode(Pointer& bucket_pointer) const {
|
|
562
|
+
Pointer* first_next_pointer =
|
|
563
|
+
static_cast<Pointer*>(bucket_pointer.load(std::memory_order_acquire));
|
|
564
|
+
assert(first_next_pointer != nullptr);
|
|
557
565
|
if (first_next_pointer->load(std::memory_order_relaxed) == nullptr) {
|
|
558
566
|
// Single entry bucket
|
|
559
567
|
return reinterpret_cast<Node*>(first_next_pointer);
|
|
560
568
|
}
|
|
569
|
+
|
|
570
|
+
// It is possible that after we fetch first_next_pointer it is modified
|
|
571
|
+
// and the next is not null anymore. In this case, the bucket should have been
|
|
572
|
+
// modified to a counting header, so we should reload the first_next_pointer
|
|
573
|
+
// to make sure we see the update.
|
|
574
|
+
first_next_pointer =
|
|
575
|
+
static_cast<Pointer*>(bucket_pointer.load(std::memory_order_acquire));
|
|
561
576
|
// Counting header
|
|
562
577
|
BucketHeader* header = reinterpret_cast<BucketHeader*>(first_next_pointer);
|
|
563
578
|
if (!header->IsSkipListBucket()) {
|
|
@@ -695,17 +710,21 @@ bool HashLinkListRep::Contains(const char* key) const {
|
|
|
695
710
|
Slice internal_key = GetLengthPrefixedSlice(key);
|
|
696
711
|
|
|
697
712
|
auto transformed = GetPrefix(internal_key);
|
|
698
|
-
|
|
699
|
-
if (bucket
|
|
713
|
+
Pointer& bucket = GetBucket(transformed);
|
|
714
|
+
if (IsEmptyBucket(bucket)) {
|
|
700
715
|
return false;
|
|
701
716
|
}
|
|
702
717
|
|
|
718
|
+
Node* linked_list_node = GetLinkListFirstNode(bucket);
|
|
719
|
+
if (linked_list_node != nullptr) {
|
|
720
|
+
return LinkListContains(linked_list_node, internal_key);
|
|
721
|
+
}
|
|
722
|
+
|
|
703
723
|
SkipListBucketHeader* skip_list_header = GetSkipListBucketHeader(bucket);
|
|
704
724
|
if (skip_list_header != nullptr) {
|
|
705
725
|
return skip_list_header->skip_list.Contains(key);
|
|
706
|
-
} else {
|
|
707
|
-
return LinkListContains(GetLinkListFirstNode(bucket), internal_key);
|
|
708
726
|
}
|
|
727
|
+
return false;
|
|
709
728
|
}
|
|
710
729
|
|
|
711
730
|
size_t HashLinkListRep::ApproximateMemoryUsage() {
|
|
@@ -716,21 +735,25 @@ size_t HashLinkListRep::ApproximateMemoryUsage() {
|
|
|
716
735
|
void HashLinkListRep::Get(const LookupKey& k, void* callback_args,
|
|
717
736
|
bool (*callback_func)(void* arg, const char* entry)) {
|
|
718
737
|
auto transformed = transform_->Transform(k.user_key());
|
|
719
|
-
|
|
738
|
+
Pointer& bucket = GetBucket(transformed);
|
|
720
739
|
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
740
|
+
if (IsEmptyBucket(bucket)) {
|
|
741
|
+
return;
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
auto* link_list_head = GetLinkListFirstNode(bucket);
|
|
745
|
+
if (link_list_head != nullptr) {
|
|
746
|
+
LinkListIterator iter(this, link_list_head);
|
|
747
|
+
for (iter.Seek(k.internal_key(), nullptr);
|
|
726
748
|
iter.Valid() && callback_func(callback_args, iter.key());
|
|
727
749
|
iter.Next()) {
|
|
728
750
|
}
|
|
729
751
|
} else {
|
|
730
|
-
auto*
|
|
731
|
-
if (
|
|
732
|
-
|
|
733
|
-
|
|
752
|
+
auto* skip_list_header = GetSkipListBucketHeader(bucket);
|
|
753
|
+
if (skip_list_header != nullptr) {
|
|
754
|
+
// Is a skip list
|
|
755
|
+
MemtableSkipList::Iterator iter(&skip_list_header->skip_list);
|
|
756
|
+
for (iter.Seek(k.memtable_key().data());
|
|
734
757
|
iter.Valid() && callback_func(callback_args, iter.key());
|
|
735
758
|
iter.Next()) {
|
|
736
759
|
}
|
|
@@ -746,25 +769,24 @@ MemTableRep::Iterator* HashLinkListRep::GetIterator(Arena* alloc_arena) {
|
|
|
746
769
|
|
|
747
770
|
for (size_t i = 0; i < bucket_size_; ++i) {
|
|
748
771
|
int count = 0;
|
|
749
|
-
|
|
750
|
-
if (bucket
|
|
751
|
-
auto*
|
|
752
|
-
if (
|
|
772
|
+
Pointer& bucket = GetBucket(i);
|
|
773
|
+
if (!IsEmptyBucket(bucket)) {
|
|
774
|
+
auto* link_list_head = GetLinkListFirstNode(bucket);
|
|
775
|
+
if (link_list_head != nullptr) {
|
|
776
|
+
LinkListIterator itr(this, link_list_head);
|
|
777
|
+
for (itr.SeekToHead(); itr.Valid(); itr.Next()) {
|
|
778
|
+
list->Insert(itr.key());
|
|
779
|
+
count++;
|
|
780
|
+
}
|
|
781
|
+
} else {
|
|
782
|
+
auto* skip_list_header = GetSkipListBucketHeader(bucket);
|
|
783
|
+
assert(skip_list_header != nullptr);
|
|
753
784
|
// Is a skip list
|
|
754
785
|
MemtableSkipList::Iterator itr(&skip_list_header->skip_list);
|
|
755
786
|
for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
|
|
756
787
|
list->Insert(itr.key());
|
|
757
788
|
count++;
|
|
758
|
-
}
|
|
759
|
-
} else {
|
|
760
|
-
auto* link_list_head = GetLinkListFirstNode(bucket);
|
|
761
|
-
if (link_list_head != nullptr) {
|
|
762
|
-
LinkListIterator itr(this, link_list_head);
|
|
763
|
-
for (itr.SeekToHead(); itr.Valid(); itr.Next()) {
|
|
764
|
-
list->Insert(itr.key());
|
|
765
|
-
count++;
|
|
766
789
|
}
|
|
767
|
-
}
|
|
768
790
|
}
|
|
769
791
|
}
|
|
770
792
|
if (if_log_bucket_dist_when_flash_) {
|
|
@@ -4958,6 +4958,22 @@ TEST_F(ConfigOptionsTest, MergeOperatorFromString) {
|
|
|
4958
4958
|
ASSERT_EQ(*delimiter, "&&");
|
|
4959
4959
|
}
|
|
4960
4960
|
|
|
4961
|
+
TEST_F(ConfigOptionsTest, ConfiguringOptionsDoesNotRevertRateLimiterBandwidth) {
|
|
4962
|
+
// Regression test for bug where rate limiter's dynamically set bandwidth
|
|
4963
|
+
// could be silently reverted when configuring an options structure with an
|
|
4964
|
+
// existing `rate_limiter`.
|
|
4965
|
+
Options base_options;
|
|
4966
|
+
base_options.rate_limiter.reset(
|
|
4967
|
+
NewGenericRateLimiter(1 << 20 /* rate_bytes_per_sec */));
|
|
4968
|
+
Options copy_options(base_options);
|
|
4969
|
+
|
|
4970
|
+
base_options.rate_limiter->SetBytesPerSecond(2 << 20);
|
|
4971
|
+
ASSERT_EQ(2 << 20, base_options.rate_limiter->GetBytesPerSecond());
|
|
4972
|
+
|
|
4973
|
+
ASSERT_OK(GetOptionsFromString(base_options, "", &copy_options));
|
|
4974
|
+
ASSERT_EQ(2 << 20, base_options.rate_limiter->GetBytesPerSecond());
|
|
4975
|
+
}
|
|
4976
|
+
|
|
4961
4977
|
INSTANTIATE_TEST_CASE_P(OptionsSanityCheckTest, OptionsSanityCheckTest,
|
|
4962
4978
|
::testing::Bool());
|
|
4963
4979
|
#endif // !ROCKSDB_LITE
|
|
@@ -2045,6 +2045,57 @@ void BlockBasedTable::FullFilterKeysMayMatch(
|
|
|
2045
2045
|
}
|
|
2046
2046
|
}
|
|
2047
2047
|
|
|
2048
|
+
Status BlockBasedTable::ApproximateKeyAnchors(const ReadOptions& read_options,
|
|
2049
|
+
std::vector<Anchor>& anchors) {
|
|
2050
|
+
// We iterate over the whole index block here. More efficient implementation
|
|
2051
|
+
// is possible if we push this operation into IndexReader. For example, we
|
|
2052
|
+
// can directly sample from restart block entries in the index block and
|
|
2053
|
+
// only read keys needed. Here we take a simple solution. Performance is
|
|
2054
|
+
// likely not to be a problem. We are compacting the whole file, so all
|
|
2055
|
+
// keys will be read out anyway. An extra read to index block might be
|
|
2056
|
+
// a small share of the overhead. We can try to optimize if needed.
|
|
2057
|
+
IndexBlockIter iiter_on_stack;
|
|
2058
|
+
auto iiter = NewIndexIterator(
|
|
2059
|
+
read_options, /*disable_prefix_seek=*/false, &iiter_on_stack,
|
|
2060
|
+
/*get_context=*/nullptr, /*lookup_context=*/nullptr);
|
|
2061
|
+
std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
|
|
2062
|
+
if (iiter != &iiter_on_stack) {
|
|
2063
|
+
iiter_unique_ptr.reset(iiter);
|
|
2064
|
+
}
|
|
2065
|
+
|
|
2066
|
+
// If needed the threshold could be more adaptive. For example, it can be
|
|
2067
|
+
// based on size, so that a larger file will be sampled to more partitions than a
|
|
2068
|
+
// smaller file. The size might also need to be passed in by the caller based
|
|
2069
|
+
// on total compaction size.
|
|
2070
|
+
const uint64_t kMaxNumAnchors = uint64_t{128};
|
|
2071
|
+
uint64_t num_blocks = this->GetTableProperties()->num_data_blocks;
|
|
2072
|
+
uint64_t num_blocks_per_anchor = num_blocks / kMaxNumAnchors;
|
|
2073
|
+
if (num_blocks_per_anchor == 0) {
|
|
2074
|
+
num_blocks_per_anchor = 1;
|
|
2075
|
+
}
|
|
2076
|
+
|
|
2077
|
+
uint64_t count = 0;
|
|
2078
|
+
std::string last_key;
|
|
2079
|
+
uint64_t range_size = 0;
|
|
2080
|
+
uint64_t prev_offset = 0;
|
|
2081
|
+
for (iiter->SeekToFirst(); iiter->Valid(); iiter->Next()) {
|
|
2082
|
+
const BlockHandle& bh = iiter->value().handle;
|
|
2083
|
+
range_size += bh.offset() + bh.size() - prev_offset;
|
|
2084
|
+
prev_offset = bh.offset() + bh.size();
|
|
2085
|
+
if (++count % num_blocks_per_anchor == 0) {
|
|
2086
|
+
count = 0;
|
|
2087
|
+
anchors.emplace_back(iiter->user_key(), range_size);
|
|
2088
|
+
range_size = 0;
|
|
2089
|
+
} else {
|
|
2090
|
+
last_key = iiter->user_key().ToString();
|
|
2091
|
+
}
|
|
2092
|
+
}
|
|
2093
|
+
if (count != 0) {
|
|
2094
|
+
anchors.emplace_back(last_key, range_size);
|
|
2095
|
+
}
|
|
2096
|
+
return Status::OK();
|
|
2097
|
+
}
|
|
2098
|
+
|
|
2048
2099
|
Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
|
|
2049
2100
|
GetContext* get_context,
|
|
2050
2101
|
const SliceTransform* prefix_extractor,
|
|
@@ -168,6 +168,9 @@ class BlockBasedTable : public TableReader {
|
|
|
168
168
|
uint64_t ApproximateSize(const Slice& start, const Slice& end,
|
|
169
169
|
TableReaderCaller caller) override;
|
|
170
170
|
|
|
171
|
+
Status ApproximateKeyAnchors(const ReadOptions& read_options,
|
|
172
|
+
std::vector<Anchor>& anchors) override;
|
|
173
|
+
|
|
171
174
|
bool TEST_BlockInCache(const BlockHandle& handle) const;
|
|
172
175
|
|
|
173
176
|
// Returns true if the block for the specified key is in cache.
|
|
@@ -86,6 +86,20 @@ class TableReader {
|
|
|
86
86
|
virtual uint64_t ApproximateSize(const Slice& start, const Slice& end,
|
|
87
87
|
TableReaderCaller caller) = 0;
|
|
88
88
|
|
|
89
|
+
struct Anchor {
|
|
90
|
+
Anchor(const Slice& _user_key, size_t _range_size)
|
|
91
|
+
: user_key(_user_key.ToStringView()), range_size(_range_size) {}
|
|
92
|
+
std::string user_key;
|
|
93
|
+
size_t range_size;
|
|
94
|
+
};
|
|
95
|
+
|
|
96
|
+
// Now try to return approximately 128 anchor keys.
|
|
97
|
+
// The last one tends to be the largest key.
|
|
98
|
+
virtual Status ApproximateKeyAnchors(const ReadOptions& /*read_options*/,
|
|
99
|
+
std::vector<Anchor>& /*anchors*/) {
|
|
100
|
+
return Status::NotSupported("ApproximateKeyAnchors() not supported.");
|
|
101
|
+
}
|
|
102
|
+
|
|
89
103
|
// Set up the table for Compaction. Might change some parameters with
|
|
90
104
|
// posix_fadvise
|
|
91
105
|
virtual void SetupForCompaction() = 0;
|
|
@@ -41,6 +41,7 @@
|
|
|
41
41
|
#include "rocksdb/filter_policy.h"
|
|
42
42
|
#include "rocksdb/iterator.h"
|
|
43
43
|
#include "rocksdb/memtablerep.h"
|
|
44
|
+
#include "rocksdb/options.h"
|
|
44
45
|
#include "rocksdb/perf_context.h"
|
|
45
46
|
#include "rocksdb/slice_transform.h"
|
|
46
47
|
#include "rocksdb/statistics.h"
|
|
@@ -4047,6 +4048,57 @@ TEST_F(GeneralTableTest, ApproximateOffsetOfCompressed) {
|
|
|
4047
4048
|
}
|
|
4048
4049
|
}
|
|
4049
4050
|
|
|
4051
|
+
TEST_F(GeneralTableTest, ApproximateKeyAnchors) {
|
|
4052
|
+
Random rnd(301);
|
|
4053
|
+
TableConstructor c(BytewiseComparator(), true /* convert_to_internal_key_ */);
|
|
4054
|
+
std::string tmp;
|
|
4055
|
+
for (int i = 1000; i < 9000; i++) {
|
|
4056
|
+
c.Add(std::to_string(i), rnd.RandomString(2000));
|
|
4057
|
+
}
|
|
4058
|
+
std::vector<std::string> keys;
|
|
4059
|
+
stl_wrappers::KVMap kvmap;
|
|
4060
|
+
Options options;
|
|
4061
|
+
InternalKeyComparator ikc(options.comparator);
|
|
4062
|
+
options.compression = kNoCompression;
|
|
4063
|
+
BlockBasedTableOptions table_options;
|
|
4064
|
+
table_options.block_size = 4096;
|
|
4065
|
+
const ImmutableOptions ioptions(options);
|
|
4066
|
+
const MutableCFOptions moptions(options);
|
|
4067
|
+
c.Finish(options, ioptions, moptions, table_options, ikc, &keys, &kvmap);
|
|
4068
|
+
|
|
4069
|
+
std::vector<TableReader::Anchor> anchors;
|
|
4070
|
+
ASSERT_OK(c.GetTableReader()->ApproximateKeyAnchors(ReadOptions(), anchors));
|
|
4071
|
+
// The target is 128 anchors. But in reality it can be slightly more or fewer.
|
|
4072
|
+
ASSERT_GT(anchors.size(), 120);
|
|
4073
|
+
ASSERT_LT(anchors.size(), 140);
|
|
4074
|
+
|
|
4075
|
+
// We have around 8000 keys. With 128 anchors, on average 62.5 keys per
|
|
4076
|
+
// anchor. Here we take a rough range and estimate the distance between
|
|
4077
|
+
// anchors is between 50 and 100.
|
|
4078
|
+
// Total data size is about 18,000,000, so each anchor range is about
|
|
4079
|
+
// 140,625. We also take a rough range.
|
|
4080
|
+
int prev_num = 1000;
|
|
4081
|
+
// Non-last anchor
|
|
4082
|
+
for (size_t i = 0; i + 1 < anchors.size(); i++) {
|
|
4083
|
+
auto& anchor = anchors[i];
|
|
4084
|
+
ASSERT_GT(anchor.range_size, 100000);
|
|
4085
|
+
ASSERT_LT(anchor.range_size, 200000);
|
|
4086
|
+
|
|
4087
|
+
// Key might be shortened, so fill 0 in the end if it is the case.
|
|
4088
|
+
std::string key_cpy = anchor.user_key;
|
|
4089
|
+
key_cpy.append(4 - key_cpy.size(), '0');
|
|
4090
|
+
int num = std::stoi(key_cpy);
|
|
4091
|
+
ASSERT_GT(num - prev_num, 50);
|
|
4092
|
+
ASSERT_LT(num - prev_num, 100);
|
|
4093
|
+
prev_num = num;
|
|
4094
|
+
}
|
|
4095
|
+
|
|
4096
|
+
ASSERT_EQ("8999", anchors.back().user_key);
|
|
4097
|
+
ASSERT_LT(anchors.back().range_size, 200000);
|
|
4098
|
+
|
|
4099
|
+
c.ResetTableReader();
|
|
4100
|
+
}
|
|
4101
|
+
|
|
4050
4102
|
#if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
|
|
4051
4103
|
TEST_P(ParameterizedHarnessTest, RandomizedHarnessTest) {
|
|
4052
4104
|
Random rnd(test::RandomSeed() + 5);
|
|
@@ -758,6 +758,9 @@ DEFINE_bool(show_table_properties, false,
|
|
|
758
758
|
|
|
759
759
|
DEFINE_string(db, "", "Use the db with the following name.");
|
|
760
760
|
|
|
761
|
+
DEFINE_bool(progress_reports, true,
|
|
762
|
+
"If true, db_bench will report number of finished operations.");
|
|
763
|
+
|
|
761
764
|
// Read cache flags
|
|
762
765
|
|
|
763
766
|
DEFINE_string(read_cache_path, "",
|
|
@@ -2252,7 +2255,7 @@ class Stats {
|
|
|
2252
2255
|
}
|
|
2253
2256
|
|
|
2254
2257
|
done_ += num_ops;
|
|
2255
|
-
if (done_ >= next_report_) {
|
|
2258
|
+
if (done_ >= next_report_ && FLAGS_progress_reports) {
|
|
2256
2259
|
if (!FLAGS_stats_interval) {
|
|
2257
2260
|
if (next_report_ < 1000) next_report_ += 100;
|
|
2258
2261
|
else if (next_report_ < 5000) next_report_ += 500;
|
|
@@ -3814,6 +3817,10 @@ class Benchmark {
|
|
|
3814
3817
|
perf_context.EnablePerLevelPerfContext();
|
|
3815
3818
|
thread->stats.Start(thread->tid);
|
|
3816
3819
|
(arg->bm->*(arg->method))(thread);
|
|
3820
|
+
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
3821
|
+
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
3822
|
+
get_perf_context()->ToString());
|
|
3823
|
+
}
|
|
3817
3824
|
thread->stats.Stop();
|
|
3818
3825
|
|
|
3819
3826
|
{
|
|
@@ -5743,10 +5750,6 @@ class Benchmark {
|
|
|
5743
5750
|
|
|
5744
5751
|
delete iter;
|
|
5745
5752
|
thread->stats.AddBytes(bytes);
|
|
5746
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
5747
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
5748
|
-
get_perf_context()->ToString());
|
|
5749
|
-
}
|
|
5750
5753
|
}
|
|
5751
5754
|
|
|
5752
5755
|
void ReadToRowCache(ThreadState* thread) {
|
|
@@ -5800,11 +5803,6 @@ class Benchmark {
|
|
|
5800
5803
|
|
|
5801
5804
|
thread->stats.AddBytes(bytes);
|
|
5802
5805
|
thread->stats.AddMessage(msg);
|
|
5803
|
-
|
|
5804
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
5805
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
5806
|
-
get_perf_context()->ToString());
|
|
5807
|
-
}
|
|
5808
5806
|
}
|
|
5809
5807
|
|
|
5810
5808
|
void ReadReverse(ThreadState* thread) {
|
|
@@ -5896,11 +5894,6 @@ class Benchmark {
|
|
|
5896
5894
|
found, read, nonexist);
|
|
5897
5895
|
|
|
5898
5896
|
thread->stats.AddMessage(msg);
|
|
5899
|
-
|
|
5900
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
5901
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
5902
|
-
get_perf_context()->ToString());
|
|
5903
|
-
}
|
|
5904
5897
|
}
|
|
5905
5898
|
|
|
5906
5899
|
int64_t GetRandomKey(Random64* rand) {
|
|
@@ -6036,11 +6029,6 @@ class Benchmark {
|
|
|
6036
6029
|
|
|
6037
6030
|
thread->stats.AddBytes(bytes);
|
|
6038
6031
|
thread->stats.AddMessage(msg);
|
|
6039
|
-
|
|
6040
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
6041
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
6042
|
-
get_perf_context()->ToString());
|
|
6043
|
-
}
|
|
6044
6032
|
}
|
|
6045
6033
|
|
|
6046
6034
|
// Calls MultiGet over a list of keys from a random distribution.
|
|
@@ -6602,11 +6590,6 @@ class Benchmark {
|
|
|
6602
6590
|
|
|
6603
6591
|
thread->stats.AddBytes(bytes);
|
|
6604
6592
|
thread->stats.AddMessage(msg);
|
|
6605
|
-
|
|
6606
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
6607
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
6608
|
-
get_perf_context()->ToString());
|
|
6609
|
-
}
|
|
6610
6593
|
}
|
|
6611
6594
|
|
|
6612
6595
|
void IteratorCreation(ThreadState* thread) {
|
|
@@ -6756,10 +6739,6 @@ class Benchmark {
|
|
|
6756
6739
|
found, read);
|
|
6757
6740
|
thread->stats.AddBytes(bytes);
|
|
6758
6741
|
thread->stats.AddMessage(msg);
|
|
6759
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
6760
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
6761
|
-
get_perf_context()->ToString());
|
|
6762
|
-
}
|
|
6763
6742
|
}
|
|
6764
6743
|
|
|
6765
6744
|
void SeekRandomWhileWriting(ThreadState* thread) {
|
|
@@ -7782,11 +7761,6 @@ class Benchmark {
|
|
|
7782
7761
|
snprintf(msg, sizeof(msg), "( batches:%" PRIu64 " )", transactions_done);
|
|
7783
7762
|
}
|
|
7784
7763
|
thread->stats.AddMessage(msg);
|
|
7785
|
-
|
|
7786
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
7787
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
7788
|
-
get_perf_context()->ToString());
|
|
7789
|
-
}
|
|
7790
7764
|
thread->stats.AddBytes(static_cast<int64_t>(inserter.GetBytesInserted()));
|
|
7791
7765
|
}
|
|
7792
7766
|
|
|
@@ -7965,10 +7939,6 @@ class Benchmark {
|
|
|
7965
7939
|
read);
|
|
7966
7940
|
thread->stats.AddBytes(bytes);
|
|
7967
7941
|
thread->stats.AddMessage(msg);
|
|
7968
|
-
if (FLAGS_perf_level > ROCKSDB_NAMESPACE::PerfLevel::kDisable) {
|
|
7969
|
-
thread->stats.AddMessage(std::string("PERF_CONTEXT:\n") +
|
|
7970
|
-
get_perf_context()->ToString());
|
|
7971
|
-
}
|
|
7972
7942
|
}
|
|
7973
7943
|
|
|
7974
7944
|
void TimeSeriesWrite(ThreadState* thread) {
|