@nxtedition/rocksdb 7.1.12 → 7.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +49 -48
- package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
- package/deps/rocksdb/rocksdb/TARGETS +2 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +60 -17
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +81 -37
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +10 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +14 -9
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +69 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +19 -7
- package/deps/rocksdb/rocksdb/db/memtable.h +8 -16
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +27 -16
- package/deps/rocksdb/rocksdb/db/memtable_list.h +18 -11
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +70 -55
- package/deps/rocksdb/rocksdb/db/table_cache.cc +9 -11
- package/deps/rocksdb/rocksdb/db/table_cache.h +2 -1
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +3 -3
- package/deps/rocksdb/rocksdb/db/version_set.cc +530 -257
- package/deps/rocksdb/rocksdb/db/version_set.h +32 -2
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +64 -12
- package/deps/rocksdb/rocksdb/db/wide/wide_columns.cc +18 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +13 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +83 -0
- package/deps/rocksdb/rocksdb/options/options.cc +4 -2
- package/deps/rocksdb/rocksdb/src.mk +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +3 -10
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +5 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +10 -28
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +4 -4
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +11 -9
- package/deps/rocksdb/rocksdb/table/get_context.cc +34 -22
- package/deps/rocksdb/rocksdb/table/get_context.h +6 -3
- package/deps/rocksdb/rocksdb/table/multiget_context.h +69 -5
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +8 -8
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +23 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +27 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +8 -4
- package/deps/rocksdb/rocksdb.gyp +1 -0
- package/index.js +18 -17
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/darwin-x64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -347,6 +347,7 @@ class FilePicker {
|
|
|
347
347
|
return false;
|
|
348
348
|
}
|
|
349
349
|
};
|
|
350
|
+
} // anonymous namespace
|
|
350
351
|
|
|
351
352
|
class FilePickerMultiGet {
|
|
352
353
|
private:
|
|
@@ -362,20 +363,21 @@ class FilePickerMultiGet {
|
|
|
362
363
|
curr_level_(static_cast<unsigned int>(-1)),
|
|
363
364
|
returned_file_level_(static_cast<unsigned int>(-1)),
|
|
364
365
|
hit_file_level_(static_cast<unsigned int>(-1)),
|
|
365
|
-
range_(range),
|
|
366
|
-
batch_iter_(range->begin()),
|
|
367
|
-
batch_iter_prev_(range->begin()),
|
|
368
|
-
upper_key_(range->begin()),
|
|
366
|
+
range_(*range, range->begin(), range->end()),
|
|
369
367
|
maybe_repeat_key_(false),
|
|
370
368
|
current_level_range_(*range, range->begin(), range->end()),
|
|
371
369
|
current_file_range_(*range, range->begin(), range->end()),
|
|
370
|
+
batch_iter_(range->begin()),
|
|
371
|
+
batch_iter_prev_(range->begin()),
|
|
372
|
+
upper_key_(range->begin()),
|
|
372
373
|
level_files_brief_(file_levels),
|
|
373
374
|
is_hit_file_last_in_level_(false),
|
|
374
375
|
curr_file_level_(nullptr),
|
|
375
376
|
file_indexer_(file_indexer),
|
|
376
377
|
user_comparator_(user_comparator),
|
|
377
|
-
internal_comparator_(internal_comparator)
|
|
378
|
-
|
|
378
|
+
internal_comparator_(internal_comparator),
|
|
379
|
+
hit_file_(nullptr) {
|
|
380
|
+
for (auto iter = range_.begin(); iter != range_.end(); ++iter) {
|
|
379
381
|
fp_ctx_array_[iter.index()] =
|
|
380
382
|
FilePickerContext(0, FileIndexer::kLevelMaxIndex);
|
|
381
383
|
}
|
|
@@ -391,7 +393,7 @@ class FilePickerMultiGet {
|
|
|
391
393
|
for (unsigned int i = 0; i < (*level_files_brief_)[0].num_files; ++i) {
|
|
392
394
|
auto* r = (*level_files_brief_)[0].files[i].fd.table_reader;
|
|
393
395
|
if (r) {
|
|
394
|
-
for (auto iter = range_
|
|
396
|
+
for (auto iter = range_.begin(); iter != range_.end(); ++iter) {
|
|
395
397
|
r->Prepare(iter->ikey);
|
|
396
398
|
}
|
|
397
399
|
}
|
|
@@ -399,8 +401,186 @@ class FilePickerMultiGet {
|
|
|
399
401
|
}
|
|
400
402
|
}
|
|
401
403
|
|
|
404
|
+
FilePickerMultiGet(MultiGetRange* range, const FilePickerMultiGet& other)
|
|
405
|
+
: num_levels_(other.num_levels_),
|
|
406
|
+
curr_level_(other.curr_level_),
|
|
407
|
+
returned_file_level_(other.returned_file_level_),
|
|
408
|
+
hit_file_level_(other.hit_file_level_),
|
|
409
|
+
fp_ctx_array_(other.fp_ctx_array_),
|
|
410
|
+
range_(*range, range->begin(), range->end()),
|
|
411
|
+
maybe_repeat_key_(false),
|
|
412
|
+
current_level_range_(*range, range->begin(), range->end()),
|
|
413
|
+
current_file_range_(*range, range->begin(), range->end()),
|
|
414
|
+
batch_iter_(range->begin()),
|
|
415
|
+
batch_iter_prev_(range->begin()),
|
|
416
|
+
upper_key_(range->begin()),
|
|
417
|
+
level_files_brief_(other.level_files_brief_),
|
|
418
|
+
is_hit_file_last_in_level_(false),
|
|
419
|
+
curr_file_level_(other.curr_file_level_),
|
|
420
|
+
file_indexer_(other.file_indexer_),
|
|
421
|
+
user_comparator_(other.user_comparator_),
|
|
422
|
+
internal_comparator_(other.internal_comparator_),
|
|
423
|
+
hit_file_(nullptr) {
|
|
424
|
+
PrepareNextLevelForSearch();
|
|
425
|
+
}
|
|
426
|
+
|
|
402
427
|
int GetCurrentLevel() const { return curr_level_; }
|
|
403
428
|
|
|
429
|
+
void PrepareNextLevelForSearch() { search_ended_ = !PrepareNextLevel(); }
|
|
430
|
+
|
|
431
|
+
FdWithKeyRange* GetNextFileInLevel() {
|
|
432
|
+
if (batch_iter_ == current_level_range_.end() || search_ended_) {
|
|
433
|
+
hit_file_ = nullptr;
|
|
434
|
+
return nullptr;
|
|
435
|
+
} else {
|
|
436
|
+
if (maybe_repeat_key_) {
|
|
437
|
+
maybe_repeat_key_ = false;
|
|
438
|
+
// Check if we found the final value for the last key in the
|
|
439
|
+
// previous lookup range. If we did, then there's no need to look
|
|
440
|
+
// any further for that key, so advance batch_iter_. Else, keep
|
|
441
|
+
// batch_iter_ positioned on that key so we look it up again in
|
|
442
|
+
// the next file
|
|
443
|
+
// For L0, always advance the key because we will look in the next
|
|
444
|
+
// file regardless for all keys not found yet
|
|
445
|
+
if (current_level_range_.CheckKeyDone(batch_iter_) ||
|
|
446
|
+
curr_level_ == 0) {
|
|
447
|
+
batch_iter_ = upper_key_;
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
// batch_iter_prev_ will become the start key for the next file
|
|
451
|
+
// lookup
|
|
452
|
+
batch_iter_prev_ = batch_iter_;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
MultiGetRange next_file_range(current_level_range_, batch_iter_prev_,
|
|
456
|
+
current_level_range_.end());
|
|
457
|
+
size_t curr_file_index =
|
|
458
|
+
(batch_iter_ != current_level_range_.end())
|
|
459
|
+
? fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level
|
|
460
|
+
: curr_file_level_->num_files;
|
|
461
|
+
FdWithKeyRange* f;
|
|
462
|
+
bool is_last_key_in_file;
|
|
463
|
+
if (!GetNextFileInLevelWithKeys(&next_file_range, &curr_file_index, &f,
|
|
464
|
+
&is_last_key_in_file)) {
|
|
465
|
+
hit_file_ = nullptr;
|
|
466
|
+
return nullptr;
|
|
467
|
+
} else {
|
|
468
|
+
if (is_last_key_in_file) {
|
|
469
|
+
// Since cmp_largest is 0, batch_iter_ still points to the last key
|
|
470
|
+
// that falls in this file, instead of the next one. Increment
|
|
471
|
+
// the file index for all keys between batch_iter_ and upper_key_
|
|
472
|
+
auto tmp_iter = batch_iter_;
|
|
473
|
+
while (tmp_iter != upper_key_) {
|
|
474
|
+
++(fp_ctx_array_[tmp_iter.index()].curr_index_in_curr_level);
|
|
475
|
+
++tmp_iter;
|
|
476
|
+
}
|
|
477
|
+
maybe_repeat_key_ = true;
|
|
478
|
+
}
|
|
479
|
+
// Set the range for this file
|
|
480
|
+
current_file_range_ =
|
|
481
|
+
MultiGetRange(next_file_range, batch_iter_prev_, upper_key_);
|
|
482
|
+
returned_file_level_ = curr_level_;
|
|
483
|
+
hit_file_level_ = curr_level_;
|
|
484
|
+
is_hit_file_last_in_level_ =
|
|
485
|
+
curr_file_index == curr_file_level_->num_files - 1;
|
|
486
|
+
hit_file_ = f;
|
|
487
|
+
return f;
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
// getter for current file level
|
|
492
|
+
// for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts
|
|
493
|
+
unsigned int GetHitFileLevel() { return hit_file_level_; }
|
|
494
|
+
|
|
495
|
+
FdWithKeyRange* GetHitFile() { return hit_file_; }
|
|
496
|
+
|
|
497
|
+
// Returns true if the most recent "hit file" (i.e., one returned by
|
|
498
|
+
// GetNextFile()) is at the last index in its level.
|
|
499
|
+
bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; }
|
|
500
|
+
|
|
501
|
+
bool KeyMaySpanNextFile() { return maybe_repeat_key_; }
|
|
502
|
+
|
|
503
|
+
bool IsSearchEnded() { return search_ended_; }
|
|
504
|
+
|
|
505
|
+
const MultiGetRange& CurrentFileRange() { return current_file_range_; }
|
|
506
|
+
|
|
507
|
+
bool RemainingOverlapInLevel() {
|
|
508
|
+
return !current_level_range_.Suffix(current_file_range_).empty();
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
MultiGetRange& GetRange() { return range_; }
|
|
512
|
+
|
|
513
|
+
void ReplaceRange(const MultiGetRange& other) {
|
|
514
|
+
range_ = other;
|
|
515
|
+
current_level_range_ = other;
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
FilePickerMultiGet(FilePickerMultiGet&& other)
|
|
519
|
+
: num_levels_(other.num_levels_),
|
|
520
|
+
curr_level_(other.curr_level_),
|
|
521
|
+
returned_file_level_(other.returned_file_level_),
|
|
522
|
+
hit_file_level_(other.hit_file_level_),
|
|
523
|
+
fp_ctx_array_(std::move(other.fp_ctx_array_)),
|
|
524
|
+
range_(std::move(other.range_)),
|
|
525
|
+
maybe_repeat_key_(other.maybe_repeat_key_),
|
|
526
|
+
current_level_range_(std::move(other.current_level_range_)),
|
|
527
|
+
current_file_range_(std::move(other.current_file_range_)),
|
|
528
|
+
batch_iter_(other.batch_iter_, &current_level_range_),
|
|
529
|
+
batch_iter_prev_(other.batch_iter_prev_, &current_level_range_),
|
|
530
|
+
upper_key_(other.upper_key_, &current_level_range_),
|
|
531
|
+
level_files_brief_(other.level_files_brief_),
|
|
532
|
+
search_ended_(other.search_ended_),
|
|
533
|
+
is_hit_file_last_in_level_(other.is_hit_file_last_in_level_),
|
|
534
|
+
curr_file_level_(other.curr_file_level_),
|
|
535
|
+
file_indexer_(other.file_indexer_),
|
|
536
|
+
user_comparator_(other.user_comparator_),
|
|
537
|
+
internal_comparator_(other.internal_comparator_),
|
|
538
|
+
hit_file_(other.hit_file_) {}
|
|
539
|
+
|
|
540
|
+
private:
|
|
541
|
+
unsigned int num_levels_;
|
|
542
|
+
unsigned int curr_level_;
|
|
543
|
+
unsigned int returned_file_level_;
|
|
544
|
+
unsigned int hit_file_level_;
|
|
545
|
+
|
|
546
|
+
struct FilePickerContext {
|
|
547
|
+
int32_t search_left_bound;
|
|
548
|
+
int32_t search_right_bound;
|
|
549
|
+
unsigned int curr_index_in_curr_level;
|
|
550
|
+
unsigned int start_index_in_curr_level;
|
|
551
|
+
|
|
552
|
+
FilePickerContext(int32_t left, int32_t right)
|
|
553
|
+
: search_left_bound(left),
|
|
554
|
+
search_right_bound(right),
|
|
555
|
+
curr_index_in_curr_level(0),
|
|
556
|
+
start_index_in_curr_level(0) {}
|
|
557
|
+
|
|
558
|
+
FilePickerContext() = default;
|
|
559
|
+
};
|
|
560
|
+
std::array<FilePickerContext, MultiGetContext::MAX_BATCH_SIZE> fp_ctx_array_;
|
|
561
|
+
MultiGetRange range_;
|
|
562
|
+
bool maybe_repeat_key_;
|
|
563
|
+
MultiGetRange current_level_range_;
|
|
564
|
+
MultiGetRange current_file_range_;
|
|
565
|
+
// Iterator to iterate through the keys in a MultiGet batch, that gets reset
|
|
566
|
+
// at the beginning of each level. Each call to GetNextFile() will position
|
|
567
|
+
// batch_iter_ at or right after the last key that was found in the returned
|
|
568
|
+
// SST file
|
|
569
|
+
MultiGetRange::Iterator batch_iter_;
|
|
570
|
+
// An iterator that records the previous position of batch_iter_, i.e last
|
|
571
|
+
// key found in the previous SST file, in order to serve as the start of
|
|
572
|
+
// the batch key range for the next SST file
|
|
573
|
+
MultiGetRange::Iterator batch_iter_prev_;
|
|
574
|
+
MultiGetRange::Iterator upper_key_;
|
|
575
|
+
autovector<LevelFilesBrief>* level_files_brief_;
|
|
576
|
+
bool search_ended_;
|
|
577
|
+
bool is_hit_file_last_in_level_;
|
|
578
|
+
LevelFilesBrief* curr_file_level_;
|
|
579
|
+
FileIndexer* file_indexer_;
|
|
580
|
+
const Comparator* user_comparator_;
|
|
581
|
+
const InternalKeyComparator* internal_comparator_;
|
|
582
|
+
FdWithKeyRange* hit_file_;
|
|
583
|
+
|
|
404
584
|
// Iterates through files in the current level until it finds a file that
|
|
405
585
|
// contains at least one key from the MultiGet batch
|
|
406
586
|
bool GetNextFileInLevelWithKeys(MultiGetRange* next_file_range,
|
|
@@ -524,124 +704,6 @@ class FilePickerMultiGet {
|
|
|
524
704
|
return file_hit;
|
|
525
705
|
}
|
|
526
706
|
|
|
527
|
-
void PrepareNextLevelForSearch() { search_ended_ = !PrepareNextLevel(); }
|
|
528
|
-
|
|
529
|
-
FdWithKeyRange* GetNextFileInLevel() {
|
|
530
|
-
if (batch_iter_ == current_level_range_.end() || search_ended_) {
|
|
531
|
-
return nullptr;
|
|
532
|
-
} else {
|
|
533
|
-
if (maybe_repeat_key_) {
|
|
534
|
-
maybe_repeat_key_ = false;
|
|
535
|
-
// Check if we found the final value for the last key in the
|
|
536
|
-
// previous lookup range. If we did, then there's no need to look
|
|
537
|
-
// any further for that key, so advance batch_iter_. Else, keep
|
|
538
|
-
// batch_iter_ positioned on that key so we look it up again in
|
|
539
|
-
// the next file
|
|
540
|
-
// For L0, always advance the key because we will look in the next
|
|
541
|
-
// file regardless for all keys not found yet
|
|
542
|
-
if (current_level_range_.CheckKeyDone(batch_iter_) ||
|
|
543
|
-
curr_level_ == 0) {
|
|
544
|
-
batch_iter_ = upper_key_;
|
|
545
|
-
}
|
|
546
|
-
}
|
|
547
|
-
// batch_iter_prev_ will become the start key for the next file
|
|
548
|
-
// lookup
|
|
549
|
-
batch_iter_prev_ = batch_iter_;
|
|
550
|
-
}
|
|
551
|
-
|
|
552
|
-
MultiGetRange next_file_range(current_level_range_, batch_iter_prev_,
|
|
553
|
-
current_level_range_.end());
|
|
554
|
-
size_t curr_file_index =
|
|
555
|
-
(batch_iter_ != current_level_range_.end())
|
|
556
|
-
? fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level
|
|
557
|
-
: curr_file_level_->num_files;
|
|
558
|
-
FdWithKeyRange* f;
|
|
559
|
-
bool is_last_key_in_file;
|
|
560
|
-
if (!GetNextFileInLevelWithKeys(&next_file_range, &curr_file_index, &f,
|
|
561
|
-
&is_last_key_in_file)) {
|
|
562
|
-
return nullptr;
|
|
563
|
-
} else {
|
|
564
|
-
if (is_last_key_in_file) {
|
|
565
|
-
// Since cmp_largest is 0, batch_iter_ still points to the last key
|
|
566
|
-
// that falls in this file, instead of the next one. Increment
|
|
567
|
-
// the file index for all keys between batch_iter_ and upper_key_
|
|
568
|
-
auto tmp_iter = batch_iter_;
|
|
569
|
-
while (tmp_iter != upper_key_) {
|
|
570
|
-
++(fp_ctx_array_[tmp_iter.index()].curr_index_in_curr_level);
|
|
571
|
-
++tmp_iter;
|
|
572
|
-
}
|
|
573
|
-
maybe_repeat_key_ = true;
|
|
574
|
-
}
|
|
575
|
-
// Set the range for this file
|
|
576
|
-
current_file_range_ =
|
|
577
|
-
MultiGetRange(next_file_range, batch_iter_prev_, upper_key_);
|
|
578
|
-
returned_file_level_ = curr_level_;
|
|
579
|
-
hit_file_level_ = curr_level_;
|
|
580
|
-
is_hit_file_last_in_level_ =
|
|
581
|
-
curr_file_index == curr_file_level_->num_files - 1;
|
|
582
|
-
return f;
|
|
583
|
-
}
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
// getter for current file level
|
|
587
|
-
// for GET_HIT_L0, GET_HIT_L1 & GET_HIT_L2_AND_UP counts
|
|
588
|
-
unsigned int GetHitFileLevel() { return hit_file_level_; }
|
|
589
|
-
|
|
590
|
-
// Returns true if the most recent "hit file" (i.e., one returned by
|
|
591
|
-
// GetNextFile()) is at the last index in its level.
|
|
592
|
-
bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; }
|
|
593
|
-
|
|
594
|
-
bool KeyMaySpanNextFile() { return maybe_repeat_key_; }
|
|
595
|
-
|
|
596
|
-
bool IsSearchEnded() { return search_ended_; }
|
|
597
|
-
|
|
598
|
-
const MultiGetRange& CurrentFileRange() { return current_file_range_; }
|
|
599
|
-
|
|
600
|
-
bool RemainingOverlapInLevel() {
|
|
601
|
-
return !current_level_range_.Suffix(current_file_range_).empty();
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
private:
|
|
605
|
-
unsigned int num_levels_;
|
|
606
|
-
unsigned int curr_level_;
|
|
607
|
-
unsigned int returned_file_level_;
|
|
608
|
-
unsigned int hit_file_level_;
|
|
609
|
-
|
|
610
|
-
struct FilePickerContext {
|
|
611
|
-
int32_t search_left_bound;
|
|
612
|
-
int32_t search_right_bound;
|
|
613
|
-
unsigned int curr_index_in_curr_level;
|
|
614
|
-
unsigned int start_index_in_curr_level;
|
|
615
|
-
|
|
616
|
-
FilePickerContext(int32_t left, int32_t right)
|
|
617
|
-
: search_left_bound(left), search_right_bound(right),
|
|
618
|
-
curr_index_in_curr_level(0), start_index_in_curr_level(0) {}
|
|
619
|
-
|
|
620
|
-
FilePickerContext() = default;
|
|
621
|
-
};
|
|
622
|
-
std::array<FilePickerContext, MultiGetContext::MAX_BATCH_SIZE> fp_ctx_array_;
|
|
623
|
-
MultiGetRange* range_;
|
|
624
|
-
// Iterator to iterate through the keys in a MultiGet batch, that gets reset
|
|
625
|
-
// at the beginning of each level. Each call to GetNextFile() will position
|
|
626
|
-
// batch_iter_ at or right after the last key that was found in the returned
|
|
627
|
-
// SST file
|
|
628
|
-
MultiGetRange::Iterator batch_iter_;
|
|
629
|
-
// An iterator that records the previous position of batch_iter_, i.e last
|
|
630
|
-
// key found in the previous SST file, in order to serve as the start of
|
|
631
|
-
// the batch key range for the next SST file
|
|
632
|
-
MultiGetRange::Iterator batch_iter_prev_;
|
|
633
|
-
MultiGetRange::Iterator upper_key_;
|
|
634
|
-
bool maybe_repeat_key_;
|
|
635
|
-
MultiGetRange current_level_range_;
|
|
636
|
-
MultiGetRange current_file_range_;
|
|
637
|
-
autovector<LevelFilesBrief>* level_files_brief_;
|
|
638
|
-
bool search_ended_;
|
|
639
|
-
bool is_hit_file_last_in_level_;
|
|
640
|
-
LevelFilesBrief* curr_file_level_;
|
|
641
|
-
FileIndexer* file_indexer_;
|
|
642
|
-
const Comparator* user_comparator_;
|
|
643
|
-
const InternalKeyComparator* internal_comparator_;
|
|
644
|
-
|
|
645
707
|
// Setup local variables to search next level.
|
|
646
708
|
// Returns false if there are no more levels to search.
|
|
647
709
|
bool PrepareNextLevel() {
|
|
@@ -692,7 +754,7 @@ class FilePickerMultiGet {
|
|
|
692
754
|
// are always compacted into a single entry).
|
|
693
755
|
int32_t start_index = -1;
|
|
694
756
|
current_level_range_ =
|
|
695
|
-
MultiGetRange(
|
|
757
|
+
MultiGetRange(range_, range_.begin(), range_.end());
|
|
696
758
|
for (auto mget_iter = current_level_range_.begin();
|
|
697
759
|
mget_iter != current_level_range_.end(); ++mget_iter) {
|
|
698
760
|
struct FilePickerContext& fp_ctx = fp_ctx_array_[mget_iter.index()];
|
|
@@ -754,7 +816,6 @@ class FilePickerMultiGet {
|
|
|
754
816
|
return false;
|
|
755
817
|
}
|
|
756
818
|
};
|
|
757
|
-
} // anonymous namespace
|
|
758
819
|
|
|
759
820
|
VersionStorageInfo::~VersionStorageInfo() { delete[] files_; }
|
|
760
821
|
|
|
@@ -1969,7 +2030,8 @@ void Version::MultiGetBlob(
|
|
|
1969
2030
|
}
|
|
1970
2031
|
|
|
1971
2032
|
void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
|
1972
|
-
PinnableSlice* value,
|
|
2033
|
+
PinnableSlice* value, PinnableWideColumns* columns,
|
|
2034
|
+
std::string* timestamp, Status* status,
|
|
1973
2035
|
MergeContext* merge_context,
|
|
1974
2036
|
SequenceNumber* max_covering_tombstone_seq,
|
|
1975
2037
|
PinnedIteratorsManager* pinned_iters_mgr, bool* value_found,
|
|
@@ -2002,8 +2064,9 @@ void Version::Get(const ReadOptions& read_options, const LookupKey& k,
|
|
|
2002
2064
|
GetContext get_context(
|
|
2003
2065
|
user_comparator(), merge_operator_, info_log_, db_statistics_,
|
|
2004
2066
|
status->ok() ? GetContext::kNotFound : GetContext::kMerge, user_key,
|
|
2005
|
-
do_merge ? value : nullptr, do_merge ?
|
|
2006
|
-
|
|
2067
|
+
do_merge ? value : nullptr, do_merge ? columns : nullptr,
|
|
2068
|
+
do_merge ? timestamp : nullptr, value_found, merge_context, do_merge,
|
|
2069
|
+
max_covering_tombstone_seq, clock_, seq,
|
|
2007
2070
|
merge_operator_ ? pinned_iters_mgr : nullptr, callback, is_blob_to_use,
|
|
2008
2071
|
tracing_get_id, &blob_fetcher);
|
|
2009
2072
|
|
|
@@ -2171,9 +2234,10 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2171
2234
|
get_ctx.emplace_back(
|
|
2172
2235
|
user_comparator(), merge_operator_, info_log_, db_statistics_,
|
|
2173
2236
|
iter->s->ok() ? GetContext::kNotFound : GetContext::kMerge,
|
|
2174
|
-
iter->ukey_with_ts, iter->value, iter->timestamp,
|
|
2175
|
-
&(iter->merge_context), true,
|
|
2176
|
-
|
|
2237
|
+
iter->ukey_with_ts, iter->value, /*columns=*/nullptr, iter->timestamp,
|
|
2238
|
+
nullptr, &(iter->merge_context), true,
|
|
2239
|
+
&iter->max_covering_tombstone_seq, clock_, nullptr,
|
|
2240
|
+
merge_operator_ ? &pinned_iters_mgr : nullptr, callback,
|
|
2177
2241
|
&iter->is_blob_index, tracing_mget_id, &blob_fetcher);
|
|
2178
2242
|
// MergeInProgress status, if set, has been transferred to the get_context
|
|
2179
2243
|
// state, so we set status to ok here. From now on, the iter status will
|
|
@@ -2187,148 +2251,162 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2187
2251
|
iter->get_context = &(get_ctx[get_ctx_index]);
|
|
2188
2252
|
}
|
|
2189
2253
|
|
|
2190
|
-
MultiGetRange file_picker_range(*range, range->begin(), range->end());
|
|
2191
|
-
FilePickerMultiGet fp(
|
|
2192
|
-
&file_picker_range,
|
|
2193
|
-
&storage_info_.level_files_brief_, storage_info_.num_non_empty_levels_,
|
|
2194
|
-
&storage_info_.file_indexer_, user_comparator(), internal_comparator());
|
|
2195
|
-
FdWithKeyRange* f = fp.GetNextFileInLevel();
|
|
2196
2254
|
Status s;
|
|
2197
|
-
uint64_t num_index_read = 0;
|
|
2198
|
-
uint64_t num_filter_read = 0;
|
|
2199
|
-
uint64_t num_sst_read = 0;
|
|
2200
|
-
uint64_t num_level_read = 0;
|
|
2201
|
-
|
|
2202
|
-
MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
|
|
2203
2255
|
// blob_file => [[blob_idx, it], ...]
|
|
2204
2256
|
std::unordered_map<uint64_t, BlobReadContexts> blob_ctxs;
|
|
2205
|
-
|
|
2206
|
-
|
|
2207
|
-
while (!fp.IsSearchEnded()) {
|
|
2208
|
-
// This will be set to true later if we actually look up in a file in L0.
|
|
2209
|
-
// For per level stats purposes, an L0 file is treated as a level
|
|
2210
|
-
bool dump_stats_for_l0_file = false;
|
|
2211
|
-
|
|
2212
|
-
// Avoid using the coroutine version if we're looking in a L0 file, since
|
|
2213
|
-
// L0 files won't be parallelized anyway. The regular synchronous version
|
|
2214
|
-
// is faster.
|
|
2215
|
-
if (!read_options.async_io || !using_coroutines() ||
|
|
2216
|
-
fp.GetHitFileLevel() == 0 || !fp.RemainingOverlapInLevel()) {
|
|
2217
|
-
if (f) {
|
|
2218
|
-
bool skip_filters = IsFilterSkipped(
|
|
2219
|
-
static_cast<int>(fp.GetHitFileLevel()), fp.IsHitFileLastInLevel());
|
|
2220
|
-
// Call MultiGetFromSST for looking up a single file
|
|
2221
|
-
s = MultiGetFromSST(read_options, fp.CurrentFileRange(),
|
|
2222
|
-
fp.GetHitFileLevel(), skip_filters, f, blob_ctxs,
|
|
2223
|
-
/*table_handle=*/nullptr, num_filter_read,
|
|
2224
|
-
num_index_read, num_sst_read);
|
|
2225
|
-
if (fp.GetHitFileLevel() == 0) {
|
|
2226
|
-
dump_stats_for_l0_file = true;
|
|
2227
|
-
}
|
|
2228
|
-
}
|
|
2229
|
-
if (s.ok()) {
|
|
2230
|
-
f = fp.GetNextFileInLevel();
|
|
2231
|
-
}
|
|
2257
|
+
MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
|
|
2232
2258
|
#if USE_COROUTINES
|
|
2233
|
-
|
|
2234
|
-
|
|
2235
|
-
|
|
2236
|
-
|
|
2237
|
-
|
|
2238
|
-
|
|
2239
|
-
|
|
2240
|
-
|
|
2241
|
-
|
|
2242
|
-
|
|
2243
|
-
|
|
2244
|
-
|
|
2245
|
-
|
|
2246
|
-
|
|
2247
|
-
|
|
2248
|
-
|
|
2249
|
-
|
|
2259
|
+
if (read_options.async_io && read_options.optimize_multiget_for_io &&
|
|
2260
|
+
using_coroutines()) {
|
|
2261
|
+
s = MultiGetAsync(read_options, range, &blob_ctxs);
|
|
2262
|
+
} else
|
|
2263
|
+
#endif // USE_COROUTINES
|
|
2264
|
+
{
|
|
2265
|
+
MultiGetRange file_picker_range(*range, range->begin(), range->end());
|
|
2266
|
+
FilePickerMultiGet fp(&file_picker_range, &storage_info_.level_files_brief_,
|
|
2267
|
+
storage_info_.num_non_empty_levels_,
|
|
2268
|
+
&storage_info_.file_indexer_, user_comparator(),
|
|
2269
|
+
internal_comparator());
|
|
2270
|
+
FdWithKeyRange* f = fp.GetNextFileInLevel();
|
|
2271
|
+
uint64_t num_index_read = 0;
|
|
2272
|
+
uint64_t num_filter_read = 0;
|
|
2273
|
+
uint64_t num_sst_read = 0;
|
|
2274
|
+
uint64_t num_level_read = 0;
|
|
2275
|
+
|
|
2276
|
+
int prev_level = -1;
|
|
2277
|
+
|
|
2278
|
+
while (!fp.IsSearchEnded()) {
|
|
2279
|
+
// This will be set to true later if we actually look up in a file in L0.
|
|
2280
|
+
// For per level stats purposes, an L0 file is treated as a level
|
|
2281
|
+
bool dump_stats_for_l0_file = false;
|
|
2282
|
+
|
|
2283
|
+
// Avoid using the coroutine version if we're looking in a L0 file, since
|
|
2284
|
+
// L0 files won't be parallelized anyway. The regular synchronous version
|
|
2285
|
+
// is faster.
|
|
2286
|
+
if (!read_options.async_io || !using_coroutines() ||
|
|
2287
|
+
fp.GetHitFileLevel() == 0 || !fp.RemainingOverlapInLevel()) {
|
|
2288
|
+
if (f) {
|
|
2289
|
+
bool skip_filters =
|
|
2290
|
+
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
|
|
2291
|
+
fp.IsHitFileLastInLevel());
|
|
2292
|
+
// Call MultiGetFromSST for looking up a single file
|
|
2293
|
+
s = MultiGetFromSST(read_options, fp.CurrentFileRange(),
|
|
2294
|
+
fp.GetHitFileLevel(), skip_filters,
|
|
2295
|
+
/*skip_range_deletions=*/false, f, blob_ctxs,
|
|
2296
|
+
/*table_handle=*/nullptr, num_filter_read,
|
|
2297
|
+
num_index_read, num_sst_read);
|
|
2298
|
+
if (fp.GetHitFileLevel() == 0) {
|
|
2299
|
+
dump_stats_for_l0_file = true;
|
|
2250
2300
|
}
|
|
2251
2301
|
}
|
|
2252
|
-
|
|
2253
|
-
|
|
2254
|
-
break;
|
|
2302
|
+
if (s.ok()) {
|
|
2303
|
+
f = fp.GetNextFileInLevel();
|
|
2255
2304
|
}
|
|
2305
|
+
#if USE_COROUTINES
|
|
2306
|
+
} else {
|
|
2307
|
+
std::vector<folly::coro::Task<Status>> mget_tasks;
|
|
2308
|
+
while (f != nullptr) {
|
|
2309
|
+
MultiGetRange file_range = fp.CurrentFileRange();
|
|
2310
|
+
Cache::Handle* table_handle = nullptr;
|
|
2311
|
+
bool skip_filters =
|
|
2312
|
+
IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
|
|
2313
|
+
fp.IsHitFileLastInLevel());
|
|
2314
|
+
bool skip_range_deletions = false;
|
|
2315
|
+
if (!skip_filters) {
|
|
2316
|
+
Status status = table_cache_->MultiGetFilter(
|
|
2317
|
+
read_options, *internal_comparator(), *f->file_metadata,
|
|
2318
|
+
mutable_cf_options_.prefix_extractor,
|
|
2319
|
+
cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
|
|
2320
|
+
fp.GetHitFileLevel(), &file_range, &table_handle);
|
|
2321
|
+
skip_range_deletions = true;
|
|
2322
|
+
if (status.ok()) {
|
|
2323
|
+
skip_filters = true;
|
|
2324
|
+
} else if (!status.IsNotSupported()) {
|
|
2325
|
+
s = status;
|
|
2326
|
+
}
|
|
2327
|
+
}
|
|
2256
2328
|
|
|
2257
|
-
|
|
2258
|
-
|
|
2259
|
-
read_options, file_range, fp.GetHitFileLevel(), skip_filters, f,
|
|
2260
|
-
blob_ctxs, table_handle, num_filter_read, num_index_read,
|
|
2261
|
-
num_sst_read));
|
|
2262
|
-
}
|
|
2263
|
-
if (fp.KeyMaySpanNextFile()) {
|
|
2264
|
-
break;
|
|
2265
|
-
}
|
|
2266
|
-
f = fp.GetNextFileInLevel();
|
|
2267
|
-
}
|
|
2268
|
-
if (s.ok() && mget_tasks.size() > 0) {
|
|
2269
|
-
RecordTick(db_statistics_, MULTIGET_COROUTINE_COUNT, mget_tasks.size());
|
|
2270
|
-
// Collect all results so far
|
|
2271
|
-
std::vector<Status> statuses = folly::coro::blockingWait(
|
|
2272
|
-
folly::coro::collectAllRange(std::move(mget_tasks))
|
|
2273
|
-
.scheduleOn(&range->context()->executor()));
|
|
2274
|
-
for (Status stat : statuses) {
|
|
2275
|
-
if (!stat.ok()) {
|
|
2276
|
-
s = stat;
|
|
2329
|
+
if (!s.ok()) {
|
|
2330
|
+
break;
|
|
2277
2331
|
}
|
|
2278
|
-
}
|
|
2279
2332
|
|
|
2280
|
-
|
|
2333
|
+
if (!file_range.empty()) {
|
|
2334
|
+
mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
|
|
2335
|
+
read_options, file_range, fp.GetHitFileLevel(), skip_filters,
|
|
2336
|
+
skip_range_deletions, f, blob_ctxs, table_handle,
|
|
2337
|
+
num_filter_read, num_index_read, num_sst_read));
|
|
2338
|
+
}
|
|
2339
|
+
if (fp.KeyMaySpanNextFile()) {
|
|
2340
|
+
break;
|
|
2341
|
+
}
|
|
2281
2342
|
f = fp.GetNextFileInLevel();
|
|
2282
2343
|
}
|
|
2283
|
-
|
|
2344
|
+
if (s.ok() && mget_tasks.size() > 0) {
|
|
2345
|
+
RecordTick(db_statistics_, MULTIGET_COROUTINE_COUNT,
|
|
2346
|
+
mget_tasks.size());
|
|
2347
|
+
// Collect all results so far
|
|
2348
|
+
std::vector<Status> statuses = folly::coro::blockingWait(
|
|
2349
|
+
folly::coro::collectAllRange(std::move(mget_tasks))
|
|
2350
|
+
.scheduleOn(&range->context()->executor()));
|
|
2351
|
+
for (Status stat : statuses) {
|
|
2352
|
+
if (!stat.ok()) {
|
|
2353
|
+
s = stat;
|
|
2354
|
+
}
|
|
2355
|
+
}
|
|
2356
|
+
|
|
2357
|
+
if (s.ok() && fp.KeyMaySpanNextFile()) {
|
|
2358
|
+
f = fp.GetNextFileInLevel();
|
|
2359
|
+
}
|
|
2360
|
+
}
|
|
2284
2361
|
#endif // USE_COROUTINES
|
|
2285
|
-
|
|
2286
|
-
|
|
2287
|
-
|
|
2288
|
-
|
|
2289
|
-
|
|
2290
|
-
|
|
2291
|
-
|
|
2292
|
-
|
|
2293
|
-
|
|
2294
|
-
|
|
2295
|
-
|
|
2296
|
-
}
|
|
2297
|
-
if (dump_stats_for_l0_file ||
|
|
2298
|
-
(prev_level != 0 && prev_level != (int)fp.GetHitFileLevel())) {
|
|
2299
|
-
// Dump the stats if the search has moved to the next level and
|
|
2300
|
-
// reset for next level.
|
|
2301
|
-
if (num_filter_read + num_index_read) {
|
|
2302
|
-
RecordInHistogram(db_statistics_,
|
|
2303
|
-
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
|
|
2304
|
-
num_index_read + num_filter_read);
|
|
2362
|
+
}
|
|
2363
|
+
// If bad status or we found final result for all the keys
|
|
2364
|
+
if (!s.ok() || file_picker_range.empty()) {
|
|
2365
|
+
break;
|
|
2366
|
+
}
|
|
2367
|
+
if (!f) {
|
|
2368
|
+
// Reached the end of this level. Prepare the next level
|
|
2369
|
+
fp.PrepareNextLevelForSearch();
|
|
2370
|
+
if (!fp.IsSearchEnded()) {
|
|
2371
|
+
// Its possible there is no overlap on this level and f is nullptr
|
|
2372
|
+
f = fp.GetNextFileInLevel();
|
|
2305
2373
|
}
|
|
2306
|
-
if (
|
|
2307
|
-
|
|
2308
|
-
|
|
2309
|
-
|
|
2374
|
+
if (dump_stats_for_l0_file ||
|
|
2375
|
+
(prev_level != 0 && prev_level != (int)fp.GetHitFileLevel())) {
|
|
2376
|
+
// Dump the stats if the search has moved to the next level and
|
|
2377
|
+
// reset for next level.
|
|
2378
|
+
if (num_filter_read + num_index_read) {
|
|
2379
|
+
RecordInHistogram(db_statistics_,
|
|
2380
|
+
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
|
|
2381
|
+
num_index_read + num_filter_read);
|
|
2382
|
+
}
|
|
2383
|
+
if (num_sst_read) {
|
|
2384
|
+
RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL,
|
|
2385
|
+
num_sst_read);
|
|
2386
|
+
num_level_read++;
|
|
2387
|
+
}
|
|
2388
|
+
num_filter_read = 0;
|
|
2389
|
+
num_index_read = 0;
|
|
2390
|
+
num_sst_read = 0;
|
|
2310
2391
|
}
|
|
2311
|
-
|
|
2312
|
-
num_index_read = 0;
|
|
2313
|
-
num_sst_read = 0;
|
|
2392
|
+
prev_level = fp.GetHitFileLevel();
|
|
2314
2393
|
}
|
|
2315
|
-
prev_level = fp.GetHitFileLevel();
|
|
2316
2394
|
}
|
|
2317
|
-
}
|
|
2318
2395
|
|
|
2319
|
-
|
|
2320
|
-
|
|
2321
|
-
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
|
|
2325
|
-
|
|
2326
|
-
|
|
2327
|
-
|
|
2328
|
-
|
|
2329
|
-
|
|
2330
|
-
|
|
2331
|
-
|
|
2396
|
+
// Dump stats for most recent level
|
|
2397
|
+
if (num_filter_read + num_index_read) {
|
|
2398
|
+
RecordInHistogram(db_statistics_,
|
|
2399
|
+
NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
|
|
2400
|
+
num_index_read + num_filter_read);
|
|
2401
|
+
}
|
|
2402
|
+
if (num_sst_read) {
|
|
2403
|
+
RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
|
|
2404
|
+
num_level_read++;
|
|
2405
|
+
}
|
|
2406
|
+
if (num_level_read) {
|
|
2407
|
+
RecordInHistogram(db_statistics_, NUM_LEVEL_READ_PER_MULTIGET,
|
|
2408
|
+
num_level_read);
|
|
2409
|
+
}
|
|
2332
2410
|
}
|
|
2333
2411
|
|
|
2334
2412
|
if (s.ok() && !blob_ctxs.empty()) {
|
|
@@ -2380,6 +2458,201 @@ void Version::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
2380
2458
|
}
|
|
2381
2459
|
}
|
|
2382
2460
|
|
|
2461
|
+
#ifdef USE_COROUTINES
|
|
2462
|
+
// Looks up the keys of one batch in the next level that has a candidate file.
//
// The batch's file picker is advanced (preparing further levels as needed)
// until it yields a file or the search ends. Each file of that level is then
// filtered via TableCache::MultiGetFilter and, if any keys remain for it,
// either read synchronously with MultiGetFromSST (single-file fast path) or
// queued as a MultiGetFromSSTCoroutine task in `mget_tasks`.
//
// Keys ruled out for this level are gathered into a "leftover" range; when
// both the leftover and the remaining range are non-empty, the batch is split
// and the leftover part is appended to `batches` / `to_process`.
//
// @param read_options     forwarded to the table cache / SST lookups
// @param batch            file picker for the batch being processed; in the
//                         visible caller it points at an element of `batches`
// @param mget_tasks       receives coroutine tasks queued by this call
// @param blob_ctxs        forwarded (dereferenced) to the SST lookups
// @param batches          all batches; may grow by one element on a split
// @param waiting          only inspected (emptiness) for the fast-path check
// @param to_process       receives the index of a newly split batch
// @param num_tasks_queued incremented once per coroutine task queued here
// @param num_filter_read, num_index_read, num_sst_read
//                         counters forwarded by reference to the SST lookups
// @return the first non-OK status encountered (other than NotSupported from
//         MultiGetFilter), otherwise OK
Status Version::ProcessBatch(
    const ReadOptions& read_options, FilePickerMultiGet* batch,
    std::vector<folly::coro::Task<Status>>& mget_tasks,
    std::unordered_map<uint64_t, BlobReadContexts>* blob_ctxs,
    autovector<FilePickerMultiGet, 4>& batches, std::deque<size_t>& waiting,
    std::deque<size_t>& to_process, unsigned int& num_tasks_queued,
    uint64_t& num_filter_read, uint64_t& num_index_read,
    uint64_t& num_sst_read) {
  FilePickerMultiGet& fp = *batch;
  MultiGetRange range = fp.GetRange();
  // Initialize a new empty range. Any keys that are not in this level will
  // eventually become part of the new range.
  MultiGetRange leftover(range, range.begin(), range.begin());
  FdWithKeyRange* f = nullptr;
  Status s;

  // Advance to the first level that yields a file, or stop once the search
  // has ended.
  f = fp.GetNextFileInLevel();
  while (!f) {
    fp.PrepareNextLevelForSearch();
    if (!fp.IsSearchEnded()) {
      f = fp.GetNextFileInLevel();
    } else {
      break;
    }
  }
  while (f) {
    MultiGetRange file_range = fp.CurrentFileRange();
    Cache::Handle* table_handle = nullptr;
    bool skip_filters = IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
                                        fp.IsHitFileLastInLevel());
    bool skip_range_deletions = false;
    if (!skip_filters) {
      Status status = table_cache_->MultiGetFilter(
          read_options, *internal_comparator(), *f->file_metadata,
          mutable_cf_options_.prefix_extractor,
          cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
          fp.GetHitFileLevel(), &file_range, &table_handle);
      if (status.ok()) {
        // Filtering already happened here, so the SST lookup below is told
        // to skip filters and range deletions.
        skip_filters = true;
        skip_range_deletions = true;
      } else if (!status.IsNotSupported()) {
        // NotSupported is tolerated; any other failure aborts the batch.
        s = status;
      }
    }
    if (!s.ok()) {
      break;
    }
    // At this point, file_range contains any keys that are likely in this
    // file. It may have false positives, but that's ok since higher level
    // lookups for the key are dependent on this lookup anyway.
    // Add the complement of file_range to leftover. That's the set of keys
    // definitely not in this level.
    // Subtract the complement of file_range from range, since they will be
    // processed in a separate batch in parallel.
    leftover += ~file_range;
    range -= ~file_range;
    if (!file_range.empty()) {
      if (waiting.empty() && to_process.empty() &&
          !fp.RemainingOverlapInLevel() && leftover.empty() &&
          mget_tasks.empty()) {
        // All keys are in one SST file, so take the fast path
        s = MultiGetFromSST(read_options, file_range, fp.GetHitFileLevel(),
                            skip_filters, skip_range_deletions, f, *blob_ctxs,
                            table_handle, num_filter_read, num_index_read,
                            num_sst_read);
      } else {
        mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
            read_options, file_range, fp.GetHitFileLevel(), skip_filters,
            skip_range_deletions, f, *blob_ctxs, table_handle, num_filter_read,
            num_index_read, num_sst_read));
        ++num_tasks_queued;
      }
    }
    if (fp.KeyMaySpanNextFile() && !file_range.empty()) {
      break;
    }
    f = fp.GetNextFileInLevel();
  }

  // `batch` normally points at an element of `batches`. Appending to
  // `batches` below may relocate that element (presumably once autovector
  // grows past its inline capacity -- confirm against util/autovector.h), so
  // the picker is re-fetched by index after the append instead of being used
  // through the possibly stale `fp` reference.
  FilePickerMultiGet* current = batch;

  // Split the current batch only if some keys are likely in this level and
  // some are not.
  if (s.ok() && !leftover.empty() && !range.empty()) {
    fp.ReplaceRange(range);

    // Locate this batch inside `batches` before the container grows.
    // `batches.size()` doubles as the "not found" sentinel.
    size_t self_idx = batches.size();
    for (size_t i = 0; i < batches.size(); ++i) {
      if (&batches.at(i) == batch) {
        self_idx = i;
        break;
      }
    }

    batches.emplace_back(&leftover, fp);
    to_process.emplace_back(batches.size() - 1);

    // Only re-fetch when the batch was actually found among the pre-existing
    // elements; otherwise `batch` does not live in `batches` and is unaffected.
    if (self_idx < batches.size() - 1) {
      current = &batches.at(self_idx);
    }
  }
  // 1. If f is non-null, that means we might not be done with this level.
  //    This can happen if one of the keys is the last key in the file, i.e.
  //    fp.KeyMaySpanNextFile() is true.
  // 2. If range is empty, then we're done with this range and no need to
  //    prepare the next level
  // 3. If some tasks were queued for this range, then the next level will be
  //    prepared after executing those tasks
  if (!f && !range.empty() && !num_tasks_queued) {
    current->PrepareNextLevelForSearch();
  }
  return s;
}
|
|
2559
|
+
|
|
2560
|
+
// Coroutine-based MultiGet driver.
//
// Starts with a single batch covering `range` and repeatedly hands batches
// from the `to_process` queue to ProcessBatch, which looks through one level
// and may split a batch (appending to `batches` and `to_process`) and/or
// queue coroutine tasks in `mget_tasks`. Whenever `to_process` drains, all
// queued tasks are run to completion on the range's executor; on success the
// batches that were waiting on them are advanced to the next level (where
// applicable) and become the new `to_process` queue.
//
// @param options    read options forwarded to ProcessBatch
// @param range      the keys to look up; also supplies the executor on which
//                   the coroutine tasks are scheduled
// @param blob_ctxs  forwarded to ProcessBatch
// @return the first non-OK status from ProcessBatch or from a completed
//         task, otherwise OK
Status Version::MultiGetAsync(
    const ReadOptions& options, MultiGetRange* range,
    std::unordered_map<uint64_t, BlobReadContexts>* blob_ctxs) {
  autovector<FilePickerMultiGet, 4> batches;
  std::deque<size_t> waiting;
  std::deque<size_t> to_process;
  Status s;
  std::vector<folly::coro::Task<Status>> mget_tasks;
  uint64_t num_filter_read = 0;
  uint64_t num_index_read = 0;
  uint64_t num_sst_read = 0;

  // Create the initial batch with the input range
  batches.emplace_back(range, &storage_info_.level_files_brief_,
                       storage_info_.num_non_empty_levels_,
                       &storage_info_.file_indexer_, user_comparator(),
                       internal_comparator());
  to_process.emplace_back(0);

  while (!to_process.empty()) {
    size_t idx = to_process.front();
    FilePickerMultiGet* batch = &batches.at(idx);
    unsigned int num_tasks_queued = 0;
    to_process.pop_front();
    if (batch->IsSearchEnded() || batch->GetRange().empty()) {
      // Nothing left to do for this batch. Only skip ahead if other batches
      // are queued; otherwise fall through so pending tasks get collected.
      if (!to_process.empty()) {
        continue;
      }
    } else {
      // Look through one level. This may split the batch and enqueue it to
      // to_process
      s = ProcessBatch(options, batch, mget_tasks, blob_ctxs, batches, waiting,
                       to_process, num_tasks_queued, num_filter_read,
                       num_index_read, num_sst_read);
      if (!s.ok()) {
        break;
      }
      // ProcessBatch may have appended a split-off batch to `batches`, which
      // can relocate existing elements (presumably once autovector grows past
      // its inline capacity -- confirm against util/autovector.h). Re-fetch
      // the pointer by index rather than using the possibly stale one.
      batch = &batches.at(idx);
      // Dump the stats since the search has moved to the next level
      if (num_filter_read + num_index_read) {
        RecordInHistogram(db_statistics_,
                          NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
                          num_index_read + num_filter_read);
      }
      if (num_sst_read) {
        RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
      }
      // If ProcessBatch didn't enqueue any coroutine tasks, it means all
      // keys were filtered out. So put the batch back in to_process to
      // lookup in the next level
      if (!num_tasks_queued && !batch->IsSearchEnded()) {
        // Put this back in the processing queue
        to_process.emplace_back(idx);
      } else if (num_tasks_queued) {
        waiting.emplace_back(idx);
      }
    }
    if (to_process.empty()) {
      if (s.ok() && mget_tasks.size() > 0) {
        assert(waiting.size());
        RecordTick(db_statistics_, MULTIGET_COROUTINE_COUNT, mget_tasks.size());
        // Collect all results so far
        std::vector<Status> statuses = folly::coro::blockingWait(
            folly::coro::collectAllRange(std::move(mget_tasks))
                .scheduleOn(&range->context()->executor()));
        // Keep the first failure, if any.
        for (const Status& stat : statuses) {
          if (!stat.ok()) {
            s = stat;
            break;
          }
        }

        if (!s.ok()) {
          break;
        }

        for (size_t wait_idx : waiting) {
          FilePickerMultiGet& fp = batches.at(wait_idx);
          // 1. If fp.GetHitFile() is non-null, then there could be more
          //    overlap in this level. So skip preparing next level.
          // 2. If fp.GetRange() is empty, then this batch is completed
          //    and no need to prepare the next level.
          if (!fp.GetHitFile() && !fp.GetRange().empty()) {
            fp.PrepareNextLevelForSearch();
          }
        }
        // The batches that were waiting on tasks are now ready for another
        // round; `waiting` receives the (empty) old to_process queue.
        to_process.swap(waiting);
      } else {
        assert(!s.ok() || waiting.size() == 0);
      }
    }
  }

  return s;
}
|
|
2654
|
+
#endif
|
|
2655
|
+
|
|
2383
2656
|
bool Version::IsFilterSkipped(int level, bool is_file_last_in_level) {
|
|
2384
2657
|
// Reaching the bottom level implies misses at all upper levels, so we'll
|
|
2385
2658
|
// skip checking the filters when we predict a hit.
|