@nxtedition/rocksdb 13.5.12 → 14.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +33 -2
- package/binding.gyp +2 -2
- package/chained-batch.js +9 -16
- package/deps/rocksdb/rocksdb/BUCK +18 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
- package/deps/rocksdb/rocksdb/Makefile +20 -9
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
- package/deps/rocksdb/rocksdb/db/c.cc +207 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
- package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
- package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
- package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
- package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
- package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
- package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
- package/deps/rocksdb/rocksdb/env/env.cc +12 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
- package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/options.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
- package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
- package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
- package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
- package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
- package/deps/rocksdb/rocksdb/table/format.cc +6 -12
- package/deps/rocksdb/rocksdb/table/format.h +10 -0
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
- package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
- package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
- package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
- package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
- package/deps/rocksdb/rocksdb/util/coding.h +3 -3
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
- package/deps/rocksdb/rocksdb/util/compression.h +5 -0
- package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
- package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
- package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
- package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
- package/index.js +3 -3
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/util.h +38 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
|
@@ -45,7 +45,9 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
45
45
|
need_upper_bound_check_(need_upper_bound_check),
|
|
46
46
|
async_read_in_progress_(false),
|
|
47
47
|
is_last_level_(table->IsLastLevel()),
|
|
48
|
-
block_iter_points_to_real_block_(false) {
|
|
48
|
+
block_iter_points_to_real_block_(false) {
|
|
49
|
+
multi_scan_status_.PermitUncheckedError();
|
|
50
|
+
}
|
|
49
51
|
|
|
50
52
|
~BlockBasedTableIterator() override { ClearBlockHandles(); }
|
|
51
53
|
|
|
@@ -57,7 +59,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
57
59
|
bool NextAndGetResult(IterateResult* result) override;
|
|
58
60
|
void Prev() override;
|
|
59
61
|
bool Valid() const override {
|
|
60
|
-
return !is_out_of_bound_ &&
|
|
62
|
+
return !is_out_of_bound_ && multi_scan_status_.ok() &&
|
|
61
63
|
(is_at_first_key_from_index_ ||
|
|
62
64
|
(block_iter_points_to_real_block_ && block_iter_.Valid()));
|
|
63
65
|
}
|
|
@@ -136,6 +138,9 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
136
138
|
return block_iter_.value();
|
|
137
139
|
}
|
|
138
140
|
Status status() const override {
|
|
141
|
+
if (!multi_scan_status_.ok()) {
|
|
142
|
+
return multi_scan_status_;
|
|
143
|
+
}
|
|
139
144
|
// In case of block cache readahead lookup, it won't add the block to
|
|
140
145
|
// block_handles if its index is invalid. So index_iter_->status check can
|
|
141
146
|
// be skipped.
|
|
@@ -145,10 +150,13 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
145
150
|
assert(!multi_scan_);
|
|
146
151
|
return index_iter_->status();
|
|
147
152
|
} else if (block_iter_points_to_real_block_) {
|
|
153
|
+
// This is the common case.
|
|
148
154
|
return block_iter_.status();
|
|
149
155
|
} else if (async_read_in_progress_) {
|
|
150
156
|
assert(!multi_scan_);
|
|
151
157
|
return Status::TryAgain("Async read in progress");
|
|
158
|
+
} else if (multi_scan_) {
|
|
159
|
+
return multi_scan_status_;
|
|
152
160
|
} else {
|
|
153
161
|
return Status::OK();
|
|
154
162
|
}
|
|
@@ -160,6 +168,8 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
160
168
|
} else if (block_upper_bound_check_ ==
|
|
161
169
|
BlockUpperBound::kUpperBoundBeyondCurBlock) {
|
|
162
170
|
assert(!is_out_of_bound_);
|
|
171
|
+
// MultiScan does not do block level upper bound check yet.
|
|
172
|
+
assert(!multi_scan_);
|
|
163
173
|
return IterBoundCheck::kInbound;
|
|
164
174
|
} else {
|
|
165
175
|
return IterBoundCheck::kUnknown;
|
|
@@ -225,7 +235,7 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
225
235
|
}
|
|
226
236
|
}
|
|
227
237
|
|
|
228
|
-
void Prepare(const
|
|
238
|
+
void Prepare(const MultiScanArgs* scan_opts) override;
|
|
229
239
|
|
|
230
240
|
FilePrefetchBuffer* prefetch_buffer() {
|
|
231
241
|
return block_prefetcher_.prefetch_buffer();
|
|
@@ -233,6 +243,16 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
233
243
|
|
|
234
244
|
std::unique_ptr<InternalIteratorBase<IndexValue>> index_iter_;
|
|
235
245
|
|
|
246
|
+
bool TEST_IsBlockPinnedByMultiScan(size_t block_idx) {
|
|
247
|
+
if (!multi_scan_) {
|
|
248
|
+
return false;
|
|
249
|
+
}
|
|
250
|
+
if (block_idx >= multi_scan_->pinned_data_blocks.size()) {
|
|
251
|
+
return false;
|
|
252
|
+
}
|
|
253
|
+
return !multi_scan_->pinned_data_blocks[block_idx].IsEmpty();
|
|
254
|
+
}
|
|
255
|
+
|
|
236
256
|
private:
|
|
237
257
|
enum class IterDirection {
|
|
238
258
|
kForward,
|
|
@@ -371,29 +391,107 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
371
391
|
// *** END States used by both regular scan and multiscan
|
|
372
392
|
|
|
373
393
|
// *** BEGIN MultiScan related states ***
|
|
394
|
+
struct AsyncReadState {
|
|
395
|
+
std::unique_ptr<char[]> buf{nullptr};
|
|
396
|
+
// Indices into pinned_data_blocks that this request reads.
|
|
397
|
+
std::vector<size_t> block_indices;
|
|
398
|
+
// BlockHandle for each block in block_indices.
|
|
399
|
+
std::vector<BlockHandle> blocks;
|
|
400
|
+
void* io_handle{nullptr};
|
|
401
|
+
IOHandleDeleter del_fn{nullptr};
|
|
402
|
+
// offset for this async read request.
|
|
403
|
+
uint64_t offset{0};
|
|
404
|
+
|
|
405
|
+
// These two states are populated from the FSReadRequest
|
|
406
|
+
// by ReadAsync callback
|
|
407
|
+
Status status;
|
|
408
|
+
Slice result;
|
|
409
|
+
|
|
410
|
+
// For direct I/O support
|
|
411
|
+
AlignedBuf aligned_buf{nullptr};
|
|
412
|
+
|
|
413
|
+
bool finished{false};
|
|
414
|
+
|
|
415
|
+
AsyncReadState() = default;
|
|
416
|
+
DECLARE_DEFAULT_MOVES(AsyncReadState);
|
|
417
|
+
// Delete copy operations
|
|
418
|
+
AsyncReadState(const AsyncReadState&) = delete;
|
|
419
|
+
AsyncReadState& operator=(const AsyncReadState&) = delete;
|
|
420
|
+
|
|
421
|
+
void CleanUpIOHandle() {
|
|
422
|
+
if (io_handle != nullptr) {
|
|
423
|
+
assert(del_fn);
|
|
424
|
+
del_fn(io_handle);
|
|
425
|
+
io_handle = nullptr;
|
|
426
|
+
}
|
|
427
|
+
finished = true;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
~AsyncReadState() {
|
|
431
|
+
// Should be cleaned up before destruction.
|
|
432
|
+
assert(io_handle == nullptr);
|
|
433
|
+
}
|
|
434
|
+
};
|
|
435
|
+
|
|
374
436
|
struct MultiScanState {
|
|
375
|
-
//
|
|
376
|
-
const std::
|
|
437
|
+
// For Aborting async I/Os in destructor.
|
|
438
|
+
const std::shared_ptr<FileSystem> fs;
|
|
439
|
+
const MultiScanArgs* scan_opts;
|
|
377
440
|
std::vector<CachableEntry<Block>> pinned_data_blocks;
|
|
378
|
-
|
|
379
|
-
//
|
|
380
|
-
//
|
|
441
|
+
// The separator of each data block in above pinned_data_blocks vector.
|
|
442
|
+
// Its size is same as pinned_data_blocks.
|
|
443
|
+
// The value of separator is larger than or equal to the last key in the
|
|
444
|
+
// corresponding data block.
|
|
445
|
+
std::vector<std::string> data_block_separators;
|
|
446
|
+
// Track previously seeked key in multi-scan.
|
|
447
|
+
// This is used to ensure that the seek key keeps moving forward, as
|
|
448
|
+
// blocks that are smaller than the seek key are unpinned from memory.
|
|
449
|
+
std::string prev_seek_key_;
|
|
450
|
+
|
|
451
|
+
// Indices into pinned_data_blocks for data blocks for each scan range.
|
|
381
452
|
// inclusive start, exclusive end
|
|
382
|
-
std::vector<std::tuple<size_t, size_t>>
|
|
453
|
+
std::vector<std::tuple<size_t, size_t>> block_index_ranges_per_scan;
|
|
383
454
|
size_t next_scan_idx;
|
|
384
455
|
size_t cur_data_block_idx;
|
|
385
456
|
|
|
457
|
+
// States for async reads.
|
|
458
|
+
//
|
|
459
|
+
// Each async state corresponds to an async read request.
|
|
460
|
+
// Each async read request may read content for multiple blocks
|
|
461
|
+
// (potentially coalesced). In PollForBlock(idx), we will poll for the
|
|
462
|
+
// completion of the async read request responsible for
|
|
463
|
+
// pinned_data_blocks[idx], and populate `pinned_data_blocks` with all the
|
|
464
|
+
// blocks read. To find out the async read request responsible for
|
|
465
|
+
// pinned_data_blocks[idx], we store the mapping in
|
|
466
|
+
// block_idx_to_readreq_idx. Index i is in block_idx_to_readreq_idx and
|
|
467
|
+
// block_idx_to_readreq_idx[i] = j iff pinned_data_blocks[i] is read by
|
|
468
|
+
// async_states[j].
|
|
469
|
+
std::vector<AsyncReadState> async_states;
|
|
470
|
+
UnorderedMap<size_t, size_t> block_idx_to_readreq_idx;
|
|
471
|
+
size_t prefetch_max_idx;
|
|
472
|
+
|
|
386
473
|
MultiScanState(
|
|
387
|
-
const std::
|
|
474
|
+
const std::shared_ptr<FileSystem>& _fs, const MultiScanArgs* _scan_opts,
|
|
388
475
|
std::vector<CachableEntry<Block>>&& _pinned_data_blocks,
|
|
389
|
-
std::vector<std::
|
|
390
|
-
|
|
476
|
+
std::vector<std::string>&& _data_block_separators,
|
|
477
|
+
std::vector<std::tuple<size_t, size_t>>&& _block_index_ranges_per_scan,
|
|
478
|
+
UnorderedMap<size_t, size_t>&& _block_idx_to_readreq_idx,
|
|
479
|
+
std::vector<AsyncReadState>&& _async_states, size_t _prefetch_max_idx)
|
|
480
|
+
: fs(_fs),
|
|
481
|
+
scan_opts(_scan_opts),
|
|
391
482
|
pinned_data_blocks(std::move(_pinned_data_blocks)),
|
|
392
|
-
|
|
483
|
+
data_block_separators(std::move(_data_block_separators)),
|
|
484
|
+
block_index_ranges_per_scan(std::move(_block_index_ranges_per_scan)),
|
|
393
485
|
next_scan_idx(0),
|
|
394
|
-
cur_data_block_idx(0)
|
|
486
|
+
cur_data_block_idx(0),
|
|
487
|
+
async_states(std::move(_async_states)),
|
|
488
|
+
block_idx_to_readreq_idx(std::move(_block_idx_to_readreq_idx)),
|
|
489
|
+
prefetch_max_idx(_prefetch_max_idx) {}
|
|
490
|
+
|
|
491
|
+
~MultiScanState();
|
|
395
492
|
};
|
|
396
493
|
|
|
494
|
+
Status multi_scan_status_;
|
|
397
495
|
std::unique_ptr<MultiScanState> multi_scan_;
|
|
398
496
|
// *** END MultiScan related APIs and states ***
|
|
399
497
|
|
|
@@ -513,10 +611,121 @@ class BlockBasedTableIterator : public InternalIteratorBase<Slice> {
|
|
|
513
611
|
// *** END APIs relevant to auto tuning of readahead_size ***
|
|
514
612
|
|
|
515
613
|
// *** BEGIN APIs relevant to multiscan ***
|
|
516
|
-
|
|
517
|
-
|
|
614
|
+
|
|
615
|
+
// Wrapper for SeekMultiScanImpl for handling out of bound
|
|
616
|
+
void SeekMultiScan(const Slice* target);
|
|
617
|
+
|
|
618
|
+
// Return true if the result is out of bound
|
|
619
|
+
bool SeekMultiScanImpl(const Slice* seek_target);
|
|
518
620
|
|
|
519
621
|
void FindBlockForwardInMultiScan();
|
|
622
|
+
|
|
623
|
+
// Unpins blocks from the immediately previous scan range.
|
|
624
|
+
void UnpinPreviousScanBlocks(size_t current_scan_idx);
|
|
625
|
+
|
|
626
|
+
void PrepareReadAsyncCallBack(FSReadRequest& req, void* cb_arg) {
|
|
627
|
+
// Record status, result and sanity check offset from `req`.
|
|
628
|
+
AsyncReadState* async_state = static_cast<AsyncReadState*>(cb_arg);
|
|
629
|
+
|
|
630
|
+
async_state->status = req.status;
|
|
631
|
+
async_state->result = req.result;
|
|
632
|
+
|
|
633
|
+
if (async_state->status.ok()) {
|
|
634
|
+
assert(async_state->offset == req.offset);
|
|
635
|
+
if (async_state->offset != req.offset) {
|
|
636
|
+
async_state->status = Status::InvalidArgument(
|
|
637
|
+
"offset mismatch between async read request " +
|
|
638
|
+
std::to_string(async_state->offset) + " and async callback " +
|
|
639
|
+
std::to_string(req.offset));
|
|
640
|
+
}
|
|
641
|
+
} else {
|
|
642
|
+
assert(async_state->status.IsAborted());
|
|
643
|
+
}
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
void MultiScanSeekTargetFromBlock(const Slice* seek_target, size_t block_idx);
|
|
647
|
+
void MultiScanUnexpectedSeekTarget(const Slice* seek_target,
|
|
648
|
+
size_t block_idx);
|
|
649
|
+
|
|
650
|
+
// Return true, if there is an error, or end of file
|
|
651
|
+
bool MultiScanLoadDataBlock(size_t idx) {
|
|
652
|
+
if (idx >= multi_scan_->prefetch_max_idx) {
|
|
653
|
+
// TODO: Fix the max_prefetch_size support for multiple files.
|
|
654
|
+
// The goal is to limit the memory usage, prefetch could be done
|
|
655
|
+
// incrementally.
|
|
656
|
+
if (multi_scan_->scan_opts->max_prefetch_size == 0) {
|
|
657
|
+
// If max_prefetch_size is not set, treat this as end of file.
|
|
658
|
+
ResetDataIter();
|
|
659
|
+
assert(!is_out_of_bound_);
|
|
660
|
+
assert(!Valid());
|
|
661
|
+
} else {
|
|
662
|
+
// If max_prefetch_size is set, treat this as error.
|
|
663
|
+
multi_scan_status_ = Status::PrefetchLimitReached();
|
|
664
|
+
}
|
|
665
|
+
return true;
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
if (!multi_scan_->async_states.empty()) {
|
|
669
|
+
multi_scan_status_ = PollForBlock(idx);
|
|
670
|
+
if (!multi_scan_status_.ok()) {
|
|
671
|
+
return true;
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
// This block should have been initialized
|
|
675
|
+
assert(multi_scan_->pinned_data_blocks[idx].GetValue());
|
|
676
|
+
// Note that the block_iter_ takes ownership of the pinned data block
|
|
677
|
+
// TODO: we can delegate the clean up like with pinned_iters_mgr_ if
|
|
678
|
+
// need to pin blocks longer.
|
|
679
|
+
table_->NewDataBlockIterator<DataBlockIter>(
|
|
680
|
+
read_options_, multi_scan_->pinned_data_blocks[idx], &block_iter_,
|
|
681
|
+
Status::OK());
|
|
682
|
+
return false;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
// After PollForBlock(idx), the async request that contains
|
|
686
|
+
// pinned_data_blocks[idx] should be done, and all blocks contained in this
|
|
687
|
+
// read request will be initialized in pinned_data_blocks and pinned in block
|
|
688
|
+
// cache.
|
|
689
|
+
Status PollForBlock(size_t idx);
|
|
690
|
+
|
|
691
|
+
// Helper function to create and pin a block in cache from buffer data
|
|
692
|
+
// Handles decompressor setup with dictionary loading and block
|
|
693
|
+
// creation/pinning. The buffer_start_offset is the file offset where
|
|
694
|
+
// buffer_data starts.
|
|
695
|
+
Status CreateAndPinBlockFromBuffer(const BlockHandle& block,
|
|
696
|
+
uint64_t buffer_start_offset,
|
|
697
|
+
const Slice& buffer_data,
|
|
698
|
+
CachableEntry<Block>& pinned_block_entry);
|
|
699
|
+
|
|
700
|
+
Status CollectBlockHandles(
|
|
701
|
+
const std::vector<ScanOptions>& scan_opts,
|
|
702
|
+
std::vector<BlockHandle>* scan_block_handles,
|
|
703
|
+
std::vector<std::tuple<size_t, size_t>>* block_index_ranges_per_scan,
|
|
704
|
+
std::vector<std::string>* data_block_boundary_keys);
|
|
705
|
+
|
|
706
|
+
Status FilterAndPinCachedBlocks(
|
|
707
|
+
const std::vector<BlockHandle>& scan_block_handles,
|
|
708
|
+
const MultiScanArgs* multiscan_opts,
|
|
709
|
+
std::vector<size_t>* block_indices_to_read,
|
|
710
|
+
std::vector<CachableEntry<Block>>* pinned_data_blocks_guard,
|
|
711
|
+
size_t* prefetched_max_idx);
|
|
712
|
+
|
|
713
|
+
void PrepareIORequests(
|
|
714
|
+
const std::vector<size_t>& block_indices_to_read,
|
|
715
|
+
const std::vector<BlockHandle>& scan_block_handles,
|
|
716
|
+
const MultiScanArgs* multiscan_opts,
|
|
717
|
+
std::vector<FSReadRequest>* read_reqs,
|
|
718
|
+
UnorderedMap<size_t, size_t>* block_idx_to_readreq_idx,
|
|
719
|
+
std::vector<std::vector<size_t>>* coalesced_block_indices);
|
|
720
|
+
|
|
721
|
+
Status ExecuteIO(
|
|
722
|
+
const std::vector<BlockHandle>& scan_block_handles,
|
|
723
|
+
const MultiScanArgs* multiscan_opts,
|
|
724
|
+
const std::vector<std::vector<size_t>>& coalesced_block_indices,
|
|
725
|
+
std::vector<FSReadRequest>* read_reqs,
|
|
726
|
+
std::vector<AsyncReadState>* async_states,
|
|
727
|
+
std::vector<CachableEntry<Block>>* pinned_data_blocks_guard);
|
|
728
|
+
|
|
520
729
|
// *** END APIs relevant to multiscan ***
|
|
521
730
|
};
|
|
522
731
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -109,8 +109,8 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
|
|
|
109
109
|
CachableEntry<T>* out_parsed_block) const; \
|
|
110
110
|
template Status BlockBasedTable::CreateAndPinBlockInCache<T>( \
|
|
111
111
|
const ReadOptions& ro, const BlockHandle& handle, \
|
|
112
|
-
BlockContents* block_contents,
|
|
113
|
-
const;
|
|
112
|
+
UnownedPtr<Decompressor> decomp, BlockContents* block_contents, \
|
|
113
|
+
CachableEntry<T>* out_parsed_block) const;
|
|
114
114
|
|
|
115
115
|
INSTANTIATE_BLOCKLIKE_TEMPLATES(ParsedFullFilterBlock);
|
|
116
116
|
INSTANTIATE_BLOCKLIKE_TEMPLATES(DecompressorDict);
|
|
@@ -1333,25 +1333,59 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|
|
1333
1333
|
s = FindMetaBlock(meta_iter, kUserDefinedIndexPrefix + udi_name,
|
|
1334
1334
|
&udi_block_handle);
|
|
1335
1335
|
if (!s.ok()) {
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1336
|
+
RecordTick(rep_->ioptions.statistics.get(),
|
|
1337
|
+
SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT);
|
|
1338
|
+
if (table_options.fail_if_no_udi_on_open) {
|
|
1339
|
+
ROCKS_LOG_ERROR(rep_->ioptions.logger,
|
|
1340
|
+
"Failed to find the the UDI block %s in file %s; %s",
|
|
1341
|
+
udi_name.c_str(), rep_->file->file_name().c_str(),
|
|
1342
|
+
s.ToString().c_str());
|
|
1343
|
+
// MAke the status more informative
|
|
1344
|
+
s = Status::Corruption(s.ToString(), rep_->file->file_name());
|
|
1345
|
+
return s;
|
|
1346
|
+
} else {
|
|
1347
|
+
// Emit a warning, but ignore the error status
|
|
1348
|
+
ROCKS_LOG_WARN(rep_->ioptions.logger,
|
|
1349
|
+
"Failed to find the the UDI block %s in file %s; %s",
|
|
1350
|
+
udi_name.c_str(), rep_->file->file_name().c_str(),
|
|
1351
|
+
s.ToString().c_str());
|
|
1352
|
+
s = Status::OK();
|
|
1353
|
+
}
|
|
1347
1354
|
}
|
|
1348
|
-
assert(!rep_->udi_block.IsEmpty());
|
|
1349
1355
|
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1356
|
+
// If the UDI block size is 0, that means there's effectively no user
|
|
1357
|
+
// defined index. In that case, skip setting up the reader.
|
|
1358
|
+
if (udi_block_handle.size() > 0) {
|
|
1359
|
+
// Read the block, and allocate on heap or pin in cache. The UDI block is
|
|
1360
|
+
// not compressed. RetrieveBlock will verify the checksum.
|
|
1361
|
+
if (s.ok()) {
|
|
1362
|
+
s = RetrieveBlock(prefetch_buffer, ro, udi_block_handle,
|
|
1363
|
+
rep_->decompressor.get(), &rep_->udi_block,
|
|
1364
|
+
/*get_context=*/nullptr, lookup_context,
|
|
1365
|
+
/*for_compaction=*/false, use_cache,
|
|
1366
|
+
/*async_read=*/false,
|
|
1367
|
+
/*use_block_cache_for_lookup=*/false);
|
|
1368
|
+
}
|
|
1369
|
+
if (s.ok()) {
|
|
1370
|
+
assert(!rep_->udi_block.IsEmpty());
|
|
1371
|
+
|
|
1372
|
+
std::unique_ptr<UserDefinedIndexReader> udi_reader;
|
|
1373
|
+
UserDefinedIndexOption udi_option;
|
|
1374
|
+
udi_option.comparator = rep_->internal_comparator.user_comparator();
|
|
1375
|
+
s = table_options.user_defined_index_factory->NewReader(
|
|
1376
|
+
udi_option, rep_->udi_block.GetValue()->data, udi_reader);
|
|
1377
|
+
if (s.ok()) {
|
|
1378
|
+
if (udi_reader) {
|
|
1379
|
+
index_reader = std::make_unique<UserDefinedIndexReaderWrapper>(
|
|
1380
|
+
udi_name, std::move(index_reader), std::move(udi_reader));
|
|
1381
|
+
} else {
|
|
1382
|
+
s = Status::Corruption("Failed to create UDI reader for " +
|
|
1383
|
+
udi_name + " in file " +
|
|
1384
|
+
rep_->file->file_name());
|
|
1385
|
+
}
|
|
1386
|
+
}
|
|
1387
|
+
}
|
|
1388
|
+
}
|
|
1355
1389
|
}
|
|
1356
1390
|
|
|
1357
1391
|
rep_->index_reader = std::move(index_reader);
|
|
@@ -1359,7 +1393,7 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|
|
1359
1393
|
// The partitions of partitioned index are always stored in cache. They
|
|
1360
1394
|
// are hence follow the configuration for pin and prefetch regardless of
|
|
1361
1395
|
// the value of cache_index_and_filter_blocks
|
|
1362
|
-
if (prefetch_all || pin_partition) {
|
|
1396
|
+
if (s.ok() && (prefetch_all || pin_partition)) {
|
|
1363
1397
|
s = rep_->index_reader->CacheDependencies(ro, pin_partition,
|
|
1364
1398
|
prefetch_buffer);
|
|
1365
1399
|
}
|
|
@@ -1741,13 +1775,55 @@ Status BlockBasedTable::LookupAndPinBlocksInCache(
|
|
|
1741
1775
|
|
|
1742
1776
|
template <typename TBlocklike>
|
|
1743
1777
|
Status BlockBasedTable::CreateAndPinBlockInCache(
|
|
1744
|
-
const ReadOptions& ro, const BlockHandle& handle,
|
|
1778
|
+
const ReadOptions& ro, const BlockHandle& handle,
|
|
1779
|
+
UnownedPtr<Decompressor> decomp, BlockContents* contents,
|
|
1745
1780
|
CachableEntry<TBlocklike>* out_parsed_block) const {
|
|
1746
|
-
|
|
1747
|
-
|
|
1748
|
-
|
|
1749
|
-
|
|
1750
|
-
|
|
1781
|
+
CompressionType compression_type = GetBlockCompressionType(*contents);
|
|
1782
|
+
// If we don't own the contents and we don't need to decompress, copy
|
|
1783
|
+
// the block to heap in order to have ownership. If decompression is
|
|
1784
|
+
// needed, then the decompressor will allocate a buffer.
|
|
1785
|
+
if (!contents->own_bytes() && compression_type == kNoCompression) {
|
|
1786
|
+
Slice src = Slice(contents->data.data(), BlockSizeWithTrailer(handle));
|
|
1787
|
+
*contents = BlockContents(
|
|
1788
|
+
CopyBufferToHeap(GetMemoryAllocator(rep_->table_options), src),
|
|
1789
|
+
handle.size());
|
|
1790
|
+
#ifndef NDEBUG
|
|
1791
|
+
contents->has_trailer = true;
|
|
1792
|
+
#endif
|
|
1793
|
+
}
|
|
1794
|
+
|
|
1795
|
+
Status s;
|
|
1796
|
+
if (ro.fill_cache) {
|
|
1797
|
+
s = MaybeReadBlockAndLoadToCache(nullptr, ro, handle, decomp,
|
|
1798
|
+
/*for_compaction=*/false, out_parsed_block,
|
|
1799
|
+
nullptr, nullptr, contents,
|
|
1800
|
+
/*async_read=*/false,
|
|
1801
|
+
/*use_block_cache_for_lookup=*/true);
|
|
1802
|
+
}
|
|
1803
|
+
|
|
1804
|
+
if (!s.ok()) {
|
|
1805
|
+
return s;
|
|
1806
|
+
}
|
|
1807
|
+
|
|
1808
|
+
// fill_cache could be false, or no block cache is configured. In that
|
|
1809
|
+
// case, decompress if necessary and take ownership of the block
|
|
1810
|
+
if (out_parsed_block->GetValue() == nullptr && contents != nullptr) {
|
|
1811
|
+
BlockContents tmp_contents;
|
|
1812
|
+
if (compression_type != kNoCompression) {
|
|
1813
|
+
s = DecompressSerializedBlock(contents->data.data(), handle.size(),
|
|
1814
|
+
compression_type, *decomp, &tmp_contents,
|
|
1815
|
+
rep_->ioptions,
|
|
1816
|
+
GetMemoryAllocator(rep_->table_options));
|
|
1817
|
+
} else {
|
|
1818
|
+
tmp_contents = std::move(*contents);
|
|
1819
|
+
}
|
|
1820
|
+
if (s.ok()) {
|
|
1821
|
+
std::unique_ptr<TBlocklike> block_holder;
|
|
1822
|
+
rep_->create_context.Create(&block_holder, std::move(tmp_contents));
|
|
1823
|
+
out_parsed_block->SetOwnedValue(std::move(block_holder));
|
|
1824
|
+
}
|
|
1825
|
+
}
|
|
1826
|
+
return s;
|
|
1751
1827
|
}
|
|
1752
1828
|
|
|
1753
1829
|
// If contents is nullptr, this function looks up the block caches for the
|
|
@@ -311,7 +311,7 @@ class BlockBasedTable : public TableReader {
|
|
|
311
311
|
template <typename TBlocklike>
|
|
312
312
|
Status CreateAndPinBlockInCache(
|
|
313
313
|
const ReadOptions& ro, const BlockHandle& handle,
|
|
314
|
-
BlockContents* block_contents,
|
|
314
|
+
UnownedPtr<Decompressor> decomp, BlockContents* block_contents,
|
|
315
315
|
CachableEntry<TBlocklike>* out_parsed_block) const;
|
|
316
316
|
|
|
317
317
|
struct Rep;
|
|
@@ -37,8 +37,6 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
|
|
|
37
37
|
RandomAccessFileReader* file = rep_->file.get();
|
|
38
38
|
const Footer& footer = rep_->footer;
|
|
39
39
|
const ImmutableOptions& ioptions = rep_->ioptions;
|
|
40
|
-
size_t read_amp_bytes_per_bit = rep_->table_options.read_amp_bytes_per_bit;
|
|
41
|
-
MemoryAllocator* memory_allocator = GetMemoryAllocator(rep_->table_options);
|
|
42
40
|
|
|
43
41
|
if (ioptions.allow_mmap_reads) {
|
|
44
42
|
size_t idx_in_batch = 0;
|
|
@@ -266,79 +264,8 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::RetrieveMultipleBlocks)
|
|
|
266
264
|
}
|
|
267
265
|
|
|
268
266
|
if (s.ok()) {
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
// serialized block has to be inserted into a cache. That falls into the
|
|
272
|
-
// following cases -
|
|
273
|
-
// 1. serialized block is not compressed, it needs to be inserted into
|
|
274
|
-
// the uncompressed block cache if there is one
|
|
275
|
-
// 2. If the serialized block is compressed, it needs to be inserted
|
|
276
|
-
// into the compressed block cache if there is one
|
|
277
|
-
//
|
|
278
|
-
// In all other cases, the serialized block is either uncompressed into a
|
|
279
|
-
// heap buffer or there is no cache at all.
|
|
280
|
-
CompressionType compression_type =
|
|
281
|
-
GetBlockCompressionType(serialized_block);
|
|
282
|
-
if ((use_fs_scratch || use_shared_buffer) &&
|
|
283
|
-
compression_type == kNoCompression) {
|
|
284
|
-
Slice serialized =
|
|
285
|
-
Slice(req.result.data() + req_offset, BlockSizeWithTrailer(handle));
|
|
286
|
-
serialized_block = BlockContents(
|
|
287
|
-
CopyBufferToHeap(GetMemoryAllocator(rep_->table_options),
|
|
288
|
-
serialized),
|
|
289
|
-
handle.size());
|
|
290
|
-
#ifndef NDEBUG
|
|
291
|
-
serialized_block.has_trailer = true;
|
|
292
|
-
#endif
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
if (s.ok()) {
|
|
297
|
-
if (options.fill_cache) {
|
|
298
|
-
CachableEntry<Block_kData>* block_entry = &results[idx_in_batch];
|
|
299
|
-
// MaybeReadBlockAndLoadToCache will insert into the block caches if
|
|
300
|
-
// necessary. Since we're passing the serialized block contents, it
|
|
301
|
-
// will avoid looking up the block cache
|
|
302
|
-
s = MaybeReadBlockAndLoadToCache(
|
|
303
|
-
nullptr, options, handle, decomp,
|
|
304
|
-
/*for_compaction=*/false, block_entry, mget_iter->get_context,
|
|
305
|
-
/*lookup_context=*/nullptr, &serialized_block,
|
|
306
|
-
/*async_read=*/false, /*use_block_cache_for_lookup=*/true);
|
|
307
|
-
|
|
308
|
-
if (!s.ok()) {
|
|
309
|
-
statuses[idx_in_batch] = s;
|
|
310
|
-
continue;
|
|
311
|
-
}
|
|
312
|
-
// block_entry value could be null if no block cache is present, i.e
|
|
313
|
-
// BlockBasedTableOptions::no_block_cache is true and no compressed
|
|
314
|
-
// block cache is configured. In that case, fall
|
|
315
|
-
// through and set up the block explicitly
|
|
316
|
-
if (block_entry->GetValue() != nullptr) {
|
|
317
|
-
continue;
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
CompressionType compression_type =
|
|
322
|
-
GetBlockCompressionType(serialized_block);
|
|
323
|
-
BlockContents contents;
|
|
324
|
-
if (compression_type != kNoCompression) {
|
|
325
|
-
s = DecompressSerializedBlock(
|
|
326
|
-
req.result.data() + req_offset, handle.size(), compression_type,
|
|
327
|
-
*decomp, &contents, rep_->ioptions, memory_allocator);
|
|
328
|
-
} else {
|
|
329
|
-
// There are two cases here:
|
|
330
|
-
// 1) caller uses the shared buffer (scratch or direct io buffer);
|
|
331
|
-
// 2) we use the requst buffer.
|
|
332
|
-
// If scratch buffer or direct io buffer is used, we ensure that
|
|
333
|
-
// all serialized blocks are copyed to the heap as single blocks. If
|
|
334
|
-
// scratch buffer is not used, we also have no combined read, so the
|
|
335
|
-
// serialized block can be used directly.
|
|
336
|
-
contents = std::move(serialized_block);
|
|
337
|
-
}
|
|
338
|
-
if (s.ok()) {
|
|
339
|
-
results[idx_in_batch].SetOwnedValue(std::make_unique<Block_kData>(
|
|
340
|
-
std::move(contents), read_amp_bytes_per_bit, ioptions.stats));
|
|
341
|
-
}
|
|
267
|
+
s = CreateAndPinBlockInCache(options, handle, decomp, &serialized_block,
|
|
268
|
+
&results[idx_in_batch]);
|
|
342
269
|
}
|
|
343
270
|
statuses[idx_in_batch] = s;
|
|
344
271
|
}
|