@nxtedition/rocksdb 6.0.2 → 7.0.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BUILDING.md +12 -4
- package/binding.cc +589 -128
- package/chained-batch.js +6 -6
- package/deps/rocksdb/rocksdb/CMakeLists.txt +9 -0
- package/deps/rocksdb/rocksdb/Makefile +16 -5
- package/deps/rocksdb/rocksdb/TARGETS +23 -2
- package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +12 -4
- package/deps/rocksdb/rocksdb/db/c.cc +26 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +29 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +16 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +402 -30
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +14 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +33 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +54 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +8 -1
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +14 -15
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +331 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +221 -92
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -2
- package/deps/rocksdb/rocksdb/db/db_test_util.h +4 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +1 -171
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.h +126 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +57 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +2 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +10 -11
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +11 -1
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +12 -1
- package/deps/rocksdb/rocksdb/db/repair_test.cc +32 -10
- package/deps/rocksdb/rocksdb/db/snapshot_impl.h +3 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +19 -127
- package/deps/rocksdb/rocksdb/db/table_cache.h +3 -2
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +140 -0
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +130 -128
- package/deps/rocksdb/rocksdb/db/version_edit.cc +20 -0
- package/deps/rocksdb/rocksdb/db/version_edit.h +13 -4
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +14 -14
- package/deps/rocksdb/rocksdb/db/version_set.cc +205 -212
- package/deps/rocksdb/rocksdb/db/version_set.h +11 -0
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +154 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -9
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +15 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +159 -65
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +43 -21
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +142 -17
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +23 -27
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +2 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +23 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +4 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +189 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -1
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
- package/deps/rocksdb/rocksdb/options/options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +6 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +107 -9
- package/deps/rocksdb/rocksdb/src.mk +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +9 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +80 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +81 -757
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +21 -15
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +9 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +754 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +8 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +1 -10
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +59 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +18 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +0 -61
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +0 -13
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +60 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +2 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +39 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +46 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +2 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -1
- package/deps/rocksdb/rocksdb/table/table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/unique_id.cc +27 -0
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +3 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +23 -7
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +9 -1
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +72 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.h +144 -0
- package/deps/rocksdb/rocksdb/util/compression.h +49 -0
- package/deps/rocksdb/rocksdb/util/coro_utils.h +111 -0
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +55 -0
- package/deps/rocksdb/rocksdb.gyp +16 -15
- package/index.js +186 -3
- package/iterator.js +1 -0
- package/package-lock.json +23687 -0
- package/package.json +2 -30
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/deps/liburing/liburing/README +0 -46
- package/deps/liburing/liburing/test/232c93d07b74-test.c +0 -305
- package/deps/liburing/liburing/test/35fa71a030ca-test.c +0 -329
- package/deps/liburing/liburing/test/500f9fbadef8-test.c +0 -89
- package/deps/liburing/liburing/test/7ad0e4b2f83c-test.c +0 -93
- package/deps/liburing/liburing/test/8a9973408177-test.c +0 -106
- package/deps/liburing/liburing/test/917257daa0fe-test.c +0 -53
- package/deps/liburing/liburing/test/Makefile +0 -312
- package/deps/liburing/liburing/test/a0908ae19763-test.c +0 -58
- package/deps/liburing/liburing/test/a4c0b3decb33-test.c +0 -180
- package/deps/liburing/liburing/test/accept-link.c +0 -251
- package/deps/liburing/liburing/test/accept-reuse.c +0 -164
- package/deps/liburing/liburing/test/accept-test.c +0 -79
- package/deps/liburing/liburing/test/accept.c +0 -476
- package/deps/liburing/liburing/test/across-fork.c +0 -283
- package/deps/liburing/liburing/test/b19062a56726-test.c +0 -53
- package/deps/liburing/liburing/test/b5837bd5311d-test.c +0 -77
- package/deps/liburing/liburing/test/ce593a6c480a-test.c +0 -135
- package/deps/liburing/liburing/test/close-opath.c +0 -122
- package/deps/liburing/liburing/test/config +0 -10
- package/deps/liburing/liburing/test/connect.c +0 -398
- package/deps/liburing/liburing/test/cq-full.c +0 -96
- package/deps/liburing/liburing/test/cq-overflow.c +0 -294
- package/deps/liburing/liburing/test/cq-peek-batch.c +0 -102
- package/deps/liburing/liburing/test/cq-ready.c +0 -94
- package/deps/liburing/liburing/test/cq-size.c +0 -58
- package/deps/liburing/liburing/test/d4ae271dfaae-test.c +0 -96
- package/deps/liburing/liburing/test/d77a67ed5f27-test.c +0 -65
- package/deps/liburing/liburing/test/defer.c +0 -307
- package/deps/liburing/liburing/test/double-poll-crash.c +0 -186
- package/deps/liburing/liburing/test/eeed8b54e0df-test.c +0 -114
- package/deps/liburing/liburing/test/empty-eownerdead.c +0 -42
- package/deps/liburing/liburing/test/eventfd-disable.c +0 -151
- package/deps/liburing/liburing/test/eventfd-ring.c +0 -97
- package/deps/liburing/liburing/test/eventfd.c +0 -112
- package/deps/liburing/liburing/test/fadvise.c +0 -202
- package/deps/liburing/liburing/test/fallocate.c +0 -249
- package/deps/liburing/liburing/test/fc2a85cb02ef-test.c +0 -138
- package/deps/liburing/liburing/test/file-register.c +0 -843
- package/deps/liburing/liburing/test/file-update.c +0 -173
- package/deps/liburing/liburing/test/files-exit-hang-poll.c +0 -128
- package/deps/liburing/liburing/test/files-exit-hang-timeout.c +0 -134
- package/deps/liburing/liburing/test/fixed-link.c +0 -90
- package/deps/liburing/liburing/test/fsync.c +0 -224
- package/deps/liburing/liburing/test/hardlink.c +0 -136
- package/deps/liburing/liburing/test/helpers.c +0 -135
- package/deps/liburing/liburing/test/helpers.h +0 -67
- package/deps/liburing/liburing/test/io-cancel.c +0 -537
- package/deps/liburing/liburing/test/io_uring_enter.c +0 -296
- package/deps/liburing/liburing/test/io_uring_register.c +0 -664
- package/deps/liburing/liburing/test/io_uring_setup.c +0 -192
- package/deps/liburing/liburing/test/iopoll.c +0 -366
- package/deps/liburing/liburing/test/lfs-openat-write.c +0 -117
- package/deps/liburing/liburing/test/lfs-openat.c +0 -273
- package/deps/liburing/liburing/test/link-timeout.c +0 -1107
- package/deps/liburing/liburing/test/link.c +0 -496
- package/deps/liburing/liburing/test/link_drain.c +0 -229
- package/deps/liburing/liburing/test/madvise.c +0 -195
- package/deps/liburing/liburing/test/mkdir.c +0 -108
- package/deps/liburing/liburing/test/multicqes_drain.c +0 -383
- package/deps/liburing/liburing/test/nop-all-sizes.c +0 -107
- package/deps/liburing/liburing/test/nop.c +0 -115
- package/deps/liburing/liburing/test/open-close.c +0 -146
- package/deps/liburing/liburing/test/openat2.c +0 -240
- package/deps/liburing/liburing/test/personality.c +0 -204
- package/deps/liburing/liburing/test/pipe-eof.c +0 -81
- package/deps/liburing/liburing/test/pipe-reuse.c +0 -105
- package/deps/liburing/liburing/test/poll-cancel-ton.c +0 -139
- package/deps/liburing/liburing/test/poll-cancel.c +0 -135
- package/deps/liburing/liburing/test/poll-link.c +0 -227
- package/deps/liburing/liburing/test/poll-many.c +0 -208
- package/deps/liburing/liburing/test/poll-mshot-update.c +0 -273
- package/deps/liburing/liburing/test/poll-ring.c +0 -48
- package/deps/liburing/liburing/test/poll-v-poll.c +0 -353
- package/deps/liburing/liburing/test/poll.c +0 -109
- package/deps/liburing/liburing/test/probe.c +0 -137
- package/deps/liburing/liburing/test/read-write.c +0 -876
- package/deps/liburing/liburing/test/register-restrictions.c +0 -633
- package/deps/liburing/liburing/test/rename.c +0 -134
- package/deps/liburing/liburing/test/ring-leak.c +0 -173
- package/deps/liburing/liburing/test/ring-leak2.c +0 -249
- package/deps/liburing/liburing/test/rsrc_tags.c +0 -449
- package/deps/liburing/liburing/test/runtests-loop.sh +0 -16
- package/deps/liburing/liburing/test/runtests.sh +0 -170
- package/deps/liburing/liburing/test/rw_merge_test.c +0 -97
- package/deps/liburing/liburing/test/self.c +0 -91
- package/deps/liburing/liburing/test/send_recv.c +0 -291
- package/deps/liburing/liburing/test/send_recvmsg.c +0 -345
- package/deps/liburing/liburing/test/sendmsg_fs_cve.c +0 -198
- package/deps/liburing/liburing/test/shared-wq.c +0 -84
- package/deps/liburing/liburing/test/short-read.c +0 -75
- package/deps/liburing/liburing/test/shutdown.c +0 -163
- package/deps/liburing/liburing/test/sigfd-deadlock.c +0 -74
- package/deps/liburing/liburing/test/socket-rw-eagain.c +0 -156
- package/deps/liburing/liburing/test/socket-rw.c +0 -147
- package/deps/liburing/liburing/test/splice.c +0 -511
- package/deps/liburing/liburing/test/sq-full-cpp.cc +0 -45
- package/deps/liburing/liburing/test/sq-full.c +0 -45
- package/deps/liburing/liburing/test/sq-poll-dup.c +0 -200
- package/deps/liburing/liburing/test/sq-poll-kthread.c +0 -168
- package/deps/liburing/liburing/test/sq-poll-share.c +0 -137
- package/deps/liburing/liburing/test/sq-space_left.c +0 -159
- package/deps/liburing/liburing/test/sqpoll-cancel-hang.c +0 -159
- package/deps/liburing/liburing/test/sqpoll-disable-exit.c +0 -195
- package/deps/liburing/liburing/test/sqpoll-exit-hang.c +0 -77
- package/deps/liburing/liburing/test/sqpoll-sleep.c +0 -68
- package/deps/liburing/liburing/test/statx.c +0 -172
- package/deps/liburing/liburing/test/stdout.c +0 -232
- package/deps/liburing/liburing/test/submit-link-fail.c +0 -154
- package/deps/liburing/liburing/test/submit-reuse.c +0 -239
- package/deps/liburing/liburing/test/symlink.c +0 -116
- package/deps/liburing/liburing/test/teardowns.c +0 -58
- package/deps/liburing/liburing/test/thread-exit.c +0 -131
- package/deps/liburing/liburing/test/timeout-new.c +0 -246
- package/deps/liburing/liburing/test/timeout-overflow.c +0 -204
- package/deps/liburing/liburing/test/timeout.c +0 -1354
- package/deps/liburing/liburing/test/unlink.c +0 -111
- package/deps/liburing/liburing/test/wakeup-hang.c +0 -162
- package/deps/rocksdb/rocksdb/README.md +0 -32
- package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
- package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
- package/deps/rocksdb/rocksdb/port/README +0 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -259,7 +259,8 @@ TEST_P(BlockBasedTableReaderTest, MultiGet) {
|
|
|
259
259
|
for (auto& key_ctx : key_context) {
|
|
260
260
|
sorted_keys.emplace_back(&key_ctx);
|
|
261
261
|
}
|
|
262
|
-
MultiGetContext ctx(&sorted_keys, 0, sorted_keys.size(), 0, ReadOptions()
|
|
262
|
+
MultiGetContext ctx(&sorted_keys, 0, sorted_keys.size(), 0, ReadOptions(),
|
|
263
|
+
fs_.get(), nullptr);
|
|
263
264
|
|
|
264
265
|
// Execute MultiGet.
|
|
265
266
|
MultiGetContext::Range range = ctx.GetMultiGetRange();
|
|
@@ -39,6 +39,14 @@ void BlockPrefetcher::PrefetchIfNeeded(
|
|
|
39
39
|
return;
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
+
// In case of async_io, it always creates the PrefetchBuffer.
|
|
43
|
+
if (async_io) {
|
|
44
|
+
rep->CreateFilePrefetchBufferIfNotExists(
|
|
45
|
+
initial_auto_readahead_size_, max_auto_readahead_size,
|
|
46
|
+
&prefetch_buffer_, /*implicit_auto_readahead=*/true, async_io);
|
|
47
|
+
return;
|
|
48
|
+
}
|
|
49
|
+
|
|
42
50
|
size_t len = BlockBasedTable::BlockSizeWithTrailer(handle);
|
|
43
51
|
size_t offset = handle.offset();
|
|
44
52
|
|
|
@@ -188,16 +188,7 @@ class FilterBlockReader {
|
|
|
188
188
|
const Slice* const const_ikey_ptr,
|
|
189
189
|
bool* filter_checked, bool need_upper_bound_check,
|
|
190
190
|
bool no_io,
|
|
191
|
-
BlockCacheLookupContext* lookup_context)
|
|
192
|
-
if (need_upper_bound_check) {
|
|
193
|
-
return true;
|
|
194
|
-
}
|
|
195
|
-
*filter_checked = true;
|
|
196
|
-
Slice prefix = prefix_extractor->Transform(user_key_without_ts);
|
|
197
|
-
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io,
|
|
198
|
-
const_ikey_ptr, /* get_context */ nullptr,
|
|
199
|
-
lookup_context);
|
|
200
|
-
}
|
|
191
|
+
BlockCacheLookupContext* lookup_context) = 0;
|
|
201
192
|
};
|
|
202
193
|
|
|
203
194
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -31,7 +31,7 @@ Status FilterBlockReaderCommon<TBlocklike>::ReadFilterBlock(
|
|
|
31
31
|
UncompressionDict::GetEmptyDict(), filter_block,
|
|
32
32
|
BlockType::kFilter, get_context, lookup_context,
|
|
33
33
|
/* for_compaction */ false, use_cache,
|
|
34
|
-
/* wait_for_cache */ true);
|
|
34
|
+
/* wait_for_cache */ true, /* async_read */ false);
|
|
35
35
|
|
|
36
36
|
return s;
|
|
37
37
|
}
|
|
@@ -94,6 +94,64 @@ size_t FilterBlockReaderCommon<TBlocklike>::ApproximateFilterBlockMemoryUsage()
|
|
|
94
94
|
: 0;
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
+
template <typename TBlocklike>
|
|
98
|
+
bool FilterBlockReaderCommon<TBlocklike>::RangeMayExist(
|
|
99
|
+
const Slice* iterate_upper_bound, const Slice& user_key_without_ts,
|
|
100
|
+
const SliceTransform* prefix_extractor, const Comparator* comparator,
|
|
101
|
+
const Slice* const const_ikey_ptr, bool* filter_checked,
|
|
102
|
+
bool need_upper_bound_check, bool no_io,
|
|
103
|
+
BlockCacheLookupContext* lookup_context) {
|
|
104
|
+
if (!prefix_extractor || !prefix_extractor->InDomain(user_key_without_ts)) {
|
|
105
|
+
*filter_checked = false;
|
|
106
|
+
return true;
|
|
107
|
+
}
|
|
108
|
+
Slice prefix = prefix_extractor->Transform(user_key_without_ts);
|
|
109
|
+
if (need_upper_bound_check &&
|
|
110
|
+
!IsFilterCompatible(iterate_upper_bound, prefix, comparator)) {
|
|
111
|
+
*filter_checked = false;
|
|
112
|
+
return true;
|
|
113
|
+
} else {
|
|
114
|
+
*filter_checked = true;
|
|
115
|
+
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io,
|
|
116
|
+
const_ikey_ptr, /* get_context */ nullptr,
|
|
117
|
+
lookup_context);
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
template <typename TBlocklike>
|
|
122
|
+
bool FilterBlockReaderCommon<TBlocklike>::IsFilterCompatible(
|
|
123
|
+
const Slice* iterate_upper_bound, const Slice& prefix,
|
|
124
|
+
const Comparator* comparator) const {
|
|
125
|
+
// Try to reuse the bloom filter in the SST table if prefix_extractor in
|
|
126
|
+
// mutable_cf_options has changed. If range [user_key, upper_bound) all
|
|
127
|
+
// share the same prefix then we may still be able to use the bloom filter.
|
|
128
|
+
const SliceTransform* const prefix_extractor = table_prefix_extractor();
|
|
129
|
+
if (iterate_upper_bound != nullptr && prefix_extractor) {
|
|
130
|
+
if (!prefix_extractor->InDomain(*iterate_upper_bound)) {
|
|
131
|
+
return false;
|
|
132
|
+
}
|
|
133
|
+
Slice upper_bound_xform = prefix_extractor->Transform(*iterate_upper_bound);
|
|
134
|
+
// first check if user_key and upper_bound all share the same prefix
|
|
135
|
+
if (comparator->CompareWithoutTimestamp(prefix, false, upper_bound_xform,
|
|
136
|
+
false) != 0) {
|
|
137
|
+
// second check if user_key's prefix is the immediate predecessor of
|
|
138
|
+
// upper_bound and have the same length. If so, we know for sure all
|
|
139
|
+
// keys in the range [user_key, upper_bound) share the same prefix.
|
|
140
|
+
// Also need to make sure upper_bound are full length to ensure
|
|
141
|
+
// correctness
|
|
142
|
+
if (!full_length_enabled_ ||
|
|
143
|
+
iterate_upper_bound->size() != prefix_extractor_full_length_ ||
|
|
144
|
+
!comparator->IsSameLengthImmediateSuccessor(prefix,
|
|
145
|
+
*iterate_upper_bound)) {
|
|
146
|
+
return false;
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
return true;
|
|
150
|
+
} else {
|
|
151
|
+
return false;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
97
155
|
// Explicitly instantiate templates for both "blocklike" types we use.
|
|
98
156
|
// This makes it possible to keep the template definitions in the .cc file.
|
|
99
157
|
template class FilterBlockReaderCommon<BlockContents>;
|
|
@@ -26,8 +26,20 @@ class FilterBlockReaderCommon : public FilterBlockReader {
|
|
|
26
26
|
CachableEntry<TBlocklike>&& filter_block)
|
|
27
27
|
: table_(t), filter_block_(std::move(filter_block)) {
|
|
28
28
|
assert(table_);
|
|
29
|
+
const SliceTransform* const prefix_extractor = table_prefix_extractor();
|
|
30
|
+
if (prefix_extractor) {
|
|
31
|
+
full_length_enabled_ =
|
|
32
|
+
prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_);
|
|
33
|
+
}
|
|
29
34
|
}
|
|
30
35
|
|
|
36
|
+
bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key,
|
|
37
|
+
const SliceTransform* prefix_extractor,
|
|
38
|
+
const Comparator* comparator,
|
|
39
|
+
const Slice* const const_ikey_ptr, bool* filter_checked,
|
|
40
|
+
bool need_upper_bound_check, bool no_io,
|
|
41
|
+
BlockCacheLookupContext* lookup_context) override;
|
|
42
|
+
|
|
31
43
|
protected:
|
|
32
44
|
static Status ReadFilterBlock(const BlockBasedTable* table,
|
|
33
45
|
FilePrefetchBuffer* prefetch_buffer,
|
|
@@ -47,9 +59,15 @@ class FilterBlockReaderCommon : public FilterBlockReader {
|
|
|
47
59
|
|
|
48
60
|
size_t ApproximateFilterBlockMemoryUsage() const;
|
|
49
61
|
|
|
62
|
+
private:
|
|
63
|
+
bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix,
|
|
64
|
+
const Comparator* comparator) const;
|
|
65
|
+
|
|
50
66
|
private:
|
|
51
67
|
const BlockBasedTable* table_;
|
|
52
68
|
CachableEntry<TBlocklike> filter_block_;
|
|
69
|
+
size_t prefix_extractor_full_length_ = 0;
|
|
70
|
+
bool full_length_enabled_ = false;
|
|
53
71
|
};
|
|
54
72
|
|
|
55
73
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -120,11 +120,6 @@ FullFilterBlockReader::FullFilterBlockReader(
|
|
|
120
120
|
const BlockBasedTable* t,
|
|
121
121
|
CachableEntry<ParsedFullFilterBlock>&& filter_block)
|
|
122
122
|
: FilterBlockReaderCommon(t, std::move(filter_block)) {
|
|
123
|
-
const SliceTransform* const prefix_extractor = table_prefix_extractor();
|
|
124
|
-
if (prefix_extractor) {
|
|
125
|
-
full_length_enabled_ =
|
|
126
|
-
prefix_extractor->FullLengthEnabled(&prefix_extractor_full_length_);
|
|
127
|
-
}
|
|
128
123
|
}
|
|
129
124
|
|
|
130
125
|
bool FullFilterBlockReader::KeyMayMatch(
|
|
@@ -306,60 +301,4 @@ size_t FullFilterBlockReader::ApproximateMemoryUsage() const {
|
|
|
306
301
|
return usage;
|
|
307
302
|
}
|
|
308
303
|
|
|
309
|
-
bool FullFilterBlockReader::RangeMayExist(
|
|
310
|
-
const Slice* iterate_upper_bound, const Slice& user_key_without_ts,
|
|
311
|
-
const SliceTransform* prefix_extractor, const Comparator* comparator,
|
|
312
|
-
const Slice* const const_ikey_ptr, bool* filter_checked,
|
|
313
|
-
bool need_upper_bound_check, bool no_io,
|
|
314
|
-
BlockCacheLookupContext* lookup_context) {
|
|
315
|
-
if (!prefix_extractor || !prefix_extractor->InDomain(user_key_without_ts)) {
|
|
316
|
-
*filter_checked = false;
|
|
317
|
-
return true;
|
|
318
|
-
}
|
|
319
|
-
Slice prefix = prefix_extractor->Transform(user_key_without_ts);
|
|
320
|
-
if (need_upper_bound_check &&
|
|
321
|
-
!IsFilterCompatible(iterate_upper_bound, prefix, comparator)) {
|
|
322
|
-
*filter_checked = false;
|
|
323
|
-
return true;
|
|
324
|
-
} else {
|
|
325
|
-
*filter_checked = true;
|
|
326
|
-
return PrefixMayMatch(prefix, prefix_extractor, kNotValid, no_io,
|
|
327
|
-
const_ikey_ptr, /* get_context */ nullptr,
|
|
328
|
-
lookup_context);
|
|
329
|
-
}
|
|
330
|
-
}
|
|
331
|
-
|
|
332
|
-
bool FullFilterBlockReader::IsFilterCompatible(
|
|
333
|
-
const Slice* iterate_upper_bound, const Slice& prefix,
|
|
334
|
-
const Comparator* comparator) const {
|
|
335
|
-
// Try to reuse the bloom filter in the SST table if prefix_extractor in
|
|
336
|
-
// mutable_cf_options has changed. If range [user_key, upper_bound) all
|
|
337
|
-
// share the same prefix then we may still be able to use the bloom filter.
|
|
338
|
-
const SliceTransform* const prefix_extractor = table_prefix_extractor();
|
|
339
|
-
if (iterate_upper_bound != nullptr && prefix_extractor) {
|
|
340
|
-
if (!prefix_extractor->InDomain(*iterate_upper_bound)) {
|
|
341
|
-
return false;
|
|
342
|
-
}
|
|
343
|
-
Slice upper_bound_xform = prefix_extractor->Transform(*iterate_upper_bound);
|
|
344
|
-
// first check if user_key and upper_bound all share the same prefix
|
|
345
|
-
if (comparator->CompareWithoutTimestamp(prefix, false, upper_bound_xform,
|
|
346
|
-
false) != 0) {
|
|
347
|
-
// second check if user_key's prefix is the immediate predecessor of
|
|
348
|
-
// upper_bound and have the same length. If so, we know for sure all
|
|
349
|
-
// keys in the range [user_key, upper_bound) share the same prefix.
|
|
350
|
-
// Also need to make sure upper_bound are full length to ensure
|
|
351
|
-
// correctness
|
|
352
|
-
if (!full_length_enabled_ ||
|
|
353
|
-
iterate_upper_bound->size() != prefix_extractor_full_length_ ||
|
|
354
|
-
!comparator->IsSameLengthImmediateSuccessor(prefix,
|
|
355
|
-
*iterate_upper_bound)) {
|
|
356
|
-
return false;
|
|
357
|
-
}
|
|
358
|
-
}
|
|
359
|
-
return true;
|
|
360
|
-
} else {
|
|
361
|
-
return false;
|
|
362
|
-
}
|
|
363
|
-
}
|
|
364
|
-
|
|
365
304
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -131,25 +131,12 @@ class FullFilterBlockReader
|
|
|
131
131
|
uint64_t block_offset, const bool no_io,
|
|
132
132
|
BlockCacheLookupContext* lookup_context) override;
|
|
133
133
|
size_t ApproximateMemoryUsage() const override;
|
|
134
|
-
bool RangeMayExist(const Slice* iterate_upper_bound, const Slice& user_key,
|
|
135
|
-
const SliceTransform* prefix_extractor,
|
|
136
|
-
const Comparator* comparator,
|
|
137
|
-
const Slice* const const_ikey_ptr, bool* filter_checked,
|
|
138
|
-
bool need_upper_bound_check, bool no_io,
|
|
139
|
-
BlockCacheLookupContext* lookup_context) override;
|
|
140
|
-
|
|
141
134
|
private:
|
|
142
135
|
bool MayMatch(const Slice& entry, bool no_io, GetContext* get_context,
|
|
143
136
|
BlockCacheLookupContext* lookup_context) const;
|
|
144
137
|
void MayMatch(MultiGetRange* range, bool no_io,
|
|
145
138
|
const SliceTransform* prefix_extractor,
|
|
146
139
|
BlockCacheLookupContext* lookup_context) const;
|
|
147
|
-
bool IsFilterCompatible(const Slice* iterate_upper_bound, const Slice& prefix,
|
|
148
|
-
const Comparator* comparator) const;
|
|
149
|
-
|
|
150
|
-
private:
|
|
151
|
-
bool full_length_enabled_;
|
|
152
|
-
size_t prefix_extractor_full_length_;
|
|
153
140
|
};
|
|
154
141
|
|
|
155
142
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -27,7 +27,7 @@ Status BlockBasedTable::IndexReaderCommon::ReadIndexBlock(
|
|
|
27
27
|
prefetch_buffer, read_options, rep->footer.index_handle(),
|
|
28
28
|
UncompressionDict::GetEmptyDict(), index_block, BlockType::kIndex,
|
|
29
29
|
get_context, lookup_context, /* for_compaction */ false, use_cache,
|
|
30
|
-
/* wait_for_cache */ true);
|
|
30
|
+
/* wait_for_cache */ true, /* async_read */ false);
|
|
31
31
|
|
|
32
32
|
return s;
|
|
33
33
|
}
|
|
@@ -323,7 +323,7 @@ Status PartitionedFilterBlockReader::GetFilterPartitionBlock(
|
|
|
323
323
|
UncompressionDict::GetEmptyDict(), filter_block,
|
|
324
324
|
BlockType::kFilter, get_context, lookup_context,
|
|
325
325
|
/* for_compaction */ false, /* use_cache */ true,
|
|
326
|
-
/* wait_for_cache */ true);
|
|
326
|
+
/* wait_for_cache */ true, /* async_read */ false);
|
|
327
327
|
|
|
328
328
|
return s;
|
|
329
329
|
}
|
|
@@ -521,7 +521,8 @@ Status PartitionedFilterBlockReader::CacheDependencies(const ReadOptions& ro,
|
|
|
521
521
|
s = table()->MaybeReadBlockAndLoadToCache(
|
|
522
522
|
prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(),
|
|
523
523
|
/* wait */ true, /* for_compaction */ false, &block, BlockType::kFilter,
|
|
524
|
-
nullptr /* get_context */, &lookup_context, nullptr /* contents
|
|
524
|
+
nullptr /* get_context */, &lookup_context, nullptr /* contents */,
|
|
525
|
+
false);
|
|
525
526
|
if (!s.ok()) {
|
|
526
527
|
return s;
|
|
527
528
|
}
|
|
@@ -97,9 +97,9 @@ void PartitionedIndexIterator::InitPartitionedIndexBlock() {
|
|
|
97
97
|
table_->NewDataBlockIterator<IndexBlockIter>(
|
|
98
98
|
read_options_, partitioned_index_handle, &block_iter_,
|
|
99
99
|
BlockType::kIndex,
|
|
100
|
-
/*get_context=*/nullptr, &lookup_context_,
|
|
100
|
+
/*get_context=*/nullptr, &lookup_context_,
|
|
101
101
|
block_prefetcher_.prefetch_buffer(),
|
|
102
|
-
/*for_compaction=*/is_for_compaction);
|
|
102
|
+
/*for_compaction=*/is_for_compaction, /*async_read=*/false, s);
|
|
103
103
|
block_iter_points_to_real_block_ = true;
|
|
104
104
|
// We could check upper bound here but it is complicated to reason about
|
|
105
105
|
// upper bound in index iterator. On the other than, in large scans, index
|
|
@@ -187,7 +187,8 @@ Status PartitionIndexReader::CacheDependencies(const ReadOptions& ro,
|
|
|
187
187
|
Status s = table()->MaybeReadBlockAndLoadToCache(
|
|
188
188
|
prefetch_buffer.get(), ro, handle, UncompressionDict::GetEmptyDict(),
|
|
189
189
|
/*wait=*/true, /*for_compaction=*/false, &block, BlockType::kIndex,
|
|
190
|
-
/*get_context=*/nullptr, &lookup_context, /*contents=*/nullptr
|
|
190
|
+
/*get_context=*/nullptr, &lookup_context, /*contents=*/nullptr,
|
|
191
|
+
/*async_read=*/false);
|
|
191
192
|
|
|
192
193
|
if (!s.ok()) {
|
|
193
194
|
return s;
|
|
@@ -62,7 +62,8 @@ Status UncompressionDictReader::ReadUncompressionDictionary(
|
|
|
62
62
|
prefetch_buffer, read_options, rep->compression_dict_handle,
|
|
63
63
|
UncompressionDict::GetEmptyDict(), uncompression_dict,
|
|
64
64
|
BlockType::kCompressionDictionary, get_context, lookup_context,
|
|
65
|
-
/* for_compaction */ false, use_cache, /* wait_for_cache */ true
|
|
65
|
+
/* for_compaction */ false, use_cache, /* wait_for_cache */ true,
|
|
66
|
+
/* async_read */ false);
|
|
66
67
|
|
|
67
68
|
if (!s.ok()) {
|
|
68
69
|
ROCKS_LOG_WARN(
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
#include "table/block_fetcher.h"
|
|
11
11
|
|
|
12
|
+
#include <cassert>
|
|
12
13
|
#include <cinttypes>
|
|
13
14
|
#include <string>
|
|
14
15
|
|
|
@@ -72,10 +73,10 @@ inline bool BlockFetcher::TryGetFromPrefetchBuffer() {
|
|
|
72
73
|
IOStatus io_s = file_->PrepareIOOptions(read_options_, opts);
|
|
73
74
|
if (io_s.ok()) {
|
|
74
75
|
bool read_from_prefetch_buffer = false;
|
|
75
|
-
if (read_options_.async_io) {
|
|
76
|
+
if (read_options_.async_io && !for_compaction_) {
|
|
76
77
|
read_from_prefetch_buffer = prefetch_buffer_->TryReadFromCacheAsync(
|
|
77
78
|
opts, file_, handle_.offset(), block_size_with_trailer_, &slice_,
|
|
78
|
-
&io_s, read_options_.rate_limiter_priority
|
|
79
|
+
&io_s, read_options_.rate_limiter_priority);
|
|
79
80
|
} else {
|
|
80
81
|
read_from_prefetch_buffer = prefetch_buffer_->TryReadFromCache(
|
|
81
82
|
opts, file_, handle_.offset(), block_size_with_trailer_, &slice_,
|
|
@@ -341,4 +342,61 @@ IOStatus BlockFetcher::ReadBlockContents() {
|
|
|
341
342
|
return io_status_;
|
|
342
343
|
}
|
|
343
344
|
|
|
345
|
+
// Fetches the block through the asynchronous prefetch path where possible.
//
// Order of attempts, as written below:
//   1. Uncompressed block from the persistent cache: on a hit,
//      compression_type_ is reset to kNoCompression and OK is returned.
//   2. If the compressed block is NOT in the persistent cache and this is not
//      a compaction read, PrefetchAsync() is issued on prefetch_buffer_
//      (asserted non-null). A TryAgain status is returned to the caller
//      as-is. On OK the trailer is processed, the block is uncompressed when
//      do_uncompress_ is set and the block is compressed, and the result is
//      offered to the persistent cache.
//   3. Any other PrefetchAsync() status, or for_compaction_ being true, falls
//      back to the synchronous ReadBlockContents().
//
// Returns the resulting IOStatus.
IOStatus BlockFetcher::ReadAsyncBlockContents() {
|
|
346
|
+
if (TryGetUncompressBlockFromPersistentCache()) {
|
|
347
|
+
compression_type_ = kNoCompression;
|
|
348
|
+
#ifndef NDEBUG
|
|
349
|
+
contents_->is_raw_block = true;
|
|
350
|
+
#endif // NDEBUG
|
|
351
|
+
return IOStatus::OK();
|
|
352
|
+
} else if (!TryGetCompressedBlockFromPersistentCache()) {
|
|
353
|
+
assert(prefetch_buffer_ != nullptr);
|
|
354
|
+
if (!for_compaction_) {
|
|
355
|
+
IOOptions opts;
|
|
356
|
+
IOStatus io_s = file_->PrepareIOOptions(read_options_, opts);
|
|
357
|
+
if (!io_s.ok()) {
|
|
358
|
+
return io_s;
|
|
359
|
+
}
|
|
360
|
+
io_s = status_to_io_status(prefetch_buffer_->PrefetchAsync(
|
|
361
|
+
opts, file_, handle_.offset(), block_size_with_trailer_,
|
|
362
|
+
read_options_.rate_limiter_priority, &slice_));
|
|
363
|
+
// TryAgain is handed back to the caller unchanged rather than falling
// through to the synchronous read at the end of this branch.
if (io_s.IsTryAgain()) {
|
|
364
|
+
return io_s;
|
|
365
|
+
}
|
|
366
|
+
if (io_s.ok()) {
|
|
367
|
+
// Data Block is already in prefetch.
|
|
368
|
+
got_from_prefetch_buffer_ = true;
|
|
369
|
+
ProcessTrailerIfPresent();
|
|
370
|
+
if (!io_status_.ok()) {
|
|
371
|
+
return io_status_;
|
|
372
|
+
}
|
|
373
|
+
// slice_ was filled by PrefetchAsync() above; used_buf_ aliases its data.
used_buf_ = const_cast<char*>(slice_.data());
|
|
374
|
+
|
|
375
|
+
if (do_uncompress_ && compression_type_ != kNoCompression) {
|
|
376
|
+
PERF_TIMER_GUARD(block_decompress_time);
|
|
377
|
+
// compressed page, uncompress, update cache
|
|
378
|
+
UncompressionContext context(compression_type_);
|
|
379
|
+
UncompressionInfo info(context, uncompression_dict_,
|
|
380
|
+
compression_type_);
|
|
381
|
+
io_status_ = status_to_io_status(UncompressBlockContents(
|
|
382
|
+
info, slice_.data(), block_size_, contents_,
|
|
383
|
+
footer_.format_version(), ioptions_, memory_allocator_));
|
|
384
|
+
#ifndef NDEBUG
|
|
385
|
+
num_heap_buf_memcpy_++;
|
|
386
|
+
#endif
|
|
387
|
+
compression_type_ = kNoCompression;
|
|
388
|
+
} else {
|
|
389
|
+
GetBlockContents();
|
|
390
|
+
}
|
|
391
|
+
InsertUncompressedBlockToPersistentCacheIfNeeded();
|
|
392
|
+
return io_status_;
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
// Fallback to sequential reading of data blocks in case of io_s returns
|
|
396
|
+
// error or for_compaction_ is true.
|
|
397
|
+
return ReadBlockContents();
|
|
398
|
+
}
|
|
399
|
+
// Reached only when TryGetCompressedBlockFromPersistentCache() returned true.
// NOTE(review): no trailer processing or decompression is done on this path
// within this function -- confirm the helper or the caller completes it.
return io_status_;
|
|
400
|
+
}
|
|
401
|
+
|
|
344
402
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -112,6 +112,14 @@ class MergingIterator : public InternalIterator {
|
|
|
112
112
|
}
|
|
113
113
|
|
|
114
114
|
PERF_COUNTER_ADD(seek_child_seek_count, 1);
|
|
115
|
+
|
|
116
|
+
// child.status() is set to Status::TryAgain indicating asynchronous
|
|
117
|
+
// request for retrieval of data blocks has been submitted. So it should
|
|
118
|
+
// return at this point and Seek should be called again to retrieve the
|
|
119
|
+
// requested block and add the child to min heap.
|
|
120
|
+
if (child.status() == Status::TryAgain()) {
|
|
121
|
+
continue;
|
|
122
|
+
}
|
|
115
123
|
{
|
|
116
124
|
// Strictly, we timed slightly more than min heap operation,
|
|
117
125
|
// but these operations are very cheap.
|
|
@@ -119,6 +127,18 @@ class MergingIterator : public InternalIterator {
|
|
|
119
127
|
AddToMinHeapOrCheckStatus(&child);
|
|
120
128
|
}
|
|
121
129
|
}
|
|
130
|
+
|
|
131
|
+
for (auto& child : children_) {
|
|
132
|
+
if (child.status() == Status::TryAgain()) {
|
|
133
|
+
child.Seek(target);
|
|
134
|
+
{
|
|
135
|
+
PERF_TIMER_GUARD(seek_min_heap_time);
|
|
136
|
+
AddToMinHeapOrCheckStatus(&child);
|
|
137
|
+
}
|
|
138
|
+
PERF_COUNTER_ADD(number_async_seek, 1);
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
122
142
|
direction_ = kForward;
|
|
123
143
|
{
|
|
124
144
|
PERF_TIMER_GUARD(seek_min_heap_time);
|
|
@@ -359,6 +379,13 @@ void MergingIterator::SwitchToForward() {
|
|
|
359
379
|
for (auto& child : children_) {
|
|
360
380
|
if (&child != current_) {
|
|
361
381
|
child.Seek(target);
|
|
382
|
+
// child.status() is set to Status::TryAgain indicating asynchronous
|
|
383
|
+
// request for retrieval of data blocks has been submitted. So it should
|
|
384
|
+
// return at this point and Seek should be called again to retrieve the
|
|
385
|
+
// requested block and add the child to min heap.
|
|
386
|
+
if (child.status() == Status::TryAgain()) {
|
|
387
|
+
continue;
|
|
388
|
+
}
|
|
362
389
|
if (child.Valid() && comparator_->Equal(target, child.key())) {
|
|
363
390
|
assert(child.status().ok());
|
|
364
391
|
child.Next();
|
|
@@ -366,6 +393,18 @@ void MergingIterator::SwitchToForward() {
|
|
|
366
393
|
}
|
|
367
394
|
AddToMinHeapOrCheckStatus(&child);
|
|
368
395
|
}
|
|
396
|
+
|
|
397
|
+
for (auto& child : children_) {
|
|
398
|
+
if (child.status() == Status::TryAgain()) {
|
|
399
|
+
child.Seek(target);
|
|
400
|
+
if (child.Valid() && comparator_->Equal(target, child.key())) {
|
|
401
|
+
assert(child.status().ok());
|
|
402
|
+
child.Next();
|
|
403
|
+
}
|
|
404
|
+
AddToMinHeapOrCheckStatus(&child);
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
369
408
|
direction_ = kForward;
|
|
370
409
|
}
|
|
371
410
|
|
|
@@ -14,8 +14,10 @@
|
|
|
14
14
|
#include "rocksdb/env.h"
|
|
15
15
|
#include "rocksdb/statistics.h"
|
|
16
16
|
#include "rocksdb/types.h"
|
|
17
|
+
#include "util/async_file_reader.h"
|
|
17
18
|
#include "util/autovector.h"
|
|
18
19
|
#include "util/math.h"
|
|
20
|
+
#include "util/single_thread_executor.h"
|
|
19
21
|
|
|
20
22
|
namespace ROCKSDB_NAMESPACE {
|
|
21
23
|
class GetContext;
|
|
@@ -104,11 +106,20 @@ class MultiGetContext {
|
|
|
104
106
|
|
|
105
107
|
MultiGetContext(autovector<KeyContext*, MAX_BATCH_SIZE>* sorted_keys,
|
|
106
108
|
size_t begin, size_t num_keys, SequenceNumber snapshot,
|
|
107
|
-
const ReadOptions& read_opts
|
|
109
|
+
const ReadOptions& read_opts, FileSystem* fs,
|
|
110
|
+
Statistics* stats)
|
|
108
111
|
: num_keys_(num_keys),
|
|
109
112
|
value_mask_(0),
|
|
110
113
|
value_size_(0),
|
|
111
|
-
lookup_key_ptr_(reinterpret_cast<LookupKey*>(lookup_key_stack_buf))
|
|
114
|
+
lookup_key_ptr_(reinterpret_cast<LookupKey*>(lookup_key_stack_buf))
|
|
115
|
+
#if USE_COROUTINES
|
|
116
|
+
,
|
|
117
|
+
reader_(fs, stats),
|
|
118
|
+
executor_(reader_)
|
|
119
|
+
#endif // USE_COROUTINES
|
|
120
|
+
{
|
|
121
|
+
(void)fs;
|
|
122
|
+
(void)stats;
|
|
112
123
|
assert(num_keys <= MAX_BATCH_SIZE);
|
|
113
124
|
if (num_keys > MAX_LOOKUP_KEYS_ON_STACK) {
|
|
114
125
|
lookup_key_heap_buf.reset(new char[sizeof(LookupKey) * num_keys]);
|
|
@@ -135,6 +146,12 @@ class MultiGetContext {
|
|
|
135
146
|
}
|
|
136
147
|
}
|
|
137
148
|
|
|
149
|
+
#if USE_COROUTINES
|
|
150
|
+
// Returns a mutable reference to the executor_ member (declared only when
// USE_COROUTINES is set).
SingleThreadExecutor& executor() { return executor_; }
|
|
151
|
+
|
|
152
|
+
// Returns a mutable reference to the reader_ member (declared only when
// USE_COROUTINES is set).
AsyncFileReader& reader() { return reader_; }
|
|
153
|
+
#endif // USE_COROUTINES
|
|
154
|
+
|
|
138
155
|
private:
|
|
139
156
|
static const int MAX_LOOKUP_KEYS_ON_STACK = 16;
|
|
140
157
|
alignas(alignof(LookupKey))
|
|
@@ -145,6 +162,10 @@ class MultiGetContext {
|
|
|
145
162
|
uint64_t value_size_;
|
|
146
163
|
std::unique_ptr<char[]> lookup_key_heap_buf;
|
|
147
164
|
LookupKey* lookup_key_ptr_;
|
|
165
|
+
#if USE_COROUTINES
|
|
166
|
+
AsyncFileReader reader_;
|
|
167
|
+
SingleThreadExecutor executor_;
|
|
168
|
+
#endif // USE_COROUTINES
|
|
148
169
|
|
|
149
170
|
public:
|
|
150
171
|
// MultiGetContext::Range - Specifies a range of keys, by start and end index,
|
|
@@ -267,6 +288,20 @@ class MultiGetContext {
|
|
|
267
288
|
|
|
268
289
|
void AddValueSize(uint64_t value_size) { ctx_->value_size_ += value_size; }
|
|
269
290
|
|
|
291
|
+
// Returns the MultiGetContext pointer stored in ctx_.
MultiGetContext* context() const { return ctx_; }
|
|
292
|
+
|
|
293
|
+
// Builds a sub-range of this range based on where the last remaining key of
// `other` sits relative to the last remaining key of this range.
//
// If this range's last remaining index is greater than other's, the result is
// Range(*this, Iterator(this, other_last), Iterator(this, my_last)).
// Otherwise the result is Range(*this, begin(), begin()).
// NOTE(review): whether the iterator bounds are inclusive or exclusive depends
// on the Range constructor, which is not visible here -- confirm.
Range Suffix(const Range& other) const {
|
|
294
|
+
size_t other_last = other.FindLastRemaining();
|
|
295
|
+
size_t my_last = FindLastRemaining();
|
|
296
|
+
|
|
297
|
+
if (my_last > other_last) {
|
|
298
|
+
return Range(*this, Iterator(this, other_last),
|
|
299
|
+
Iterator(this, my_last));
|
|
300
|
+
} else {
|
|
301
|
+
return Range(*this, begin(), begin());
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
270
305
|
private:
|
|
271
306
|
friend MultiGetContext;
|
|
272
307
|
MultiGetContext* ctx_;
|
|
@@ -283,6 +318,15 @@ class MultiGetContext {
|
|
|
283
318
|
return (((Mask{1} << end_) - 1) & ~((Mask{1} << start_) - 1) &
|
|
284
319
|
~(ctx_->value_mask_ | skip_mask_));
|
|
285
320
|
}
|
|
321
|
+
|
|
322
|
+
// Returns the bit index of the highest bit set in RemainingMask().
// Returns 0 when the mask has no bits set at or above start_.
size_t FindLastRemaining() const {
|
|
323
|
+
Mask mask = RemainingMask();
|
|
324
|
+
// Drop the low start_ bits; if anything is left, counting begins at start_.
size_t index = (mask >>= start_) ? start_ : 0;
|
|
325
|
+
// Each further shift that leaves a non-zero value means a higher bit is set.
while (mask >>= 1) {
|
|
326
|
+
index++;
|
|
327
|
+
}
|
|
328
|
+
return index;
|
|
329
|
+
}
|
|
286
330
|
};
|
|
287
331
|
|
|
288
332
|
// Return the initial range that encompasses all the keys in the batch
|
|
@@ -253,7 +253,7 @@ Status SstFileDumper::ShowAllCompressionSizes(
|
|
|
253
253
|
compression_types,
|
|
254
254
|
int32_t compress_level_from, int32_t compress_level_to,
|
|
255
255
|
uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
|
|
256
|
-
uint64_t max_dict_buffer_bytes) {
|
|
256
|
+
uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer) {
|
|
257
257
|
fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
|
|
258
258
|
for (auto& i : compression_types) {
|
|
259
259
|
if (CompressionTypeSupported(i.first)) {
|
|
@@ -262,6 +262,7 @@ Status SstFileDumper::ShowAllCompressionSizes(
|
|
|
262
262
|
compress_opt.max_dict_bytes = max_dict_bytes;
|
|
263
263
|
compress_opt.zstd_max_train_bytes = zstd_max_train_bytes;
|
|
264
264
|
compress_opt.max_dict_buffer_bytes = max_dict_buffer_bytes;
|
|
265
|
+
compress_opt.use_zstd_dict_trainer = use_zstd_dict_trainer;
|
|
265
266
|
for (int32_t j = compress_level_from; j <= compress_level_to; j++) {
|
|
266
267
|
fprintf(stdout, "Compression level: %d", j);
|
|
267
268
|
compress_opt.level = j;
|
|
@@ -44,7 +44,7 @@ class SstFileDumper {
|
|
|
44
44
|
compression_types,
|
|
45
45
|
int32_t compress_level_from, int32_t compress_level_to,
|
|
46
46
|
uint32_t max_dict_bytes, uint32_t zstd_max_train_bytes,
|
|
47
|
-
uint64_t max_dict_buffer_bytes);
|
|
47
|
+
uint64_t max_dict_buffer_bytes, bool use_zstd_dict_trainer);
|
|
48
48
|
|
|
49
49
|
Status ShowCompressionSize(size_t block_size, CompressionType compress_type,
|
|
50
50
|
const CompressionOptions& compress_opt);
|
|
@@ -10,6 +10,10 @@
|
|
|
10
10
|
#pragma once
|
|
11
11
|
#include <memory>
|
|
12
12
|
#include "db/range_tombstone_fragmenter.h"
|
|
13
|
+
#if USE_COROUTINES
|
|
14
|
+
#include "folly/experimental/coro/Coroutine.h"
|
|
15
|
+
#include "folly/experimental/coro/Task.h"
|
|
16
|
+
#endif
|
|
13
17
|
#include "rocksdb/slice_transform.h"
|
|
14
18
|
#include "table/get_context.h"
|
|
15
19
|
#include "table/internal_iterator.h"
|
|
@@ -120,6 +124,15 @@ class TableReader {
|
|
|
120
124
|
}
|
|
121
125
|
}
|
|
122
126
|
|
|
127
|
+
#if USE_COROUTINES
|
|
128
|
+
// Coroutine entry point for MultiGet. This default implementation forwards
// all four arguments to MultiGet() and then completes with co_return.
// Being virtual, it can be overridden by subclasses.
virtual folly::coro::Task<void> MultiGetCoroutine(
|
|
129
|
+
const ReadOptions& readOptions, const MultiGetContext::Range* mget_range,
|
|
130
|
+
const SliceTransform* prefix_extractor, bool skip_filters = false) {
|
|
131
|
+
MultiGet(readOptions, mget_range, prefix_extractor, skip_filters);
|
|
132
|
+
co_return;
|
|
133
|
+
}
|
|
134
|
+
#endif // USE_COROUTINES
|
|
135
|
+
|
|
123
136
|
// Prefetch data corresponding to a given range of keys
|
|
124
137
|
// Typically this functionality is required for table implementations that
|
|
125
138
|
// persist the data on a non-volatile storage medium like disk/SSD
|
|
@@ -107,6 +107,20 @@ Status GetSstInternalUniqueId(const std::string &db_id,
|
|
|
107
107
|
return Status::OK();
|
|
108
108
|
}
|
|
109
109
|
|
|
110
|
+
// Overload that writes a two-word internal unique ID.
// It computes the three-word ID through the UniqueId64x3 overload and copies
// the first two words into *out. If that call fails, *out is set to {0, 0}.
// Returns the status of the three-word computation in either case.
Status GetSstInternalUniqueId(const std::string &db_id,
|
|
111
|
+
const std::string &db_session_id,
|
|
112
|
+
uint64_t file_number, UniqueId64x2 *out) {
|
|
113
|
+
UniqueId64x3 tmp{};
|
|
114
|
+
Status s = GetSstInternalUniqueId(db_id, db_session_id, file_number, &tmp);
|
|
115
|
+
if (s.ok()) {
|
|
116
|
+
(*out)[0] = tmp[0];
|
|
117
|
+
(*out)[1] = tmp[1];
|
|
118
|
+
} else {
|
|
119
|
+
*out = {0, 0};
|
|
120
|
+
}
|
|
121
|
+
return s;
|
|
122
|
+
}
|
|
123
|
+
|
|
110
124
|
namespace {
|
|
111
125
|
// For InternalUniqueIdToExternal / ExternalUniqueIdToInternal we want all
|
|
112
126
|
// zeros in first 128 bits to map to itself, so that excluding zero in
|
|
@@ -148,6 +162,19 @@ std::string EncodeUniqueIdBytes(UniqueIdPtr in) {
|
|
|
148
162
|
return ret;
|
|
149
163
|
}
|
|
150
164
|
|
|
165
|
+
// Decodes a byte-string unique ID into the 64-bit words pointed to by out.ptr.
// The input must be exactly 24 bytes when out.extended is set and 16 bytes
// otherwise; any other length returns Status::NotSupported.
// Each word is read with DecodeFixed64 from consecutive 8-byte offsets.
// Returns Status::OK() on success.
Status DecodeUniqueIdBytes(const std::string &unique_id, UniqueIdPtr out) {
|
|
166
|
+
if (unique_id.size() != (out.extended ? 24 : 16)) {
|
|
167
|
+
return Status::NotSupported("Not a valid unique_id");
|
|
168
|
+
}
|
|
169
|
+
// The size check above guarantees the string is non-empty here.
const char *buf = &unique_id.front();
|
|
170
|
+
out.ptr[0] = DecodeFixed64(&buf[0]);
|
|
171
|
+
out.ptr[1] = DecodeFixed64(&buf[8]);
|
|
172
|
+
if (out.extended) {
|
|
173
|
+
out.ptr[2] = DecodeFixed64(&buf[16]);
|
|
174
|
+
}
|
|
175
|
+
return Status::OK();
|
|
176
|
+
}
|
|
177
|
+
|
|
151
178
|
template <typename ID>
|
|
152
179
|
Status GetUniqueIdFromTablePropertiesHelper(const TableProperties &props,
|
|
153
180
|
std::string *out_id) {
|