@nxtedition/rocksdb 6.0.2 → 7.0.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/BUILDING.md +12 -4
- package/binding.cc +589 -128
- package/chained-batch.js +6 -6
- package/deps/rocksdb/rocksdb/CMakeLists.txt +9 -0
- package/deps/rocksdb/rocksdb/Makefile +16 -5
- package/deps/rocksdb/rocksdb/TARGETS +23 -2
- package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +7 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +33 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +26 -0
- package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +29 -0
- package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +10 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +12 -4
- package/deps/rocksdb/rocksdb/db/c.cc +26 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +3 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +29 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +16 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +402 -30
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +14 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +33 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +54 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +8 -1
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +14 -15
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +331 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +221 -92
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -2
- package/deps/rocksdb/rocksdb/db/db_test_util.h +4 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +1 -171
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_test_util.h +126 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +57 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +13 -2
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +2 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +10 -11
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +11 -1
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +12 -1
- package/deps/rocksdb/rocksdb/db/repair_test.cc +32 -10
- package/deps/rocksdb/rocksdb/db/snapshot_impl.h +3 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +19 -127
- package/deps/rocksdb/rocksdb/db/table_cache.h +3 -2
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +140 -0
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +130 -128
- package/deps/rocksdb/rocksdb/db/version_edit.cc +20 -0
- package/deps/rocksdb/rocksdb/db/version_edit.h +13 -4
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +14 -14
- package/deps/rocksdb/rocksdb/db/version_set.cc +205 -212
- package/deps/rocksdb/rocksdb/db/version_set.h +11 -0
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +154 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +10 -9
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +13 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +15 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +159 -65
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +43 -21
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +142 -17
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +23 -27
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +2 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +23 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/snapshot.h +4 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +189 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -1
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
- package/deps/rocksdb/rocksdb/options/options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +6 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +107 -9
- package/deps/rocksdb/rocksdb/src.mk +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +9 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +80 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +81 -757
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +21 -15
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +9 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +754 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +8 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +1 -10
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +59 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +18 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +0 -61
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +0 -13
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +60 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +2 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +39 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +46 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +2 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -1
- package/deps/rocksdb/rocksdb/table/table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/unique_id.cc +27 -0
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +3 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +23 -7
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +9 -1
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +72 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.h +144 -0
- package/deps/rocksdb/rocksdb/util/compression.h +49 -0
- package/deps/rocksdb/rocksdb/util/coro_utils.h +111 -0
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +55 -0
- package/deps/rocksdb/rocksdb.gyp +16 -15
- package/index.js +186 -3
- package/iterator.js +1 -0
- package/package-lock.json +23687 -0
- package/package.json +2 -30
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/deps/liburing/liburing/README +0 -46
- package/deps/liburing/liburing/test/232c93d07b74-test.c +0 -305
- package/deps/liburing/liburing/test/35fa71a030ca-test.c +0 -329
- package/deps/liburing/liburing/test/500f9fbadef8-test.c +0 -89
- package/deps/liburing/liburing/test/7ad0e4b2f83c-test.c +0 -93
- package/deps/liburing/liburing/test/8a9973408177-test.c +0 -106
- package/deps/liburing/liburing/test/917257daa0fe-test.c +0 -53
- package/deps/liburing/liburing/test/Makefile +0 -312
- package/deps/liburing/liburing/test/a0908ae19763-test.c +0 -58
- package/deps/liburing/liburing/test/a4c0b3decb33-test.c +0 -180
- package/deps/liburing/liburing/test/accept-link.c +0 -251
- package/deps/liburing/liburing/test/accept-reuse.c +0 -164
- package/deps/liburing/liburing/test/accept-test.c +0 -79
- package/deps/liburing/liburing/test/accept.c +0 -476
- package/deps/liburing/liburing/test/across-fork.c +0 -283
- package/deps/liburing/liburing/test/b19062a56726-test.c +0 -53
- package/deps/liburing/liburing/test/b5837bd5311d-test.c +0 -77
- package/deps/liburing/liburing/test/ce593a6c480a-test.c +0 -135
- package/deps/liburing/liburing/test/close-opath.c +0 -122
- package/deps/liburing/liburing/test/config +0 -10
- package/deps/liburing/liburing/test/connect.c +0 -398
- package/deps/liburing/liburing/test/cq-full.c +0 -96
- package/deps/liburing/liburing/test/cq-overflow.c +0 -294
- package/deps/liburing/liburing/test/cq-peek-batch.c +0 -102
- package/deps/liburing/liburing/test/cq-ready.c +0 -94
- package/deps/liburing/liburing/test/cq-size.c +0 -58
- package/deps/liburing/liburing/test/d4ae271dfaae-test.c +0 -96
- package/deps/liburing/liburing/test/d77a67ed5f27-test.c +0 -65
- package/deps/liburing/liburing/test/defer.c +0 -307
- package/deps/liburing/liburing/test/double-poll-crash.c +0 -186
- package/deps/liburing/liburing/test/eeed8b54e0df-test.c +0 -114
- package/deps/liburing/liburing/test/empty-eownerdead.c +0 -42
- package/deps/liburing/liburing/test/eventfd-disable.c +0 -151
- package/deps/liburing/liburing/test/eventfd-ring.c +0 -97
- package/deps/liburing/liburing/test/eventfd.c +0 -112
- package/deps/liburing/liburing/test/fadvise.c +0 -202
- package/deps/liburing/liburing/test/fallocate.c +0 -249
- package/deps/liburing/liburing/test/fc2a85cb02ef-test.c +0 -138
- package/deps/liburing/liburing/test/file-register.c +0 -843
- package/deps/liburing/liburing/test/file-update.c +0 -173
- package/deps/liburing/liburing/test/files-exit-hang-poll.c +0 -128
- package/deps/liburing/liburing/test/files-exit-hang-timeout.c +0 -134
- package/deps/liburing/liburing/test/fixed-link.c +0 -90
- package/deps/liburing/liburing/test/fsync.c +0 -224
- package/deps/liburing/liburing/test/hardlink.c +0 -136
- package/deps/liburing/liburing/test/helpers.c +0 -135
- package/deps/liburing/liburing/test/helpers.h +0 -67
- package/deps/liburing/liburing/test/io-cancel.c +0 -537
- package/deps/liburing/liburing/test/io_uring_enter.c +0 -296
- package/deps/liburing/liburing/test/io_uring_register.c +0 -664
- package/deps/liburing/liburing/test/io_uring_setup.c +0 -192
- package/deps/liburing/liburing/test/iopoll.c +0 -366
- package/deps/liburing/liburing/test/lfs-openat-write.c +0 -117
- package/deps/liburing/liburing/test/lfs-openat.c +0 -273
- package/deps/liburing/liburing/test/link-timeout.c +0 -1107
- package/deps/liburing/liburing/test/link.c +0 -496
- package/deps/liburing/liburing/test/link_drain.c +0 -229
- package/deps/liburing/liburing/test/madvise.c +0 -195
- package/deps/liburing/liburing/test/mkdir.c +0 -108
- package/deps/liburing/liburing/test/multicqes_drain.c +0 -383
- package/deps/liburing/liburing/test/nop-all-sizes.c +0 -107
- package/deps/liburing/liburing/test/nop.c +0 -115
- package/deps/liburing/liburing/test/open-close.c +0 -146
- package/deps/liburing/liburing/test/openat2.c +0 -240
- package/deps/liburing/liburing/test/personality.c +0 -204
- package/deps/liburing/liburing/test/pipe-eof.c +0 -81
- package/deps/liburing/liburing/test/pipe-reuse.c +0 -105
- package/deps/liburing/liburing/test/poll-cancel-ton.c +0 -139
- package/deps/liburing/liburing/test/poll-cancel.c +0 -135
- package/deps/liburing/liburing/test/poll-link.c +0 -227
- package/deps/liburing/liburing/test/poll-many.c +0 -208
- package/deps/liburing/liburing/test/poll-mshot-update.c +0 -273
- package/deps/liburing/liburing/test/poll-ring.c +0 -48
- package/deps/liburing/liburing/test/poll-v-poll.c +0 -353
- package/deps/liburing/liburing/test/poll.c +0 -109
- package/deps/liburing/liburing/test/probe.c +0 -137
- package/deps/liburing/liburing/test/read-write.c +0 -876
- package/deps/liburing/liburing/test/register-restrictions.c +0 -633
- package/deps/liburing/liburing/test/rename.c +0 -134
- package/deps/liburing/liburing/test/ring-leak.c +0 -173
- package/deps/liburing/liburing/test/ring-leak2.c +0 -249
- package/deps/liburing/liburing/test/rsrc_tags.c +0 -449
- package/deps/liburing/liburing/test/runtests-loop.sh +0 -16
- package/deps/liburing/liburing/test/runtests.sh +0 -170
- package/deps/liburing/liburing/test/rw_merge_test.c +0 -97
- package/deps/liburing/liburing/test/self.c +0 -91
- package/deps/liburing/liburing/test/send_recv.c +0 -291
- package/deps/liburing/liburing/test/send_recvmsg.c +0 -345
- package/deps/liburing/liburing/test/sendmsg_fs_cve.c +0 -198
- package/deps/liburing/liburing/test/shared-wq.c +0 -84
- package/deps/liburing/liburing/test/short-read.c +0 -75
- package/deps/liburing/liburing/test/shutdown.c +0 -163
- package/deps/liburing/liburing/test/sigfd-deadlock.c +0 -74
- package/deps/liburing/liburing/test/socket-rw-eagain.c +0 -156
- package/deps/liburing/liburing/test/socket-rw.c +0 -147
- package/deps/liburing/liburing/test/splice.c +0 -511
- package/deps/liburing/liburing/test/sq-full-cpp.cc +0 -45
- package/deps/liburing/liburing/test/sq-full.c +0 -45
- package/deps/liburing/liburing/test/sq-poll-dup.c +0 -200
- package/deps/liburing/liburing/test/sq-poll-kthread.c +0 -168
- package/deps/liburing/liburing/test/sq-poll-share.c +0 -137
- package/deps/liburing/liburing/test/sq-space_left.c +0 -159
- package/deps/liburing/liburing/test/sqpoll-cancel-hang.c +0 -159
- package/deps/liburing/liburing/test/sqpoll-disable-exit.c +0 -195
- package/deps/liburing/liburing/test/sqpoll-exit-hang.c +0 -77
- package/deps/liburing/liburing/test/sqpoll-sleep.c +0 -68
- package/deps/liburing/liburing/test/statx.c +0 -172
- package/deps/liburing/liburing/test/stdout.c +0 -232
- package/deps/liburing/liburing/test/submit-link-fail.c +0 -154
- package/deps/liburing/liburing/test/submit-reuse.c +0 -239
- package/deps/liburing/liburing/test/symlink.c +0 -116
- package/deps/liburing/liburing/test/teardowns.c +0 -58
- package/deps/liburing/liburing/test/thread-exit.c +0 -131
- package/deps/liburing/liburing/test/timeout-new.c +0 -246
- package/deps/liburing/liburing/test/timeout-overflow.c +0 -204
- package/deps/liburing/liburing/test/timeout.c +0 -1354
- package/deps/liburing/liburing/test/unlink.c +0 -111
- package/deps/liburing/liburing/test/wakeup-hang.c +0 -162
- package/deps/rocksdb/rocksdb/README.md +0 -32
- package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
- package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
- package/deps/rocksdb/rocksdb/port/README +0 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -89,6 +89,7 @@ class FilePrefetchBuffer {
|
|
|
89
89
|
// while curr_ is being consumed. If data is overlapping in two buffers,
|
|
90
90
|
// data is copied to third buffer to return continuous buffer.
|
|
91
91
|
bufs_.resize(3);
|
|
92
|
+
(void)async_io_;
|
|
92
93
|
}
|
|
93
94
|
|
|
94
95
|
~FilePrefetchBuffer() {
|
|
@@ -131,10 +132,21 @@ class FilePrefetchBuffer {
|
|
|
131
132
|
uint64_t offset, size_t n,
|
|
132
133
|
Env::IOPriority rate_limiter_priority);
|
|
133
134
|
|
|
135
|
+
// Request for reading the data from a file asynchronously.
|
|
136
|
+
// If data already exists in the buffer, result will be updated.
|
|
137
|
+
// reader : the file reader.
|
|
138
|
+
// offset : the file offset to start reading from.
|
|
139
|
+
// n : the number of bytes to read.
|
|
140
|
+
// rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to
|
|
141
|
+
// bypass.
|
|
142
|
+
// result : if data already exists in the buffer, result will
|
|
143
|
+
// be updated with the data.
|
|
144
|
+
//
|
|
145
|
+
// If data already exists in the buffer, it will return Status::OK, otherwise
|
|
146
|
+
// it will send asynchronous request and return Status::TryAgain.
|
|
134
147
|
Status PrefetchAsync(const IOOptions& opts, RandomAccessFileReader* reader,
|
|
135
|
-
uint64_t offset, size_t
|
|
136
|
-
Env::IOPriority rate_limiter_priority,
|
|
137
|
-
bool& copy_to_third_buffer);
|
|
148
|
+
uint64_t offset, size_t n,
|
|
149
|
+
Env::IOPriority rate_limiter_priority, Slice* result);
|
|
138
150
|
|
|
139
151
|
// Tries returning the data for a file read from this buffer if that data is
|
|
140
152
|
// in the buffer.
|
|
@@ -159,8 +171,7 @@ class FilePrefetchBuffer {
|
|
|
159
171
|
bool TryReadFromCacheAsync(const IOOptions& opts,
|
|
160
172
|
RandomAccessFileReader* reader, uint64_t offset,
|
|
161
173
|
size_t n, Slice* result, Status* status,
|
|
162
|
-
Env::IOPriority rate_limiter_priority,
|
|
163
|
-
bool for_compaction /* = false */);
|
|
174
|
+
Env::IOPriority rate_limiter_priority);
|
|
164
175
|
|
|
165
176
|
// The minimum `offset` ever passed to TryReadFromCache(). This will only be
|
|
166
177
|
// tracked if track_min_offset = true.
|
|
@@ -207,22 +218,6 @@ class FilePrefetchBuffer {
|
|
|
207
218
|
}
|
|
208
219
|
}
|
|
209
220
|
|
|
210
|
-
bool IsEligibleForPrefetch(uint64_t offset, size_t n) {
|
|
211
|
-
// Prefetch only if this read is sequential otherwise reset readahead_size_
|
|
212
|
-
// to initial value.
|
|
213
|
-
if (!IsBlockSequential(offset)) {
|
|
214
|
-
UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
|
|
215
|
-
ResetValues();
|
|
216
|
-
return false;
|
|
217
|
-
}
|
|
218
|
-
num_file_reads_++;
|
|
219
|
-
if (num_file_reads_ <= kMinNumFileReadsToStartAutoReadahead) {
|
|
220
|
-
UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
|
|
221
|
-
return false;
|
|
222
|
-
}
|
|
223
|
-
return true;
|
|
224
|
-
}
|
|
225
|
-
|
|
226
221
|
// Callback function passed to underlying FS in case of asynchronous reads.
|
|
227
222
|
void PrefetchAsyncCallback(const FSReadRequest& req, void* cb_arg);
|
|
228
223
|
|
|
@@ -234,6 +229,17 @@ class FilePrefetchBuffer {
|
|
|
234
229
|
size_t roundup_len, size_t index, bool refit_tail,
|
|
235
230
|
uint64_t& chunk_len);
|
|
236
231
|
|
|
232
|
+
// It calls Poll API if there is any pending asynchronous request. It then
|
|
233
|
+
// checks if data is in any buffer. It clears the outdated data and swaps the
|
|
234
|
+
// buffers if required.
|
|
235
|
+
void PollAndUpdateBuffersIfNeeded(uint64_t offset);
|
|
236
|
+
|
|
237
|
+
Status PrefetchAsyncInternal(const IOOptions& opts,
|
|
238
|
+
RandomAccessFileReader* reader, uint64_t offset,
|
|
239
|
+
size_t length, size_t readahead_size,
|
|
240
|
+
Env::IOPriority rate_limiter_priority,
|
|
241
|
+
bool& copy_to_third_buffer);
|
|
242
|
+
|
|
237
243
|
Status Read(const IOOptions& opts, RandomAccessFileReader* reader,
|
|
238
244
|
Env::IOPriority rate_limiter_priority, uint64_t read_len,
|
|
239
245
|
uint64_t chunk_len, uint64_t rounddown_start, uint32_t index);
|
|
@@ -256,6 +262,22 @@ class FilePrefetchBuffer {
|
|
|
256
262
|
readahead_size_ = initial_auto_readahead_size_;
|
|
257
263
|
}
|
|
258
264
|
|
|
265
|
+
bool IsEligibleForPrefetch(uint64_t offset, size_t n) {
|
|
266
|
+
// Prefetch only if this read is sequential otherwise reset readahead_size_
|
|
267
|
+
// to initial value.
|
|
268
|
+
if (!IsBlockSequential(offset)) {
|
|
269
|
+
UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
|
|
270
|
+
ResetValues();
|
|
271
|
+
return false;
|
|
272
|
+
}
|
|
273
|
+
num_file_reads_++;
|
|
274
|
+
if (num_file_reads_ <= kMinNumFileReadsToStartAutoReadahead) {
|
|
275
|
+
UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
|
|
276
|
+
return false;
|
|
277
|
+
}
|
|
278
|
+
return true;
|
|
279
|
+
}
|
|
280
|
+
|
|
259
281
|
std::vector<BufferInfo> bufs_;
|
|
260
282
|
// curr_ represents the index for bufs_ indicating which buffer is being
|
|
261
283
|
// consumed currently.
|
|
@@ -534,15 +534,24 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
534
534
|
* initially (2 more data blocks).
|
|
535
535
|
*/
|
|
536
536
|
iter->Seek(BuildKey(0));
|
|
537
|
+
ASSERT_TRUE(iter->Valid());
|
|
537
538
|
iter->Seek(BuildKey(1000));
|
|
539
|
+
ASSERT_TRUE(iter->Valid());
|
|
538
540
|
iter->Seek(BuildKey(1004)); // Prefetch Data
|
|
541
|
+
ASSERT_TRUE(iter->Valid());
|
|
539
542
|
iter->Seek(BuildKey(1008));
|
|
543
|
+
ASSERT_TRUE(iter->Valid());
|
|
540
544
|
iter->Seek(BuildKey(1011));
|
|
545
|
+
ASSERT_TRUE(iter->Valid());
|
|
541
546
|
iter->Seek(BuildKey(1015)); // Prefetch Data
|
|
547
|
+
ASSERT_TRUE(iter->Valid());
|
|
542
548
|
iter->Seek(BuildKey(1019));
|
|
549
|
+
ASSERT_TRUE(iter->Valid());
|
|
543
550
|
// Missed 2 blocks but they are already in buffer so no reset.
|
|
544
551
|
iter->Seek(BuildKey(103)); // Already in buffer.
|
|
552
|
+
ASSERT_TRUE(iter->Valid());
|
|
545
553
|
iter->Seek(BuildKey(1033)); // Prefetch Data
|
|
554
|
+
ASSERT_TRUE(iter->Valid());
|
|
546
555
|
if (support_prefetch && !use_direct_io) {
|
|
547
556
|
ASSERT_EQ(fs->GetPrefetchCount(), 3);
|
|
548
557
|
fs->ClearPrefetchCount();
|
|
@@ -558,10 +567,15 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
558
567
|
*/
|
|
559
568
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
560
569
|
iter->Seek(BuildKey(0));
|
|
570
|
+
ASSERT_TRUE(iter->Valid());
|
|
561
571
|
iter->Seek(BuildKey(1008));
|
|
572
|
+
ASSERT_TRUE(iter->Valid());
|
|
562
573
|
iter->Seek(BuildKey(1019));
|
|
574
|
+
ASSERT_TRUE(iter->Valid());
|
|
563
575
|
iter->Seek(BuildKey(1033));
|
|
576
|
+
ASSERT_TRUE(iter->Valid());
|
|
564
577
|
iter->Seek(BuildKey(1048));
|
|
578
|
+
ASSERT_TRUE(iter->Valid());
|
|
565
579
|
if (support_prefetch && !use_direct_io) {
|
|
566
580
|
ASSERT_EQ(fs->GetPrefetchCount(), 0);
|
|
567
581
|
fs->ClearPrefetchCount();
|
|
@@ -576,9 +590,13 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
576
590
|
*/
|
|
577
591
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
578
592
|
iter->Seek(BuildKey(0));
|
|
593
|
+
ASSERT_TRUE(iter->Valid());
|
|
579
594
|
iter->Seek(BuildKey(1));
|
|
595
|
+
ASSERT_TRUE(iter->Valid());
|
|
580
596
|
iter->Seek(BuildKey(10));
|
|
597
|
+
ASSERT_TRUE(iter->Valid());
|
|
581
598
|
iter->Seek(BuildKey(100));
|
|
599
|
+
ASSERT_TRUE(iter->Valid());
|
|
582
600
|
if (support_prefetch && !use_direct_io) {
|
|
583
601
|
ASSERT_EQ(fs->GetPrefetchCount(), 0);
|
|
584
602
|
fs->ClearPrefetchCount();
|
|
@@ -596,14 +614,21 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
596
614
|
*/
|
|
597
615
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
598
616
|
iter->Seek(BuildKey(0));
|
|
617
|
+
ASSERT_TRUE(iter->Valid());
|
|
599
618
|
iter->Seek(BuildKey(1000));
|
|
619
|
+
ASSERT_TRUE(iter->Valid());
|
|
600
620
|
iter->Seek(BuildKey(1004)); // This iteration will prefetch buffer
|
|
621
|
+
ASSERT_TRUE(iter->Valid());
|
|
601
622
|
iter->Seek(BuildKey(1008));
|
|
623
|
+
ASSERT_TRUE(iter->Valid());
|
|
602
624
|
iter->Seek(
|
|
603
625
|
BuildKey(996)); // Reseek won't prefetch any data and
|
|
604
626
|
// readahead_size will be initialized to 8*1024.
|
|
627
|
+
ASSERT_TRUE(iter->Valid());
|
|
605
628
|
iter->Seek(BuildKey(992));
|
|
629
|
+
ASSERT_TRUE(iter->Valid());
|
|
606
630
|
iter->Seek(BuildKey(989));
|
|
631
|
+
ASSERT_TRUE(iter->Valid());
|
|
607
632
|
if (support_prefetch && !use_direct_io) {
|
|
608
633
|
ASSERT_EQ(fs->GetPrefetchCount(), 1);
|
|
609
634
|
fs->ClearPrefetchCount();
|
|
@@ -615,11 +640,17 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
615
640
|
// Read sequentially to confirm readahead_size is reset to initial value (2
|
|
616
641
|
// more data blocks)
|
|
617
642
|
iter->Seek(BuildKey(1011));
|
|
643
|
+
ASSERT_TRUE(iter->Valid());
|
|
618
644
|
iter->Seek(BuildKey(1015));
|
|
645
|
+
ASSERT_TRUE(iter->Valid());
|
|
619
646
|
iter->Seek(BuildKey(1019)); // Prefetch Data
|
|
647
|
+
ASSERT_TRUE(iter->Valid());
|
|
620
648
|
iter->Seek(BuildKey(1022));
|
|
649
|
+
ASSERT_TRUE(iter->Valid());
|
|
621
650
|
iter->Seek(BuildKey(1026));
|
|
651
|
+
ASSERT_TRUE(iter->Valid());
|
|
622
652
|
iter->Seek(BuildKey(103)); // Prefetch Data
|
|
653
|
+
ASSERT_TRUE(iter->Valid());
|
|
623
654
|
if (support_prefetch && !use_direct_io) {
|
|
624
655
|
ASSERT_EQ(fs->GetPrefetchCount(), 2);
|
|
625
656
|
fs->ClearPrefetchCount();
|
|
@@ -634,12 +665,19 @@ TEST_P(PrefetchTest, PrefetchWhenReseek) {
|
|
|
634
665
|
*/
|
|
635
666
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
636
667
|
iter->Seek(BuildKey(0));
|
|
668
|
+
ASSERT_TRUE(iter->Valid());
|
|
637
669
|
iter->Seek(BuildKey(1167));
|
|
670
|
+
ASSERT_TRUE(iter->Valid());
|
|
638
671
|
iter->Seek(BuildKey(1334)); // This iteration will prefetch buffer
|
|
672
|
+
ASSERT_TRUE(iter->Valid());
|
|
639
673
|
iter->Seek(BuildKey(1499));
|
|
674
|
+
ASSERT_TRUE(iter->Valid());
|
|
640
675
|
iter->Seek(BuildKey(1667));
|
|
676
|
+
ASSERT_TRUE(iter->Valid());
|
|
641
677
|
iter->Seek(BuildKey(1847));
|
|
678
|
+
ASSERT_TRUE(iter->Valid());
|
|
642
679
|
iter->Seek(BuildKey(1999));
|
|
680
|
+
ASSERT_TRUE(iter->Valid());
|
|
643
681
|
if (support_prefetch && !use_direct_io) {
|
|
644
682
|
ASSERT_EQ(fs->GetPrefetchCount(), 1);
|
|
645
683
|
fs->ClearPrefetchCount();
|
|
@@ -766,8 +804,11 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) {
|
|
|
766
804
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
767
805
|
// Warm up the cache
|
|
768
806
|
iter->Seek(BuildKey(1011));
|
|
807
|
+
ASSERT_TRUE(iter->Valid());
|
|
769
808
|
iter->Seek(BuildKey(1015));
|
|
809
|
+
ASSERT_TRUE(iter->Valid());
|
|
770
810
|
iter->Seek(BuildKey(1019));
|
|
811
|
+
ASSERT_TRUE(iter->Valid());
|
|
771
812
|
if (support_prefetch && !use_direct_io) {
|
|
772
813
|
ASSERT_EQ(fs->GetPrefetchCount(), 1);
|
|
773
814
|
fs->ClearPrefetchCount();
|
|
@@ -780,20 +821,31 @@ TEST_P(PrefetchTest, PrefetchWhenReseekwithCache) {
|
|
|
780
821
|
// After caching, blocks will be read from cache (Sequential blocks)
|
|
781
822
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
|
|
782
823
|
iter->Seek(BuildKey(0));
|
|
824
|
+
ASSERT_TRUE(iter->Valid());
|
|
783
825
|
iter->Seek(BuildKey(1000));
|
|
826
|
+
ASSERT_TRUE(iter->Valid());
|
|
784
827
|
iter->Seek(BuildKey(1004)); // Prefetch data (not in cache).
|
|
828
|
+
ASSERT_TRUE(iter->Valid());
|
|
785
829
|
// Missed one sequential block but next is already in buffer so readahead
|
|
786
830
|
// will not be reset.
|
|
787
831
|
iter->Seek(BuildKey(1011));
|
|
832
|
+
ASSERT_TRUE(iter->Valid());
|
|
788
833
|
// Prefetch data but blocks are in cache so no prefetch and reset.
|
|
789
834
|
iter->Seek(BuildKey(1015));
|
|
835
|
+
ASSERT_TRUE(iter->Valid());
|
|
790
836
|
iter->Seek(BuildKey(1019));
|
|
837
|
+
ASSERT_TRUE(iter->Valid());
|
|
791
838
|
iter->Seek(BuildKey(1022));
|
|
839
|
+
ASSERT_TRUE(iter->Valid());
|
|
792
840
|
// Prefetch data with readahead_size = 4 blocks.
|
|
793
841
|
iter->Seek(BuildKey(1026));
|
|
842
|
+
ASSERT_TRUE(iter->Valid());
|
|
794
843
|
iter->Seek(BuildKey(103));
|
|
844
|
+
ASSERT_TRUE(iter->Valid());
|
|
795
845
|
iter->Seek(BuildKey(1033));
|
|
846
|
+
ASSERT_TRUE(iter->Valid());
|
|
796
847
|
iter->Seek(BuildKey(1037));
|
|
848
|
+
ASSERT_TRUE(iter->Valid());
|
|
797
849
|
|
|
798
850
|
if (support_prefetch && !use_direct_io) {
|
|
799
851
|
ASSERT_EQ(fs->GetPrefetchCount(), 3);
|
|
@@ -881,7 +933,7 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhead) {
|
|
|
881
933
|
[&](void*) { buff_prefetch_count++; });
|
|
882
934
|
|
|
883
935
|
SyncPoint::GetInstance()->SetCallBack(
|
|
884
|
-
"FilePrefetchBuffer::
|
|
936
|
+
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
885
937
|
[&](void*) { buff_async_prefetch_count++; });
|
|
886
938
|
|
|
887
939
|
// The callback checks, since reads are sequential, readahead_size doesn't
|
|
@@ -955,7 +1007,7 @@ class PrefetchTest2 : public DBTestBase,
|
|
|
955
1007
|
INSTANTIATE_TEST_CASE_P(PrefetchTest2, PrefetchTest2, ::testing::Bool());
|
|
956
1008
|
|
|
957
1009
|
#ifndef ROCKSDB_LITE
|
|
958
|
-
TEST_P(PrefetchTest2, NonSequentialReads) {
|
|
1010
|
+
TEST_P(PrefetchTest2, NonSequentialReadsWithAdaptiveReadahead) {
|
|
959
1011
|
const int kNumKeys = 1000;
|
|
960
1012
|
// Set options
|
|
961
1013
|
std::shared_ptr<MockFS> fs =
|
|
@@ -1002,9 +1054,8 @@ TEST_P(PrefetchTest2, NonSequentialReads) {
|
|
|
1002
1054
|
int set_readahead = 0;
|
|
1003
1055
|
size_t readahead_size = 0;
|
|
1004
1056
|
|
|
1005
|
-
SyncPoint::GetInstance()->SetCallBack(
|
|
1006
|
-
|
|
1007
|
-
[&](void*) { buff_prefetch_count++; });
|
|
1057
|
+
SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
|
|
1058
|
+
[&](void*) { buff_prefetch_count++; });
|
|
1008
1059
|
SyncPoint::GetInstance()->SetCallBack(
|
|
1009
1060
|
"BlockPrefetcher::SetReadaheadState",
|
|
1010
1061
|
[&](void* /*arg*/) { set_readahead++; });
|
|
@@ -1018,13 +1069,15 @@ TEST_P(PrefetchTest2, NonSequentialReads) {
|
|
|
1018
1069
|
// Iterate until prefetch is done.
|
|
1019
1070
|
ReadOptions ro;
|
|
1020
1071
|
ro.adaptive_readahead = true;
|
|
1021
|
-
// TODO akanksha: Remove after adding new units.
|
|
1022
|
-
ro.async_io = true;
|
|
1023
1072
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
|
|
1073
|
+
|
|
1024
1074
|
iter->SeekToFirst();
|
|
1075
|
+
ASSERT_TRUE(iter->Valid());
|
|
1076
|
+
|
|
1025
1077
|
while (iter->Valid() && buff_prefetch_count == 0) {
|
|
1026
1078
|
iter->Next();
|
|
1027
1079
|
}
|
|
1080
|
+
|
|
1028
1081
|
ASSERT_EQ(readahead_size, 8 * 1024);
|
|
1029
1082
|
ASSERT_EQ(buff_prefetch_count, 1);
|
|
1030
1083
|
ASSERT_EQ(set_readahead, 0);
|
|
@@ -1033,9 +1086,12 @@ TEST_P(PrefetchTest2, NonSequentialReads) {
|
|
|
1033
1086
|
// Move to last file and check readahead size fallbacks to 8KB. So next
|
|
1034
1087
|
// readahead size after prefetch should be 8 * 1024;
|
|
1035
1088
|
iter->Seek(BuildKey(4004));
|
|
1089
|
+
ASSERT_TRUE(iter->Valid());
|
|
1090
|
+
|
|
1036
1091
|
while (iter->Valid() && buff_prefetch_count == 0) {
|
|
1037
1092
|
iter->Next();
|
|
1038
1093
|
}
|
|
1094
|
+
|
|
1039
1095
|
ASSERT_EQ(readahead_size, 8 * 1024);
|
|
1040
1096
|
ASSERT_EQ(set_readahead, 0);
|
|
1041
1097
|
ASSERT_EQ(buff_prefetch_count, 1);
|
|
@@ -1099,7 +1155,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
|
|
|
1099
1155
|
size_t decrease_readahead_size = 8 * 1024;
|
|
1100
1156
|
|
|
1101
1157
|
SyncPoint::GetInstance()->SetCallBack(
|
|
1102
|
-
"FilePrefetchBuffer::
|
|
1158
|
+
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
1103
1159
|
[&](void*) { buff_prefetch_count++; });
|
|
1104
1160
|
SyncPoint::GetInstance()->SetCallBack(
|
|
1105
1161
|
"FilePrefetchBuffer::TryReadFromCache", [&](void* arg) {
|
|
@@ -1120,8 +1176,11 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
|
|
|
1120
1176
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
|
|
1121
1177
|
// Warm up the cache
|
|
1122
1178
|
iter->Seek(BuildKey(1011));
|
|
1179
|
+
ASSERT_TRUE(iter->Valid());
|
|
1123
1180
|
iter->Seek(BuildKey(1015));
|
|
1181
|
+
ASSERT_TRUE(iter->Valid());
|
|
1124
1182
|
iter->Seek(BuildKey(1019));
|
|
1183
|
+
ASSERT_TRUE(iter->Valid());
|
|
1125
1184
|
buff_prefetch_count = 0;
|
|
1126
1185
|
}
|
|
1127
1186
|
|
|
@@ -1129,26 +1188,39 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
|
|
|
1129
1188
|
ASSERT_OK(options.statistics->Reset());
|
|
1130
1189
|
// After caching, blocks will be read from cache (Sequential blocks)
|
|
1131
1190
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
|
|
1132
|
-
iter->Seek(
|
|
1191
|
+
iter->Seek(
|
|
1192
|
+
BuildKey(0)); // In cache so it will decrease the readahead_size.
|
|
1133
1193
|
ASSERT_TRUE(iter->Valid());
|
|
1134
|
-
|
|
1194
|
+
expected_current_readahead_size = std::max(
|
|
1195
|
+
decrease_readahead_size,
|
|
1196
|
+
(expected_current_readahead_size >= decrease_readahead_size
|
|
1197
|
+
? (expected_current_readahead_size - decrease_readahead_size)
|
|
1198
|
+
: 0));
|
|
1199
|
+
|
|
1200
|
+
iter->Seek(BuildKey(1000)); // Prefetch the block.
|
|
1135
1201
|
ASSERT_TRUE(iter->Valid());
|
|
1136
|
-
|
|
1202
|
+
ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
|
|
1203
|
+
expected_current_readahead_size *= 2;
|
|
1204
|
+
|
|
1205
|
+
iter->Seek(BuildKey(1004)); // Prefetch the block.
|
|
1137
1206
|
ASSERT_TRUE(iter->Valid());
|
|
1138
1207
|
ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
|
|
1208
|
+
expected_current_readahead_size *= 2;
|
|
1139
1209
|
|
|
1140
|
-
//
|
|
1141
|
-
// readahead will not be reset.
|
|
1210
|
+
// 1011 is already in cache but won't reset??
|
|
1142
1211
|
iter->Seek(BuildKey(1011));
|
|
1143
1212
|
ASSERT_TRUE(iter->Valid());
|
|
1144
|
-
ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
|
|
1145
1213
|
|
|
1146
1214
|
// Eligible to Prefetch data (not in buffer) but block is in cache so no
|
|
1147
1215
|
// prefetch will happen and will result in decrease in readahead_size.
|
|
1148
1216
|
// readahead_size will be 8 * 1024
|
|
1149
1217
|
iter->Seek(BuildKey(1015));
|
|
1150
1218
|
ASSERT_TRUE(iter->Valid());
|
|
1151
|
-
expected_current_readahead_size
|
|
1219
|
+
expected_current_readahead_size = std::max(
|
|
1220
|
+
decrease_readahead_size,
|
|
1221
|
+
(expected_current_readahead_size >= decrease_readahead_size
|
|
1222
|
+
? (expected_current_readahead_size - decrease_readahead_size)
|
|
1223
|
+
: 0));
|
|
1152
1224
|
|
|
1153
1225
|
// 1016 is the same block as 1015. So no change in readahead_size.
|
|
1154
1226
|
iter->Seek(BuildKey(1016));
|
|
@@ -1169,7 +1241,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
|
|
|
1169
1241
|
iter->Seek(BuildKey(1022));
|
|
1170
1242
|
ASSERT_TRUE(iter->Valid());
|
|
1171
1243
|
ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
|
|
1172
|
-
ASSERT_EQ(buff_prefetch_count,
|
|
1244
|
+
ASSERT_EQ(buff_prefetch_count, 3);
|
|
1173
1245
|
|
|
1174
1246
|
// Check stats to make sure async prefetch is done.
|
|
1175
1247
|
{
|
|
@@ -1179,6 +1251,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadIfInCache) {
|
|
|
1179
1251
|
ASSERT_EQ(async_read_bytes.count, 0);
|
|
1180
1252
|
} else {
|
|
1181
1253
|
ASSERT_GT(async_read_bytes.count, 0);
|
|
1254
|
+
ASSERT_GT(get_perf_context()->number_async_seek, 0);
|
|
1182
1255
|
}
|
|
1183
1256
|
}
|
|
1184
1257
|
|
|
@@ -1264,7 +1337,7 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) {
|
|
|
1264
1337
|
}
|
|
1265
1338
|
|
|
1266
1339
|
SyncPoint::GetInstance()->SetCallBack(
|
|
1267
|
-
"FilePrefetchBuffer::
|
|
1340
|
+
"FilePrefetchBuffer::PrefetchAsyncInternal:Start",
|
|
1268
1341
|
[&](void*) { buff_prefetch_count++; });
|
|
1269
1342
|
|
|
1270
1343
|
SyncPoint::GetInstance()->SetCallBack(
|
|
@@ -1275,12 +1348,15 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) {
|
|
|
1275
1348
|
// Read the keys.
|
|
1276
1349
|
{
|
|
1277
1350
|
ASSERT_OK(options.statistics->Reset());
|
|
1351
|
+
get_perf_context()->Reset();
|
|
1352
|
+
|
|
1278
1353
|
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
|
|
1279
1354
|
int num_keys = 0;
|
|
1280
1355
|
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
1281
1356
|
ASSERT_OK(iter->status());
|
|
1282
1357
|
num_keys++;
|
|
1283
1358
|
}
|
|
1359
|
+
|
|
1284
1360
|
ASSERT_EQ(num_keys, total_keys);
|
|
1285
1361
|
ASSERT_GT(buff_prefetch_count, 0);
|
|
1286
1362
|
|
|
@@ -1301,6 +1377,55 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncWithPosixFS) {
|
|
|
1301
1377
|
}
|
|
1302
1378
|
ASSERT_GT(prefetched_bytes_discarded.count, 0);
|
|
1303
1379
|
}
|
|
1380
|
+
ASSERT_EQ(get_perf_context()->number_async_seek, 0);
|
|
1381
|
+
}
|
|
1382
|
+
|
|
1383
|
+
{
|
|
1384
|
+
// Read the keys using seek.
|
|
1385
|
+
{
|
|
1386
|
+
ASSERT_OK(options.statistics->Reset());
|
|
1387
|
+
get_perf_context()->Reset();
|
|
1388
|
+
|
|
1389
|
+
auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
|
|
1390
|
+
int num_keys = 0;
|
|
1391
|
+
iter->Seek(BuildKey(450));
|
|
1392
|
+
while (iter->Valid()) {
|
|
1393
|
+
ASSERT_OK(iter->status());
|
|
1394
|
+
num_keys++;
|
|
1395
|
+
iter->Next();
|
|
1396
|
+
}
|
|
1397
|
+
ASSERT_OK(iter->status());
|
|
1398
|
+
|
|
1399
|
+
iter->Seek(BuildKey(450));
|
|
1400
|
+
while (iter->Valid()) {
|
|
1401
|
+
ASSERT_OK(iter->status());
|
|
1402
|
+
num_keys++;
|
|
1403
|
+
iter->Prev();
|
|
1404
|
+
}
|
|
1405
|
+
|
|
1406
|
+
ASSERT_EQ(num_keys, total_keys + 1);
|
|
1407
|
+
ASSERT_GT(buff_prefetch_count, 0);
|
|
1408
|
+
|
|
1409
|
+
// Check stats to make sure async prefetch is done.
|
|
1410
|
+
{
|
|
1411
|
+
HistogramData async_read_bytes;
|
|
1412
|
+
options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
|
|
1413
|
+
HistogramData prefetched_bytes_discarded;
|
|
1414
|
+
options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED,
|
|
1415
|
+
&prefetched_bytes_discarded);
|
|
1416
|
+
|
|
1417
|
+
// Not all platforms support iouring. In that case, ReadAsync in posix
|
|
1418
|
+
// won't submit async requests.
|
|
1419
|
+
if (read_async_called) {
|
|
1420
|
+
ASSERT_GT(async_read_bytes.count, 0);
|
|
1421
|
+
ASSERT_GT(get_perf_context()->number_async_seek, 0);
|
|
1422
|
+
} else {
|
|
1423
|
+
ASSERT_EQ(async_read_bytes.count, 0);
|
|
1424
|
+
ASSERT_EQ(get_perf_context()->number_async_seek, 0);
|
|
1425
|
+
}
|
|
1426
|
+
ASSERT_GT(prefetched_bytes_discarded.count, 0);
|
|
1427
|
+
}
|
|
1428
|
+
}
|
|
1304
1429
|
}
|
|
1305
1430
|
|
|
1306
1431
|
SyncPoint::GetInstance()->DisableProcessing();
|
|
@@ -55,9 +55,9 @@ IOStatus WritableFileWriter::Append(const Slice& data, uint32_t crc32c_checksum,
|
|
|
55
55
|
|
|
56
56
|
{
|
|
57
57
|
IOOptions io_options;
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
58
|
+
io_options.rate_limiter_priority =
|
|
59
|
+
WritableFileWriter::DecideRateLimiterPriority(
|
|
60
|
+
writable_file_->GetIOPriority(), op_rate_limiter_priority);
|
|
61
61
|
IOSTATS_TIMER_GUARD(prepare_write_nanos);
|
|
62
62
|
TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite");
|
|
63
63
|
writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left,
|
|
@@ -338,9 +338,9 @@ IOStatus WritableFileWriter::Flush(Env::IOPriority op_rate_limiter_priority) {
|
|
|
338
338
|
}
|
|
339
339
|
#endif
|
|
340
340
|
IOOptions io_options;
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
341
|
+
io_options.rate_limiter_priority =
|
|
342
|
+
WritableFileWriter::DecideRateLimiterPriority(
|
|
343
|
+
writable_file_->GetIOPriority(), op_rate_limiter_priority);
|
|
344
344
|
s = writable_file_->Flush(io_options, nullptr);
|
|
345
345
|
#ifndef ROCKSDB_LITE
|
|
346
346
|
if (ShouldNotifyListeners()) {
|
|
@@ -507,11 +507,11 @@ IOStatus WritableFileWriter::WriteBuffered(
|
|
|
507
507
|
size_t left = size;
|
|
508
508
|
DataVerificationInfo v_info;
|
|
509
509
|
char checksum_buf[sizeof(uint32_t)];
|
|
510
|
-
IOOptions io_options;
|
|
511
510
|
Env::IOPriority rate_limiter_priority_used =
|
|
512
511
|
WritableFileWriter::DecideRateLimiterPriority(
|
|
513
|
-
writable_file_->GetIOPriority(), op_rate_limiter_priority
|
|
514
|
-
|
|
512
|
+
writable_file_->GetIOPriority(), op_rate_limiter_priority);
|
|
513
|
+
IOOptions io_options;
|
|
514
|
+
io_options.rate_limiter_priority = rate_limiter_priority_used;
|
|
515
515
|
|
|
516
516
|
while (left > 0) {
|
|
517
517
|
size_t allowed = left;
|
|
@@ -596,11 +596,11 @@ IOStatus WritableFileWriter::WriteBufferedWithChecksum(
|
|
|
596
596
|
size_t left = size;
|
|
597
597
|
DataVerificationInfo v_info;
|
|
598
598
|
char checksum_buf[sizeof(uint32_t)];
|
|
599
|
-
IOOptions io_options;
|
|
600
599
|
Env::IOPriority rate_limiter_priority_used =
|
|
601
600
|
WritableFileWriter::DecideRateLimiterPriority(
|
|
602
|
-
writable_file_->GetIOPriority(), op_rate_limiter_priority
|
|
603
|
-
|
|
601
|
+
writable_file_->GetIOPriority(), op_rate_limiter_priority);
|
|
602
|
+
IOOptions io_options;
|
|
603
|
+
io_options.rate_limiter_priority = rate_limiter_priority_used;
|
|
604
604
|
// Check how much is allowed. Here, we loop until the rate limiter allows to
|
|
605
605
|
// write the entire buffer.
|
|
606
606
|
// TODO: need to be improved since it sort of defeats the purpose of the rate
|
|
@@ -726,11 +726,11 @@ IOStatus WritableFileWriter::WriteDirect(
|
|
|
726
726
|
size_t left = buf_.CurrentSize();
|
|
727
727
|
DataVerificationInfo v_info;
|
|
728
728
|
char checksum_buf[sizeof(uint32_t)];
|
|
729
|
-
IOOptions io_options;
|
|
730
729
|
Env::IOPriority rate_limiter_priority_used =
|
|
731
730
|
WritableFileWriter::DecideRateLimiterPriority(
|
|
732
|
-
writable_file_->GetIOPriority(), op_rate_limiter_priority
|
|
733
|
-
|
|
731
|
+
writable_file_->GetIOPriority(), op_rate_limiter_priority);
|
|
732
|
+
IOOptions io_options;
|
|
733
|
+
io_options.rate_limiter_priority = rate_limiter_priority_used;
|
|
734
734
|
|
|
735
735
|
while (left > 0) {
|
|
736
736
|
// Check how much is allowed
|
|
@@ -827,11 +827,11 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
|
|
|
827
827
|
DataVerificationInfo v_info;
|
|
828
828
|
char checksum_buf[sizeof(uint32_t)];
|
|
829
829
|
|
|
830
|
-
IOOptions io_options;
|
|
831
830
|
Env::IOPriority rate_limiter_priority_used =
|
|
832
831
|
WritableFileWriter::DecideRateLimiterPriority(
|
|
833
|
-
writable_file_->GetIOPriority(), op_rate_limiter_priority
|
|
834
|
-
|
|
832
|
+
writable_file_->GetIOPriority(), op_rate_limiter_priority);
|
|
833
|
+
IOOptions io_options;
|
|
834
|
+
io_options.rate_limiter_priority = rate_limiter_priority_used;
|
|
835
835
|
// Check how much is allowed. Here, we loop until the rate limiter allows to
|
|
836
836
|
// write the entire buffer.
|
|
837
837
|
// TODO: need to be improved since it sort of defeats the purpose of the rate
|
|
@@ -901,21 +901,17 @@ IOStatus WritableFileWriter::WriteDirectWithChecksum(
|
|
|
901
901
|
#endif // !ROCKSDB_LITE
|
|
902
902
|
Env::IOPriority WritableFileWriter::DecideRateLimiterPriority(
|
|
903
903
|
Env::IOPriority writable_file_io_priority,
|
|
904
|
-
Env::IOPriority op_rate_limiter_priority
|
|
905
|
-
Env::IOPriority& iooptions_io_priority) {
|
|
906
|
-
Env::IOPriority rate_limiter_priority{Env::IO_TOTAL};
|
|
904
|
+
Env::IOPriority op_rate_limiter_priority) {
|
|
907
905
|
if (writable_file_io_priority == Env::IO_TOTAL &&
|
|
908
906
|
op_rate_limiter_priority == Env::IO_TOTAL) {
|
|
909
|
-
|
|
907
|
+
return Env::IO_TOTAL;
|
|
910
908
|
} else if (writable_file_io_priority == Env::IO_TOTAL) {
|
|
911
|
-
|
|
909
|
+
return op_rate_limiter_priority;
|
|
912
910
|
} else if (op_rate_limiter_priority == Env::IO_TOTAL) {
|
|
913
|
-
|
|
911
|
+
return writable_file_io_priority;
|
|
914
912
|
} else {
|
|
915
|
-
|
|
913
|
+
return op_rate_limiter_priority;
|
|
916
914
|
}
|
|
917
|
-
iooptions_io_priority = rate_limiter_priority;
|
|
918
|
-
return rate_limiter_priority;
|
|
919
915
|
}
|
|
920
916
|
|
|
921
917
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -277,11 +277,10 @@ class WritableFileWriter {
|
|
|
277
277
|
const char* GetFileChecksumFuncName() const;
|
|
278
278
|
|
|
279
279
|
private:
|
|
280
|
-
// Decide the Rate Limiter priority
|
|
280
|
+
// Decide the Rate Limiter priority.
|
|
281
281
|
static Env::IOPriority DecideRateLimiterPriority(
|
|
282
282
|
Env::IOPriority writable_file_io_priority,
|
|
283
|
-
Env::IOPriority op_rate_limiter_priority
|
|
284
|
-
Env::IOPriority& iooptions_io_priority);
|
|
283
|
+
Env::IOPriority op_rate_limiter_priority);
|
|
285
284
|
|
|
286
285
|
// Used when os buffering is OFF and we are writing
|
|
287
286
|
// DMA such as in Direct I/O mode
|
|
@@ -100,8 +100,9 @@ struct CompressionOptions {
|
|
|
100
100
|
//
|
|
101
101
|
// The dictionary is created by sampling the SST file data. If
|
|
102
102
|
// `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
|
|
103
|
-
// dictionary generator
|
|
104
|
-
//
|
|
103
|
+
// dictionary generator (see comments for option `use_zstd_dict_trainer` for
|
|
104
|
+
// detail on dictionary generator). If `zstd_max_train_bytes` is zero, the
|
|
105
|
+
// random samples are used directly as the dictionary.
|
|
105
106
|
//
|
|
106
107
|
// When compression dictionary is disabled, we compress and write each block
|
|
107
108
|
// before buffering data for the next one. When compression dictionary is
|
|
@@ -173,6 +174,20 @@ struct CompressionOptions {
|
|
|
173
174
|
// Default: 0 (unlimited)
|
|
174
175
|
uint64_t max_dict_buffer_bytes;
|
|
175
176
|
|
|
177
|
+
// Use zstd trainer to generate dictionaries. When this option is set to true,
|
|
178
|
+
// zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
|
|
179
|
+
// buffered data will be passed to zstd dictionary trainer to generate a
|
|
180
|
+
// dictionary of size max_dict_bytes.
|
|
181
|
+
//
|
|
182
|
+
// When this option is false, zstd's API ZDICT_finalizeDictionary() will be
|
|
183
|
+
// called to generate dictionaries. zstd_max_train_bytes of training sampled
|
|
184
|
+
// data will be passed to this API. Using this API should save CPU time on
|
|
185
|
+
// dictionary training, but the compression ratio may not be as good as using
|
|
186
|
+
// a dictionary trainer.
|
|
187
|
+
//
|
|
188
|
+
// Default: true
|
|
189
|
+
bool use_zstd_dict_trainer;
|
|
190
|
+
|
|
176
191
|
CompressionOptions()
|
|
177
192
|
: window_bits(-14),
|
|
178
193
|
level(kDefaultCompressionLevel),
|
|
@@ -181,11 +196,13 @@ struct CompressionOptions {
|
|
|
181
196
|
zstd_max_train_bytes(0),
|
|
182
197
|
parallel_threads(1),
|
|
183
198
|
enabled(false),
|
|
184
|
-
max_dict_buffer_bytes(0)
|
|
199
|
+
max_dict_buffer_bytes(0),
|
|
200
|
+
use_zstd_dict_trainer(true) {}
|
|
185
201
|
CompressionOptions(int wbits, int _lev, int _strategy,
|
|
186
202
|
uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes,
|
|
187
203
|
uint32_t _parallel_threads, bool _enabled,
|
|
188
|
-
uint64_t _max_dict_buffer_bytes
|
|
204
|
+
uint64_t _max_dict_buffer_bytes,
|
|
205
|
+
bool _use_zstd_dict_trainer)
|
|
189
206
|
: window_bits(wbits),
|
|
190
207
|
level(_lev),
|
|
191
208
|
strategy(_strategy),
|
|
@@ -193,7 +210,8 @@ struct CompressionOptions {
|
|
|
193
210
|
zstd_max_train_bytes(_zstd_max_train_bytes),
|
|
194
211
|
parallel_threads(_parallel_threads),
|
|
195
212
|
enabled(_enabled),
|
|
196
|
-
max_dict_buffer_bytes(_max_dict_buffer_bytes)
|
|
213
|
+
max_dict_buffer_bytes(_max_dict_buffer_bytes),
|
|
214
|
+
use_zstd_dict_trainer(_use_zstd_dict_trainer) {}
|
|
197
215
|
};
|
|
198
216
|
|
|
199
217
|
// Temperature of a file. Used to pass to FileSystem for a different
|