@nxtedition/rocksdb 7.1.14 → 7.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +1 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
- package/deps/rocksdb/rocksdb/Makefile +91 -11
- package/deps/rocksdb/rocksdb/TARGETS +8 -4
- package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
- package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
- package/deps/rocksdb/rocksdb/db/c.cc +29 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
- package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
- package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
- package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
- package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
- package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
- package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
- package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
- package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
- package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
- package/deps/rocksdb/rocksdb/file/filename.h +4 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
- package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options.cc +2 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
- package/deps/rocksdb/rocksdb/src.mk +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/format.cc +26 -29
- package/deps/rocksdb/rocksdb/table/format.h +44 -26
- package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
- package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
- package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
- package/deps/rocksdb/rocksdb/util/timer.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
- package/deps/rocksdb/rocksdb.gyp +0 -1
- package/index.js +6 -10
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#include "file/file_prefetch_buffer.h"
|
|
11
11
|
|
|
12
12
|
#include <algorithm>
|
|
13
|
+
#include <cassert>
|
|
13
14
|
|
|
14
15
|
#include "file/random_access_file_reader.h"
|
|
15
16
|
#include "monitoring/histogram.h"
|
|
@@ -23,8 +24,8 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
23
24
|
|
|
24
25
|
void FilePrefetchBuffer::CalculateOffsetAndLen(size_t alignment,
|
|
25
26
|
uint64_t offset,
|
|
26
|
-
size_t roundup_len,
|
|
27
|
-
bool refit_tail,
|
|
27
|
+
size_t roundup_len,
|
|
28
|
+
uint32_t index, bool refit_tail,
|
|
28
29
|
uint64_t& chunk_len) {
|
|
29
30
|
uint64_t chunk_offset_in_buffer = 0;
|
|
30
31
|
bool copy_data_to_new_buffer = false;
|
|
@@ -32,9 +33,7 @@ void FilePrefetchBuffer::CalculateOffsetAndLen(size_t alignment,
|
|
|
32
33
|
// If only a few bytes exist -- reuse them & read only what is really needed.
|
|
33
34
|
// This is typically the case of incremental reading of data.
|
|
34
35
|
// If no bytes exist in buffer -- full pread.
|
|
35
|
-
if (
|
|
36
|
-
offset >= bufs_[index].offset_ &&
|
|
37
|
-
offset <= bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize()) {
|
|
36
|
+
if (DoesBufferContainData(index) && IsOffsetInBuffer(offset, index)) {
|
|
38
37
|
// Only a few requested bytes are in the buffer. memmove those chunk of
|
|
39
38
|
// bytes to the beginning, and memcpy them back into the new buffer if a
|
|
40
39
|
// new buffer is created.
|
|
@@ -43,7 +42,7 @@ void FilePrefetchBuffer::CalculateOffsetAndLen(size_t alignment,
|
|
|
43
42
|
chunk_len = static_cast<uint64_t>(bufs_[index].buffer_.CurrentSize()) -
|
|
44
43
|
chunk_offset_in_buffer;
|
|
45
44
|
assert(chunk_offset_in_buffer % alignment == 0);
|
|
46
|
-
|
|
45
|
+
assert(chunk_len % alignment == 0);
|
|
47
46
|
assert(chunk_offset_in_buffer + chunk_len <=
|
|
48
47
|
bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize());
|
|
49
48
|
if (chunk_len > 0) {
|
|
@@ -108,7 +107,7 @@ Status FilePrefetchBuffer::Read(const IOOptions& opts,
|
|
|
108
107
|
|
|
109
108
|
Status FilePrefetchBuffer::ReadAsync(const IOOptions& opts,
|
|
110
109
|
RandomAccessFileReader* reader,
|
|
111
|
-
uint64_t read_len,
|
|
110
|
+
uint64_t read_len,
|
|
112
111
|
uint64_t rounddown_start, uint32_t index) {
|
|
113
112
|
// callback for async read request.
|
|
114
113
|
auto fp = std::bind(&FilePrefetchBuffer::PrefetchAsyncCallback, this,
|
|
@@ -116,15 +115,18 @@ Status FilePrefetchBuffer::ReadAsync(const IOOptions& opts,
|
|
|
116
115
|
FSReadRequest req;
|
|
117
116
|
Slice result;
|
|
118
117
|
req.len = read_len;
|
|
119
|
-
req.offset = rounddown_start
|
|
118
|
+
req.offset = rounddown_start;
|
|
120
119
|
req.result = result;
|
|
121
|
-
req.scratch = bufs_[index].buffer_.BufferStart()
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
120
|
+
req.scratch = bufs_[index].buffer_.BufferStart();
|
|
121
|
+
bufs_[index].async_req_len_ = req.len;
|
|
122
|
+
|
|
123
|
+
Status s =
|
|
124
|
+
reader->ReadAsync(req, opts, fp, &(bufs_[index].pos_),
|
|
125
|
+
&(bufs_[index].io_handle_), &(bufs_[index].del_fn_),
|
|
126
|
+
/*aligned_buf=*/nullptr);
|
|
125
127
|
req.status.PermitUncheckedError();
|
|
126
128
|
if (s.ok()) {
|
|
127
|
-
async_read_in_progress_ = true;
|
|
129
|
+
bufs_[index].async_read_in_progress_ = true;
|
|
128
130
|
}
|
|
129
131
|
return s;
|
|
130
132
|
}
|
|
@@ -170,8 +172,7 @@ void FilePrefetchBuffer::CopyDataToBuffer(uint32_t src, uint64_t& offset,
|
|
|
170
172
|
}
|
|
171
173
|
uint64_t copy_offset = (offset - bufs_[src].offset_);
|
|
172
174
|
size_t copy_len = 0;
|
|
173
|
-
if (offset
|
|
174
|
-
bufs_[src].offset_ + bufs_[src].buffer_.CurrentSize()) {
|
|
175
|
+
if (IsDataBlockInBuffer(offset, length, src)) {
|
|
175
176
|
// All the bytes are in src.
|
|
176
177
|
copy_len = length;
|
|
177
178
|
} else {
|
|
@@ -194,65 +195,121 @@ void FilePrefetchBuffer::CopyDataToBuffer(uint32_t src, uint64_t& offset,
|
|
|
194
195
|
}
|
|
195
196
|
}
|
|
196
197
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
// buffer and FilePrefetchBuffer should wait for Poll before accessing the
|
|
202
|
-
// second buffer.
|
|
203
|
-
std::vector<void*> handles;
|
|
204
|
-
handles.emplace_back(io_handle_);
|
|
205
|
-
StopWatch sw(clock_, stats_, POLL_WAIT_MICROS);
|
|
206
|
-
fs_->Poll(handles, 1).PermitUncheckedError();
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
// Reset and Release io_handle_ after the Poll API as request has been
|
|
210
|
-
// completed.
|
|
211
|
-
async_read_in_progress_ = false;
|
|
212
|
-
if (io_handle_ != nullptr && del_fn_ != nullptr) {
|
|
213
|
-
del_fn_(io_handle_);
|
|
214
|
-
io_handle_ = nullptr;
|
|
215
|
-
del_fn_ = nullptr;
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
// Index of second buffer.
|
|
198
|
+
// Clear the buffers if it contains outdated data. Outdated data can be
|
|
199
|
+
// because previous sequential reads were read from the cache instead of these
|
|
200
|
+
// buffer. In that case outdated IOs should be aborted.
|
|
201
|
+
void FilePrefetchBuffer::AbortIOIfNeeded(uint64_t offset) {
|
|
219
202
|
uint32_t second = curr_ ^ 1;
|
|
203
|
+
std::vector<void*> handles;
|
|
204
|
+
autovector<uint32_t> buf_pos;
|
|
205
|
+
if (IsBufferOutdatedWithAsyncProgress(offset, curr_)) {
|
|
206
|
+
handles.emplace_back(bufs_[curr_].io_handle_);
|
|
207
|
+
buf_pos.emplace_back(curr_);
|
|
208
|
+
}
|
|
209
|
+
if (IsBufferOutdatedWithAsyncProgress(offset, second)) {
|
|
210
|
+
handles.emplace_back(bufs_[second].io_handle_);
|
|
211
|
+
buf_pos.emplace_back(second);
|
|
212
|
+
}
|
|
213
|
+
if (!handles.empty()) {
|
|
214
|
+
StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS);
|
|
215
|
+
Status s = fs_->AbortIO(handles);
|
|
216
|
+
assert(s.ok());
|
|
217
|
+
}
|
|
220
218
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
bufs_[
|
|
219
|
+
for (auto& pos : buf_pos) {
|
|
220
|
+
// Release io_handle.
|
|
221
|
+
DestroyAndClearIOHandle(pos);
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
if (bufs_[second].io_handle_ == nullptr) {
|
|
225
|
+
bufs_[second].async_read_in_progress_ = false;
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
if (bufs_[curr_].io_handle_ == nullptr &&
|
|
229
|
+
bufs_[curr_].async_read_in_progress_) {
|
|
230
|
+
bufs_[curr_].async_read_in_progress_ = false;
|
|
231
|
+
curr_ = curr_ ^ 1;
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
void FilePrefetchBuffer::AbortAllIOs() {
|
|
236
|
+
uint32_t second = curr_ ^ 1;
|
|
237
|
+
std::vector<void*> handles;
|
|
238
|
+
for (uint32_t i = 0; i < 2; i++) {
|
|
239
|
+
if (bufs_[i].async_read_in_progress_ && bufs_[i].io_handle_ != nullptr) {
|
|
240
|
+
handles.emplace_back(bufs_[i].io_handle_);
|
|
232
241
|
}
|
|
233
242
|
}
|
|
243
|
+
if (!handles.empty()) {
|
|
244
|
+
StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS);
|
|
245
|
+
Status s = fs_->AbortIO(handles);
|
|
246
|
+
assert(s.ok());
|
|
247
|
+
}
|
|
234
248
|
|
|
235
|
-
//
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
249
|
+
// Release io_handles.
|
|
250
|
+
if (bufs_[curr_].io_handle_ != nullptr && bufs_[curr_].del_fn_ != nullptr) {
|
|
251
|
+
DestroyAndClearIOHandle(curr_);
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
if (bufs_[second].io_handle_ != nullptr && bufs_[second].del_fn_ != nullptr) {
|
|
255
|
+
DestroyAndClearIOHandle(second);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
|
|
259
|
+
// Clear the buffers if it contains outdated data. Outdated data can be
|
|
260
|
+
// because previous sequential reads were read from the cache instead of these
|
|
261
|
+
// buffer.
|
|
262
|
+
void FilePrefetchBuffer::UpdateBuffersIfNeeded(uint64_t offset) {
|
|
263
|
+
uint32_t second = curr_ ^ 1;
|
|
264
|
+
if (IsBufferOutdated(offset, curr_)) {
|
|
265
|
+
bufs_[curr_].buffer_.Clear();
|
|
266
|
+
}
|
|
267
|
+
if (IsBufferOutdated(offset, second)) {
|
|
268
|
+
bufs_[second].buffer_.Clear();
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
// If data starts from second buffer, make it curr_. Second buffer can be
|
|
272
|
+
// either partial filled or full.
|
|
273
|
+
if (!bufs_[second].async_read_in_progress_ && DoesBufferContainData(second) &&
|
|
274
|
+
IsOffsetInBuffer(offset, second)) {
|
|
240
275
|
// Clear the curr_ as buffers have been swapped and curr_ contains the
|
|
241
276
|
// outdated data and switch the buffers.
|
|
242
|
-
bufs_[curr_].
|
|
277
|
+
if (!bufs_[curr_].async_read_in_progress_) {
|
|
278
|
+
bufs_[curr_].buffer_.Clear();
|
|
279
|
+
}
|
|
243
280
|
curr_ = curr_ ^ 1;
|
|
244
281
|
}
|
|
245
282
|
}
|
|
246
283
|
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
//
|
|
284
|
+
void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) {
|
|
285
|
+
if (bufs_[curr_].async_read_in_progress_ && fs_ != nullptr) {
|
|
286
|
+
if (bufs_[curr_].io_handle_ != nullptr) {
|
|
287
|
+
// Wait for prefetch data to complete.
|
|
288
|
+
// No mutex is needed as async_read_in_progress behaves as mutex and is
|
|
289
|
+
// updated by main thread only.
|
|
290
|
+
std::vector<void*> handles;
|
|
291
|
+
handles.emplace_back(bufs_[curr_].io_handle_);
|
|
292
|
+
StopWatch sw(clock_, stats_, POLL_WAIT_MICROS);
|
|
293
|
+
fs_->Poll(handles, 1).PermitUncheckedError();
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Reset and Release io_handle after the Poll API as request has been
|
|
297
|
+
// completed.
|
|
298
|
+
DestroyAndClearIOHandle(curr_);
|
|
299
|
+
}
|
|
300
|
+
UpdateBuffersIfNeeded(offset);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
// If async_io is enabled in case of sequential reads, PrefetchAsyncInternal is
|
|
304
|
+
// called. When buffers are switched, we clear the curr_ buffer as we assume the
|
|
305
|
+
// data has been consumed because of sequential reads.
|
|
306
|
+
// Data in buffers will always be sequential with curr_ following second and
|
|
307
|
+
// not vice versa.
|
|
251
308
|
//
|
|
252
309
|
// Scenarios for prefetching asynchronously:
|
|
253
|
-
// Case1: If both buffers are empty, prefetch n bytes
|
|
254
|
-
// synchronously in curr_
|
|
255
|
-
//
|
|
310
|
+
// Case1: If both buffers are empty, prefetch n + readahead_size_/2 bytes
|
|
311
|
+
// synchronously in curr_ and prefetch readahead_size_/2 async in second
|
|
312
|
+
// buffer.
|
|
256
313
|
// Case2: If second buffer has partial or full data, make it current and
|
|
257
314
|
// prefetch readahead_size_/2 async in second buffer. In case of
|
|
258
315
|
// partial data, prefetch remaining bytes from size n synchronously to
|
|
@@ -260,9 +317,10 @@ void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) {
|
|
|
260
317
|
// Case3: If curr_ has partial data, prefetch remaining bytes from size n
|
|
261
318
|
// synchronously in curr_ to fulfill the requested bytes request and
|
|
262
319
|
// prefetch readahead_size_/2 bytes async in second buffer.
|
|
263
|
-
// Case4: If data is in both buffers, copy requested data from
|
|
264
|
-
//
|
|
265
|
-
//
|
|
320
|
+
// Case4: (Special case) If data is in both buffers, copy requested data from
|
|
321
|
+
// curr_, send async request on curr_, wait for poll to fill second
|
|
322
|
+
// buffer (if any), and copy remaining data from second buffer to third
|
|
323
|
+
// buffer.
|
|
266
324
|
Status FilePrefetchBuffer::PrefetchAsyncInternal(
|
|
267
325
|
const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
|
|
268
326
|
size_t length, size_t readahead_size, Env::IOPriority rate_limiter_priority,
|
|
@@ -273,39 +331,30 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(
|
|
|
273
331
|
|
|
274
332
|
TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsyncInternal:Start");
|
|
275
333
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
if (bufs_[curr_].buffer_.CurrentSize() > 0 &&
|
|
281
|
-
offset + length <=
|
|
282
|
-
bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
|
|
283
|
-
offset += length;
|
|
284
|
-
length = 0;
|
|
334
|
+
size_t alignment = reader->file()->GetRequiredBufferAlignment();
|
|
335
|
+
Status s;
|
|
336
|
+
uint64_t tmp_offset = offset;
|
|
337
|
+
size_t tmp_length = length;
|
|
285
338
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
if (
|
|
290
|
-
|
|
339
|
+
// 1. Abort IO and swap buffers if needed to point curr_ to first buffer with
|
|
340
|
+
// data.
|
|
341
|
+
{
|
|
342
|
+
if (!explicit_prefetch_submitted_) {
|
|
343
|
+
AbortIOIfNeeded(offset);
|
|
291
344
|
}
|
|
345
|
+
UpdateBuffersIfNeeded(offset);
|
|
292
346
|
}
|
|
293
|
-
|
|
294
|
-
async_request_submitted_ = false;
|
|
295
|
-
|
|
296
|
-
Status s;
|
|
297
|
-
size_t prefetch_size = length + readahead_size;
|
|
298
|
-
size_t alignment = reader->file()->GetRequiredBufferAlignment();
|
|
299
|
-
// Index of second buffer.
|
|
300
347
|
uint32_t second = curr_ ^ 1;
|
|
301
348
|
|
|
302
|
-
//
|
|
303
|
-
//
|
|
304
|
-
if (bufs_[curr_].
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
349
|
+
// 2. If data is overlapping over two buffers, copy the data from curr_ and
|
|
350
|
+
// call ReadAsync on curr_.
|
|
351
|
+
if (!bufs_[curr_].async_read_in_progress_ && DoesBufferContainData(curr_) &&
|
|
352
|
+
IsOffsetInBuffer(offset, curr_) &&
|
|
353
|
+
(/*Data extends over curr_ buffer and second buffer either has data or in
|
|
354
|
+
process of population=*/
|
|
355
|
+
(offset + length > bufs_[second].offset_) &&
|
|
356
|
+
(bufs_[second].async_read_in_progress_ ||
|
|
357
|
+
DoesBufferContainData(second)))) {
|
|
309
358
|
// Allocate new buffer to third buffer;
|
|
310
359
|
bufs_[2].buffer_.Clear();
|
|
311
360
|
bufs_[2].buffer_.Alignment(alignment);
|
|
@@ -313,25 +362,92 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(
|
|
|
313
362
|
bufs_[2].offset_ = offset;
|
|
314
363
|
copy_to_third_buffer = true;
|
|
315
364
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
365
|
+
CopyDataToBuffer(curr_, tmp_offset, tmp_length);
|
|
366
|
+
|
|
367
|
+
// Call async prefetching on curr_ since data has been consumed in curr_
|
|
368
|
+
// only if data lies within second buffer.
|
|
369
|
+
size_t second_size = bufs_[second].async_read_in_progress_
|
|
370
|
+
? bufs_[second].async_req_len_
|
|
371
|
+
: bufs_[second].buffer_.CurrentSize();
|
|
372
|
+
if (tmp_offset + tmp_length <= bufs_[second].offset_ + second_size) {
|
|
373
|
+
uint64_t rounddown_start = bufs_[second].offset_ + second_size;
|
|
374
|
+
uint64_t roundup_end =
|
|
375
|
+
Roundup(rounddown_start + readahead_size, alignment);
|
|
376
|
+
uint64_t roundup_len = roundup_end - rounddown_start;
|
|
377
|
+
uint64_t chunk_len = 0;
|
|
378
|
+
CalculateOffsetAndLen(alignment, rounddown_start, roundup_len, curr_,
|
|
379
|
+
false, chunk_len);
|
|
380
|
+
assert(chunk_len == 0);
|
|
381
|
+
assert(roundup_len >= chunk_len);
|
|
382
|
+
|
|
383
|
+
bufs_[curr_].offset_ = rounddown_start;
|
|
384
|
+
uint64_t read_len = static_cast<size_t>(roundup_len - chunk_len);
|
|
385
|
+
s = ReadAsync(opts, reader, read_len, rounddown_start, curr_);
|
|
386
|
+
if (!s.ok()) {
|
|
387
|
+
DestroyAndClearIOHandle(curr_);
|
|
388
|
+
bufs_[curr_].buffer_.Clear();
|
|
389
|
+
return s;
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
curr_ = curr_ ^ 1;
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
// 3. Call Poll only if data is needed for the second buffer.
|
|
396
|
+
// - Return if whole data is in curr_ and second buffer in progress.
|
|
397
|
+
// - If second buffer is empty, it will go for ReadAsync for second buffer.
|
|
398
|
+
if (!bufs_[curr_].async_read_in_progress_ && DoesBufferContainData(curr_) &&
|
|
399
|
+
IsDataBlockInBuffer(offset, length, curr_)) {
|
|
400
|
+
// Whole data is in curr_.
|
|
401
|
+
UpdateBuffersIfNeeded(offset);
|
|
402
|
+
second = curr_ ^ 1;
|
|
403
|
+
if (bufs_[second].async_read_in_progress_) {
|
|
320
404
|
return s;
|
|
321
405
|
}
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
406
|
+
} else {
|
|
407
|
+
PollAndUpdateBuffersIfNeeded(offset);
|
|
408
|
+
second = curr_ ^ 1;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
if (copy_to_third_buffer) {
|
|
412
|
+
offset = tmp_offset;
|
|
413
|
+
length = tmp_length;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
// 4. After polling and swapping buffers, if all the requested bytes are in
|
|
417
|
+
// curr_, it will only go for async prefetching.
|
|
418
|
+
// copy_to_third_buffer is a special case so it will be handled separately.
|
|
419
|
+
if (!copy_to_third_buffer && DoesBufferContainData(curr_) &&
|
|
420
|
+
IsDataBlockInBuffer(offset, length, curr_)) {
|
|
421
|
+
offset += length;
|
|
422
|
+
length = 0;
|
|
423
|
+
|
|
424
|
+
// Since async request was submitted directly by calling PrefetchAsync in
|
|
425
|
+
// last call, we don't need to prefetch further as this call is to poll
|
|
426
|
+
// the data submitted in previous call.
|
|
427
|
+
if (explicit_prefetch_submitted_) {
|
|
428
|
+
return s;
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
|
|
432
|
+
// 5. Data is overlapping i.e. some of the data has been copied to third
|
|
433
|
+
// buffer
|
|
434
|
+
// and remaining will be updated below.
|
|
435
|
+
if (copy_to_third_buffer) {
|
|
436
|
+
CopyDataToBuffer(curr_, offset, length);
|
|
437
|
+
|
|
438
|
+
// Length == 0: All the requested data has been copied to third buffer and
|
|
439
|
+
// it has already gone for async prefetching. It can return without doing
|
|
440
|
+
// anything further.
|
|
325
441
|
// Length > 0: More data needs to be consumed so it will continue async and
|
|
326
442
|
// sync prefetching and copy the remaining data to third buffer in the end.
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
prefetch_size = length + readahead_size;
|
|
443
|
+
if (length == 0) {
|
|
444
|
+
return s;
|
|
445
|
+
}
|
|
331
446
|
}
|
|
332
447
|
|
|
448
|
+
// 6. Go for ReadAsync and Read (if needed).
|
|
449
|
+
size_t prefetch_size = length + readahead_size;
|
|
333
450
|
size_t _offset = static_cast<size_t>(offset);
|
|
334
|
-
second = curr_ ^ 1;
|
|
335
451
|
|
|
336
452
|
// offset and size alignment for curr_ buffer with synchronous prefetching
|
|
337
453
|
uint64_t rounddown_start1 = Rounddown(_offset, alignment);
|
|
@@ -368,19 +484,34 @@ Status FilePrefetchBuffer::PrefetchAsyncInternal(
|
|
|
368
484
|
uint64_t chunk_len2 = 0;
|
|
369
485
|
CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second,
|
|
370
486
|
false /*refit_tail*/, chunk_len2);
|
|
371
|
-
|
|
487
|
+
assert(chunk_len2 == 0);
|
|
372
488
|
// Update the buffer offset.
|
|
373
489
|
bufs_[second].offset_ = rounddown_start2;
|
|
374
490
|
assert(roundup_len2 >= chunk_len2);
|
|
375
491
|
uint64_t read_len2 = static_cast<size_t>(roundup_len2 - chunk_len2);
|
|
376
|
-
ReadAsync(opts, reader, read_len2,
|
|
377
|
-
|
|
492
|
+
Status tmp_s = ReadAsync(opts, reader, read_len2, rounddown_start2, second);
|
|
493
|
+
if (!tmp_s.ok()) {
|
|
494
|
+
DestroyAndClearIOHandle(second);
|
|
495
|
+
bufs_[second].buffer_.Clear();
|
|
496
|
+
}
|
|
378
497
|
}
|
|
379
498
|
|
|
380
499
|
if (read_len1 > 0) {
|
|
381
500
|
s = Read(opts, reader, rate_limiter_priority, read_len1, chunk_len1,
|
|
382
501
|
rounddown_start1, curr_);
|
|
383
502
|
if (!s.ok()) {
|
|
503
|
+
if (bufs_[second].io_handle_ != nullptr) {
|
|
504
|
+
std::vector<void*> handles;
|
|
505
|
+
handles.emplace_back(bufs_[second].io_handle_);
|
|
506
|
+
{
|
|
507
|
+
StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS);
|
|
508
|
+
Status status = fs_->AbortIO(handles);
|
|
509
|
+
assert(status.ok());
|
|
510
|
+
}
|
|
511
|
+
}
|
|
512
|
+
DestroyAndClearIOHandle(second);
|
|
513
|
+
bufs_[second].buffer_.Clear();
|
|
514
|
+
bufs_[curr_].buffer_.Clear();
|
|
384
515
|
return s;
|
|
385
516
|
}
|
|
386
517
|
}
|
|
@@ -462,12 +593,18 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
|
|
|
462
593
|
return false;
|
|
463
594
|
}
|
|
464
595
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
596
|
+
if (explicit_prefetch_submitted_) {
|
|
597
|
+
if (prev_offset_ != offset) {
|
|
598
|
+
// Random offset called. So abort the IOs.
|
|
599
|
+
AbortAllIOs();
|
|
600
|
+
bufs_[curr_].buffer_.Clear();
|
|
601
|
+
bufs_[curr_ ^ 1].buffer_.Clear();
|
|
602
|
+
explicit_prefetch_submitted_ = false;
|
|
603
|
+
return false;
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
if (!explicit_prefetch_submitted_ && offset < bufs_[curr_].offset_) {
|
|
471
608
|
return false;
|
|
472
609
|
}
|
|
473
610
|
|
|
@@ -479,8 +616,11 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
|
|
|
479
616
|
// If readahead is not enabled: return false.
|
|
480
617
|
TEST_SYNC_POINT_CALLBACK("FilePrefetchBuffer::TryReadFromCache",
|
|
481
618
|
&readahead_size_);
|
|
482
|
-
|
|
483
|
-
|
|
619
|
+
|
|
620
|
+
if (explicit_prefetch_submitted_ ||
|
|
621
|
+
(bufs_[curr_].async_read_in_progress_ ||
|
|
622
|
+
offset + n >
|
|
623
|
+
bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize())) {
|
|
484
624
|
if (readahead_size_ > 0) {
|
|
485
625
|
Status s;
|
|
486
626
|
assert(reader != nullptr);
|
|
@@ -493,11 +633,11 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
|
|
|
493
633
|
return false;
|
|
494
634
|
}
|
|
495
635
|
}
|
|
496
|
-
|
|
497
636
|
// Prefetch n + readahead_size_/2 synchronously as remaining
|
|
498
637
|
// readahead_size_/2 will be prefetched asynchronously.
|
|
499
638
|
s = PrefetchAsyncInternal(opts, reader, offset, n, readahead_size_ / 2,
|
|
500
639
|
rate_limiter_priority, copy_to_third_buffer);
|
|
640
|
+
explicit_prefetch_submitted_ = false;
|
|
501
641
|
if (!s.ok()) {
|
|
502
642
|
if (status) {
|
|
503
643
|
*status = s;
|
|
@@ -507,11 +647,12 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
|
|
|
507
647
|
#endif
|
|
508
648
|
return false;
|
|
509
649
|
}
|
|
510
|
-
prefetched =
|
|
650
|
+
prefetched = explicit_prefetch_submitted_ ? false : true;
|
|
511
651
|
} else {
|
|
512
652
|
return false;
|
|
513
653
|
}
|
|
514
654
|
}
|
|
655
|
+
|
|
515
656
|
UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
|
|
516
657
|
|
|
517
658
|
uint32_t index = curr_;
|
|
@@ -523,14 +664,12 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
|
|
|
523
664
|
if (prefetched) {
|
|
524
665
|
readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2);
|
|
525
666
|
}
|
|
526
|
-
async_request_submitted_ = false;
|
|
527
667
|
return true;
|
|
528
668
|
}
|
|
529
669
|
|
|
530
670
|
void FilePrefetchBuffer::PrefetchAsyncCallback(const FSReadRequest& req,
|
|
531
|
-
void*
|
|
532
|
-
uint32_t index =
|
|
533
|
-
|
|
671
|
+
void* cb_arg) {
|
|
672
|
+
uint32_t index = *(static_cast<uint32_t*>(cb_arg));
|
|
534
673
|
#ifndef NDEBUG
|
|
535
674
|
if (req.result.size() < req.len) {
|
|
536
675
|
// Fake an IO error to force db_stress fault injection to ignore
|
|
@@ -565,82 +704,133 @@ Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
|
|
|
565
704
|
if (!enable_) {
|
|
566
705
|
return Status::NotSupported();
|
|
567
706
|
}
|
|
707
|
+
|
|
568
708
|
TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:Start");
|
|
569
709
|
|
|
570
|
-
|
|
710
|
+
num_file_reads_ = 0;
|
|
711
|
+
explicit_prefetch_submitted_ = false;
|
|
712
|
+
bool is_eligible_for_prefetching = false;
|
|
713
|
+
if (readahead_size_ > 0 &&
|
|
714
|
+
(!implicit_auto_readahead_ ||
|
|
715
|
+
num_file_reads_ + 1 >= num_file_reads_for_auto_readahead_)) {
|
|
716
|
+
is_eligible_for_prefetching = true;
|
|
717
|
+
}
|
|
571
718
|
|
|
572
|
-
//
|
|
573
|
-
|
|
719
|
+
// 1. Cancel any pending async read to make code simpler as buffers can be out
|
|
720
|
+
// of sync.
|
|
721
|
+
AbortAllIOs();
|
|
574
722
|
|
|
723
|
+
// 2. Clear outdated data.
|
|
724
|
+
UpdateBuffersIfNeeded(offset);
|
|
725
|
+
uint32_t second = curr_ ^ 1;
|
|
575
726
|
// Since PrefetchAsync can be called on non sequential reads. So offset can
|
|
576
|
-
// be less than buffers' offset. In that case it clears
|
|
577
|
-
//
|
|
578
|
-
if (
|
|
727
|
+
// be less than curr_ buffers' offset. In that case also it clears both
|
|
728
|
+
// buffers.
|
|
729
|
+
if (DoesBufferContainData(curr_) && !IsOffsetInBuffer(offset, curr_)) {
|
|
579
730
|
bufs_[curr_].buffer_.Clear();
|
|
731
|
+
bufs_[second].buffer_.Clear();
|
|
580
732
|
}
|
|
581
733
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
734
|
+
UpdateReadPattern(offset, n, /*decrease_readaheadsize=*/false);
|
|
735
|
+
|
|
736
|
+
bool data_found = false;
|
|
737
|
+
|
|
738
|
+
// 3. If curr_ has full data.
|
|
739
|
+
if (DoesBufferContainData(curr_) && IsDataBlockInBuffer(offset, n, curr_)) {
|
|
586
740
|
uint64_t offset_in_buffer = offset - bufs_[curr_].offset_;
|
|
587
741
|
*result = Slice(bufs_[curr_].buffer_.BufferStart() + offset_in_buffer, n);
|
|
588
|
-
|
|
742
|
+
data_found = true;
|
|
743
|
+
// Update num_file_reads_ as TryReadFromCacheAsync won't be called for
|
|
744
|
+
// poll and update num_file_reads_ if data is found.
|
|
745
|
+
num_file_reads_++;
|
|
746
|
+
|
|
747
|
+
// 3.1 If second also has some data or is not eligible for prefetching,
|
|
748
|
+
// return.
|
|
749
|
+
if (!is_eligible_for_prefetching || DoesBufferContainData(second)) {
|
|
750
|
+
return Status::OK();
|
|
751
|
+
}
|
|
752
|
+
} else {
|
|
753
|
+
// Partial data in curr_.
|
|
754
|
+
bufs_[curr_].buffer_.Clear();
|
|
589
755
|
}
|
|
756
|
+
bufs_[second].buffer_.Clear();
|
|
590
757
|
|
|
591
758
|
Status s;
|
|
592
759
|
size_t alignment = reader->file()->GetRequiredBufferAlignment();
|
|
593
|
-
|
|
594
|
-
// TODO akanksha: Handle the scenario if data is overlapping in 2 buffers.
|
|
595
|
-
// Currently, tt covers 2 scenarios. Either one buffer (curr_) has no data or
|
|
596
|
-
// it has partial data. It ignores the contents in second buffer (overlapping
|
|
597
|
-
// data in 2 buffers) and send the request to re-read that data again.
|
|
598
|
-
|
|
599
|
-
// Clear the second buffer in order to do asynchronous prefetching.
|
|
600
|
-
bufs_[second].buffer_.Clear();
|
|
601
|
-
|
|
760
|
+
size_t prefetch_size = is_eligible_for_prefetching ? readahead_size_ / 2 : 0;
|
|
602
761
|
size_t offset_to_read = static_cast<size_t>(offset);
|
|
603
|
-
uint64_t
|
|
604
|
-
uint64_t
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
762
|
+
uint64_t rounddown_start1 = 0;
|
|
763
|
+
uint64_t roundup_end1 = 0;
|
|
764
|
+
uint64_t rounddown_start2 = 0;
|
|
765
|
+
uint64_t roundup_end2 = 0;
|
|
766
|
+
uint64_t chunk_len1 = 0;
|
|
767
|
+
uint64_t chunk_len2 = 0;
|
|
768
|
+
size_t read_len1 = 0;
|
|
769
|
+
size_t read_len2 = 0;
|
|
770
|
+
|
|
771
|
+
// - If curr_ is empty.
|
|
772
|
+
// - Call async read for full data + prefetch_size on curr_.
|
|
773
|
+
// - Call async read for prefetch_size on second if eligible.
|
|
774
|
+
// - If curr_ is filled.
|
|
775
|
+
// - prefetch_size on second.
|
|
776
|
+
// Calculate length and offsets for reading.
|
|
777
|
+
if (!DoesBufferContainData(curr_)) {
|
|
778
|
+
// Prefetch full data + prefetch_size in curr_.
|
|
779
|
+
rounddown_start1 = Rounddown(offset_to_read, alignment);
|
|
780
|
+
roundup_end1 = Roundup(offset_to_read + n + prefetch_size, alignment);
|
|
781
|
+
uint64_t roundup_len1 = roundup_end1 - rounddown_start1;
|
|
782
|
+
assert(roundup_len1 >= alignment);
|
|
783
|
+
assert(roundup_len1 % alignment == 0);
|
|
784
|
+
|
|
785
|
+
CalculateOffsetAndLen(alignment, rounddown_start1, roundup_len1, curr_,
|
|
786
|
+
false, chunk_len1);
|
|
787
|
+
assert(chunk_len1 == 0);
|
|
788
|
+
assert(roundup_len1 >= chunk_len1);
|
|
789
|
+
read_len1 = static_cast<size_t>(roundup_len1 - chunk_len1);
|
|
790
|
+
bufs_[curr_].offset_ = rounddown_start1;
|
|
616
791
|
}
|
|
617
792
|
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
// Update the buffer offset.
|
|
627
|
-
bufs_[second].offset_ = rounddown_start;
|
|
628
|
-
assert(roundup_len >= chunk_len);
|
|
629
|
-
|
|
630
|
-
size_t read_len = static_cast<size_t>(roundup_len - chunk_len);
|
|
793
|
+
if (is_eligible_for_prefetching) {
|
|
794
|
+
if (DoesBufferContainData(curr_)) {
|
|
795
|
+
rounddown_start2 =
|
|
796
|
+
bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize();
|
|
797
|
+
} else {
|
|
798
|
+
rounddown_start2 = roundup_end1;
|
|
799
|
+
}
|
|
631
800
|
|
|
632
|
-
|
|
801
|
+
roundup_end2 = Roundup(rounddown_start2 + prefetch_size, alignment);
|
|
802
|
+
uint64_t roundup_len2 = roundup_end2 - rounddown_start2;
|
|
633
803
|
|
|
634
|
-
|
|
635
|
-
|
|
804
|
+
assert(roundup_len2 >= alignment);
|
|
805
|
+
CalculateOffsetAndLen(alignment, rounddown_start2, roundup_len2, second,
|
|
806
|
+
false, chunk_len2);
|
|
807
|
+
assert(chunk_len2 == 0);
|
|
808
|
+
assert(roundup_len2 >= chunk_len2);
|
|
809
|
+
read_len2 = static_cast<size_t>(roundup_len2 - chunk_len2);
|
|
810
|
+
// Update the buffer offset.
|
|
811
|
+
bufs_[second].offset_ = rounddown_start2;
|
|
636
812
|
}
|
|
637
813
|
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
814
|
+
if (read_len1) {
|
|
815
|
+
s = ReadAsync(opts, reader, read_len1, rounddown_start1, curr_);
|
|
816
|
+
if (!s.ok()) {
|
|
817
|
+
DestroyAndClearIOHandle(curr_);
|
|
818
|
+
bufs_[curr_].buffer_.Clear();
|
|
819
|
+
return s;
|
|
820
|
+
}
|
|
821
|
+
explicit_prefetch_submitted_ = true;
|
|
822
|
+
prev_len_ = 0;
|
|
823
|
+
}
|
|
824
|
+
if (read_len2) {
|
|
825
|
+
s = ReadAsync(opts, reader, read_len2, rounddown_start2, second);
|
|
826
|
+
if (!s.ok()) {
|
|
827
|
+
DestroyAndClearIOHandle(second);
|
|
828
|
+
bufs_[second].buffer_.Clear();
|
|
829
|
+
return s;
|
|
830
|
+
}
|
|
831
|
+
readahead_size_ = std::min(max_readahead_size_, readahead_size_ * 2);
|
|
832
|
+
}
|
|
833
|
+
return (data_found ? Status::OK() : Status::TryAgain());
|
|
645
834
|
}
|
|
835
|
+
|
|
646
836
|
} // namespace ROCKSDB_NAMESPACE
|