@nxtedition/rocksdb 7.1.14 → 7.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +1 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +72 -18
- package/deps/rocksdb/rocksdb/Makefile +91 -11
- package/deps/rocksdb/rocksdb/TARGETS +8 -4
- package/deps/rocksdb/rocksdb/cache/cache.cc +5 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +13 -8
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +116 -57
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +958 -459
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +407 -622
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +104 -40
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +23 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +350 -184
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +12 -2
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +130 -43
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +24 -2
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +423 -98
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +19 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +10 -7
- package/deps/rocksdb/rocksdb/crash_test.mk +2 -2
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +46 -26
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +9 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +90 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +56 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -10
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +64 -59
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +11 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +92 -62
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +159 -136
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -13
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +129 -57
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +81 -3
- package/deps/rocksdb/rocksdb/db/c.cc +29 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +10 -1
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +21 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -36
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +344 -102
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +163 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +52 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +35 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +8 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +167 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +0 -117
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +6 -49
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +18 -11
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +4 -10
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +12 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -93
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +28 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +11 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +76 -138
- package/deps/rocksdb/rocksdb/db/db_iter.h +26 -23
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +931 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +44 -22
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +6 -14
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +45 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +2 -1
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +24 -12
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +7 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +3 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +79 -18
- package/deps/rocksdb/rocksdb/db/memtable.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +26 -4
- package/deps/rocksdb/rocksdb/db/memtable_list.h +2 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +113 -0
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +110 -0
- package/deps/rocksdb/rocksdb/db/{periodic_work_scheduler_test.cc → periodic_task_scheduler_test.cc} +33 -39
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +12 -20
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +6 -5
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +20 -5
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +14 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +17 -8
- package/deps/rocksdb/rocksdb/db/repair_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +49 -66
- package/deps/rocksdb/rocksdb/db/table_cache.cc +92 -63
- package/deps/rocksdb/rocksdb/db/table_cache.h +16 -9
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +2 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +3 -3
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set.cc +379 -145
- package/deps/rocksdb/rocksdb/db/version_set.h +26 -24
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +9 -9
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +5 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +71 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +14 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +23 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +26 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +105 -34
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +16 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +282 -25
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/io_posix.cc +3 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +367 -177
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +144 -56
- package/deps/rocksdb/rocksdb/file/filename.cc +3 -3
- package/deps/rocksdb/rocksdb/file/filename.h +4 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +415 -0
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +2 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +36 -45
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +21 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +11 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +15 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +163 -68
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +26 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +23 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +21 -17
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +17 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +2 -1
- package/deps/rocksdb/rocksdb/logging/env_logger.h +2 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +4 -2
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +2 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +15 -1
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +17 -0
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +14 -3
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +3 -0
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +50 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +1 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +31 -32
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options.cc +2 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -0
- package/deps/rocksdb/rocksdb/src.mk +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block.h +9 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +110 -99
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +12 -10
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +11 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +138 -83
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +25 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +31 -30
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -13
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +4 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +17 -19
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +1 -1
- package/deps/rocksdb/rocksdb/table/format.cc +26 -29
- package/deps/rocksdb/rocksdb/table/format.h +44 -26
- package/deps/rocksdb/rocksdb/table/get_context.cc +17 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +7 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +4 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +950 -104
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +28 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +3 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -1
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +10 -9
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.h +22 -20
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +1 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +9 -21
- package/deps/rocksdb/rocksdb/table/table_test.cc +12 -12
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +4 -4
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +1 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +116 -34
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +6 -1
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/util/autovector.h +12 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +3 -2
- package/deps/rocksdb/rocksdb/util/stderr_logger.cc +30 -0
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +5 -18
- package/deps/rocksdb/rocksdb/util/timer.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +9 -2
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +34 -53
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +9 -14
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +3 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +26 -8
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +114 -16
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +59 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +39 -0
- package/deps/rocksdb/rocksdb.gyp +0 -1
- package/index.js +6 -10
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +0 -168
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +0 -90
|
@@ -20,24 +20,42 @@
|
|
|
20
20
|
#include "rocksdb/file_system.h"
|
|
21
21
|
#include "rocksdb/options.h"
|
|
22
22
|
#include "util/aligned_buffer.h"
|
|
23
|
+
#include "util/autovector.h"
|
|
24
|
+
#include "util/stop_watch.h"
|
|
23
25
|
|
|
24
26
|
namespace ROCKSDB_NAMESPACE {
|
|
25
27
|
|
|
26
|
-
#define
|
|
28
|
+
#define DEFAULT_DECREMENT 8 * 1024
|
|
27
29
|
|
|
28
30
|
struct IOOptions;
|
|
29
31
|
class RandomAccessFileReader;
|
|
30
32
|
|
|
31
33
|
struct BufferInfo {
|
|
32
34
|
AlignedBuffer buffer_;
|
|
35
|
+
|
|
33
36
|
uint64_t offset_ = 0;
|
|
37
|
+
|
|
38
|
+
// Below parameters are used in case of async read flow.
|
|
39
|
+
// Length requested for in ReadAsync.
|
|
40
|
+
size_t async_req_len_ = 0;
|
|
41
|
+
|
|
42
|
+
// async_read_in_progress can be used as mutex. Callback can update the buffer
|
|
43
|
+
// and its size but async_read_in_progress is only set by main thread.
|
|
44
|
+
bool async_read_in_progress_ = false;
|
|
45
|
+
|
|
46
|
+
// io_handle is allocated and used by underlying file system in case of
|
|
47
|
+
// asynchronous reads.
|
|
48
|
+
void* io_handle_ = nullptr;
|
|
49
|
+
|
|
50
|
+
IOHandleDeleter del_fn_ = nullptr;
|
|
51
|
+
|
|
52
|
+
// pos represents the index of this buffer in vector of BufferInfo.
|
|
53
|
+
uint32_t pos_ = 0;
|
|
34
54
|
};
|
|
35
55
|
|
|
36
56
|
// FilePrefetchBuffer is a smart buffer to store and read data from a file.
|
|
37
57
|
class FilePrefetchBuffer {
|
|
38
58
|
public:
|
|
39
|
-
static const int kMinNumFileReadsToStartAutoReadahead = 2;
|
|
40
|
-
|
|
41
59
|
// Constructor.
|
|
42
60
|
//
|
|
43
61
|
// All arguments are optional.
|
|
@@ -54,9 +72,6 @@ class FilePrefetchBuffer {
|
|
|
54
72
|
// it. Used for adaptable readahead of the file footer/metadata.
|
|
55
73
|
// implicit_auto_readahead : Readahead is enabled implicitly by rocksdb after
|
|
56
74
|
// doing sequential scans for two times.
|
|
57
|
-
// async_io : When async_io is enabled, if it's implicit_auto_readahead, it
|
|
58
|
-
// prefetches data asynchronously in second buffer while curr_ is being
|
|
59
|
-
// consumed.
|
|
60
75
|
//
|
|
61
76
|
// Automatic readhead is enabled for a file if readahead_size
|
|
62
77
|
// and max_readahead_size are passed in.
|
|
@@ -65,8 +80,10 @@ class FilePrefetchBuffer {
|
|
|
65
80
|
FilePrefetchBuffer(size_t readahead_size = 0, size_t max_readahead_size = 0,
|
|
66
81
|
bool enable = true, bool track_min_offset = false,
|
|
67
82
|
bool implicit_auto_readahead = false,
|
|
68
|
-
uint64_t num_file_reads = 0,
|
|
69
|
-
|
|
83
|
+
uint64_t num_file_reads = 0,
|
|
84
|
+
uint64_t num_file_reads_for_auto_readahead = 0,
|
|
85
|
+
FileSystem* fs = nullptr, SystemClock* clock = nullptr,
|
|
86
|
+
Statistics* stats = nullptr)
|
|
70
87
|
: curr_(0),
|
|
71
88
|
readahead_size_(readahead_size),
|
|
72
89
|
initial_auto_readahead_size_(readahead_size),
|
|
@@ -77,47 +94,83 @@ class FilePrefetchBuffer {
|
|
|
77
94
|
implicit_auto_readahead_(implicit_auto_readahead),
|
|
78
95
|
prev_offset_(0),
|
|
79
96
|
prev_len_(0),
|
|
97
|
+
num_file_reads_for_auto_readahead_(num_file_reads_for_auto_readahead),
|
|
80
98
|
num_file_reads_(num_file_reads),
|
|
81
|
-
|
|
82
|
-
del_fn_(nullptr),
|
|
83
|
-
async_read_in_progress_(false),
|
|
84
|
-
async_request_submitted_(false),
|
|
99
|
+
explicit_prefetch_submitted_(false),
|
|
85
100
|
fs_(fs),
|
|
86
101
|
clock_(clock),
|
|
87
102
|
stats_(stats) {
|
|
88
|
-
assert((num_file_reads_ >=
|
|
103
|
+
assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) ||
|
|
89
104
|
(num_file_reads_ == 0));
|
|
90
|
-
// If
|
|
91
|
-
// while curr_ is being consumed. If data is overlapping in
|
|
92
|
-
// data is copied to third buffer to return continuous buffer.
|
|
105
|
+
// If ReadOptions.async_io is enabled, data is asynchronously filled in
|
|
106
|
+
// second buffer while curr_ is being consumed. If data is overlapping in
|
|
107
|
+
// two buffers, data is copied to third buffer to return continuous buffer.
|
|
93
108
|
bufs_.resize(3);
|
|
109
|
+
for (uint32_t i = 0; i < 2; i++) {
|
|
110
|
+
bufs_[i].pos_ = i;
|
|
111
|
+
}
|
|
94
112
|
}
|
|
95
113
|
|
|
96
114
|
~FilePrefetchBuffer() {
|
|
97
115
|
// Abort any pending async read request before destroying the class object.
|
|
98
|
-
if (
|
|
116
|
+
if (fs_ != nullptr) {
|
|
99
117
|
std::vector<void*> handles;
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
118
|
+
for (uint32_t i = 0; i < 2; i++) {
|
|
119
|
+
if (bufs_[i].async_read_in_progress_ &&
|
|
120
|
+
bufs_[i].io_handle_ != nullptr) {
|
|
121
|
+
handles.emplace_back(bufs_[i].io_handle_);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
if (!handles.empty()) {
|
|
125
|
+
StopWatch sw(clock_, stats_, ASYNC_PREFETCH_ABORT_MICROS);
|
|
126
|
+
Status s = fs_->AbortIO(handles);
|
|
127
|
+
assert(s.ok());
|
|
128
|
+
}
|
|
103
129
|
}
|
|
104
130
|
|
|
105
131
|
// Prefetch buffer bytes discarded.
|
|
106
132
|
uint64_t bytes_discarded = 0;
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
133
|
+
// Iterated over 2 buffers.
|
|
134
|
+
for (int i = 0; i < 2; i++) {
|
|
135
|
+
int first = i;
|
|
136
|
+
int second = i ^ 1;
|
|
137
|
+
|
|
138
|
+
if (DoesBufferContainData(first)) {
|
|
139
|
+
// If last block was read completely from first and some bytes in
|
|
140
|
+
// first buffer are still unconsumed.
|
|
141
|
+
if (prev_offset_ >= bufs_[first].offset_ &&
|
|
142
|
+
prev_offset_ + prev_len_ <
|
|
143
|
+
bufs_[first].offset_ + bufs_[first].buffer_.CurrentSize()) {
|
|
144
|
+
bytes_discarded += bufs_[first].buffer_.CurrentSize() -
|
|
145
|
+
(prev_offset_ + prev_len_ - bufs_[first].offset_);
|
|
146
|
+
}
|
|
147
|
+
// If data was in second buffer and some/whole block bytes were read
|
|
148
|
+
// from second buffer.
|
|
149
|
+
else if (prev_offset_ < bufs_[first].offset_ &&
|
|
150
|
+
!DoesBufferContainData(second)) {
|
|
151
|
+
// If last block read was completely from different buffer, this
|
|
152
|
+
// buffer is unconsumed.
|
|
153
|
+
if (prev_offset_ + prev_len_ <= bufs_[first].offset_) {
|
|
154
|
+
bytes_discarded += bufs_[first].buffer_.CurrentSize();
|
|
155
|
+
}
|
|
156
|
+
// If last block read overlaps with this buffer and some data is
|
|
157
|
+
// still unconsumed and previous buffer (second) is not cleared.
|
|
158
|
+
else if (prev_offset_ + prev_len_ > bufs_[first].offset_ &&
|
|
159
|
+
bufs_[first].offset_ + bufs_[first].buffer_.CurrentSize() ==
|
|
160
|
+
bufs_[second].offset_) {
|
|
161
|
+
bytes_discarded += bufs_[first].buffer_.CurrentSize() -
|
|
162
|
+
(/*bytes read from this buffer=*/prev_len_ -
|
|
163
|
+
(bufs_[first].offset_ - prev_offset_));
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
112
167
|
}
|
|
113
|
-
RecordInHistogram(stats_, PREFETCHED_BYTES_DISCARDED, bytes_discarded);
|
|
114
168
|
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
io_handle_ = nullptr;
|
|
119
|
-
del_fn_ = nullptr;
|
|
169
|
+
for (uint32_t i = 0; i < 2; i++) {
|
|
170
|
+
// Release io_handle.
|
|
171
|
+
DestroyAndClearIOHandle(i);
|
|
120
172
|
}
|
|
173
|
+
RecordInHistogram(stats_, PREFETCHED_BYTES_DISCARDED, bytes_discarded);
|
|
121
174
|
}
|
|
122
175
|
|
|
123
176
|
// Load data into the buffer from a file.
|
|
@@ -126,9 +179,6 @@ class FilePrefetchBuffer {
|
|
|
126
179
|
// n : the number of bytes to read.
|
|
127
180
|
// rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to
|
|
128
181
|
// bypass.
|
|
129
|
-
// is_async_read : if the data should be prefetched by calling read
|
|
130
|
-
// asynchronously. It should be set true when called
|
|
131
|
-
// from TryReadFromCache.
|
|
132
182
|
Status Prefetch(const IOOptions& opts, RandomAccessFileReader* reader,
|
|
133
183
|
uint64_t offset, size_t n,
|
|
134
184
|
Env::IOPriority rate_limiter_priority);
|
|
@@ -194,7 +244,7 @@ class FilePrefetchBuffer {
|
|
|
194
244
|
}
|
|
195
245
|
|
|
196
246
|
void DecreaseReadAheadIfEligible(uint64_t offset, size_t size,
|
|
197
|
-
size_t value =
|
|
247
|
+
size_t value = DEFAULT_DECREMENT) {
|
|
198
248
|
// Decrease the readahead_size if
|
|
199
249
|
// - its enabled internally by RocksDB (implicit_auto_readahead_) and,
|
|
200
250
|
// - readahead_size is greater than 0 and,
|
|
@@ -203,12 +253,14 @@ class FilePrefetchBuffer {
|
|
|
203
253
|
// - few/no bytes are in buffer and,
|
|
204
254
|
// - block is sequential with the previous read and,
|
|
205
255
|
// - num_file_reads_ + 1 (including this read) >
|
|
206
|
-
//
|
|
256
|
+
// num_file_reads_for_auto_readahead_
|
|
257
|
+
size_t curr_size = bufs_[curr_].async_read_in_progress_
|
|
258
|
+
? bufs_[curr_].async_req_len_
|
|
259
|
+
: bufs_[curr_].buffer_.CurrentSize();
|
|
207
260
|
if (implicit_auto_readahead_ && readahead_size_ > 0) {
|
|
208
|
-
if ((offset + size >
|
|
209
|
-
bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) &&
|
|
261
|
+
if ((offset + size > bufs_[curr_].offset_ + curr_size) &&
|
|
210
262
|
IsBlockSequential(offset) &&
|
|
211
|
-
(num_file_reads_ + 1 >
|
|
263
|
+
(num_file_reads_ + 1 > num_file_reads_for_auto_readahead_)) {
|
|
212
264
|
readahead_size_ =
|
|
213
265
|
std::max(initial_auto_readahead_size_,
|
|
214
266
|
(readahead_size_ >= value ? readahead_size_ - value : 0));
|
|
@@ -224,8 +276,14 @@ class FilePrefetchBuffer {
|
|
|
224
276
|
// and data present in buffer_. It also allocates new buffer or refit tail if
|
|
225
277
|
// required.
|
|
226
278
|
void CalculateOffsetAndLen(size_t alignment, uint64_t offset,
|
|
227
|
-
size_t roundup_len,
|
|
228
|
-
uint64_t& chunk_len);
|
|
279
|
+
size_t roundup_len, uint32_t index,
|
|
280
|
+
bool refit_tail, uint64_t& chunk_len);
|
|
281
|
+
|
|
282
|
+
void AbortIOIfNeeded(uint64_t offset);
|
|
283
|
+
|
|
284
|
+
void AbortAllIOs();
|
|
285
|
+
|
|
286
|
+
void UpdateBuffersIfNeeded(uint64_t offset);
|
|
229
287
|
|
|
230
288
|
// It calls Poll API if any there is any pending asynchronous request. It then
|
|
231
289
|
// checks if data is in any buffer. It clears the outdated data and swaps the
|
|
@@ -243,8 +301,7 @@ class FilePrefetchBuffer {
|
|
|
243
301
|
uint64_t chunk_len, uint64_t rounddown_start, uint32_t index);
|
|
244
302
|
|
|
245
303
|
Status ReadAsync(const IOOptions& opts, RandomAccessFileReader* reader,
|
|
246
|
-
uint64_t read_len, uint64_t
|
|
247
|
-
uint64_t rounddown_start, uint32_t index);
|
|
304
|
+
uint64_t read_len, uint64_t rounddown_start, uint32_t index);
|
|
248
305
|
|
|
249
306
|
// Copy the data from src to third buffer.
|
|
250
307
|
void CopyDataToBuffer(uint32_t src, uint64_t& offset, size_t& length);
|
|
@@ -273,25 +330,60 @@ class FilePrefetchBuffer {
|
|
|
273
330
|
// Since async request was submitted in last call directly by calling
|
|
274
331
|
// PrefetchAsync, it skips num_file_reads_ check as this call is to poll the
|
|
275
332
|
// data submitted in previous call.
|
|
276
|
-
if (
|
|
333
|
+
if (explicit_prefetch_submitted_) {
|
|
277
334
|
return true;
|
|
278
335
|
}
|
|
279
|
-
if (num_file_reads_ <=
|
|
336
|
+
if (num_file_reads_ <= num_file_reads_for_auto_readahead_) {
|
|
280
337
|
UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
|
|
281
338
|
return false;
|
|
282
339
|
}
|
|
283
340
|
return true;
|
|
284
341
|
}
|
|
285
342
|
|
|
343
|
+
// Helper functions.
|
|
344
|
+
bool IsDataBlockInBuffer(uint64_t offset, size_t length, uint32_t index) {
|
|
345
|
+
return (offset >= bufs_[index].offset_ &&
|
|
346
|
+
offset + length <=
|
|
347
|
+
bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize());
|
|
348
|
+
}
|
|
349
|
+
bool IsOffsetInBuffer(uint64_t offset, uint32_t index) {
|
|
350
|
+
return (offset >= bufs_[index].offset_ &&
|
|
351
|
+
offset < bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize());
|
|
352
|
+
}
|
|
353
|
+
bool DoesBufferContainData(uint32_t index) {
|
|
354
|
+
return bufs_[index].buffer_.CurrentSize() > 0;
|
|
355
|
+
}
|
|
356
|
+
bool IsBufferOutdated(uint64_t offset, uint32_t index) {
|
|
357
|
+
return (
|
|
358
|
+
!bufs_[index].async_read_in_progress_ && DoesBufferContainData(index) &&
|
|
359
|
+
offset >= bufs_[index].offset_ + bufs_[index].buffer_.CurrentSize());
|
|
360
|
+
}
|
|
361
|
+
bool IsBufferOutdatedWithAsyncProgress(uint64_t offset, uint32_t index) {
|
|
362
|
+
return (bufs_[index].async_read_in_progress_ &&
|
|
363
|
+
bufs_[index].io_handle_ != nullptr &&
|
|
364
|
+
offset >= bufs_[index].offset_ + bufs_[index].async_req_len_);
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
void DestroyAndClearIOHandle(uint32_t index) {
|
|
368
|
+
if (bufs_[index].io_handle_ != nullptr && bufs_[index].del_fn_ != nullptr) {
|
|
369
|
+
bufs_[index].del_fn_(bufs_[index].io_handle_);
|
|
370
|
+
bufs_[index].io_handle_ = nullptr;
|
|
371
|
+
bufs_[index].del_fn_ = nullptr;
|
|
372
|
+
}
|
|
373
|
+
bufs_[index].async_read_in_progress_ = false;
|
|
374
|
+
}
|
|
375
|
+
|
|
286
376
|
std::vector<BufferInfo> bufs_;
|
|
287
377
|
// curr_ represents the index for bufs_ indicating which buffer is being
|
|
288
378
|
// consumed currently.
|
|
289
379
|
uint32_t curr_;
|
|
380
|
+
|
|
290
381
|
size_t readahead_size_;
|
|
291
382
|
size_t initial_auto_readahead_size_;
|
|
292
383
|
// FilePrefetchBuffer object won't be created from Iterator flow if
|
|
293
384
|
// max_readahead_size_ = 0.
|
|
294
385
|
size_t max_readahead_size_;
|
|
386
|
+
|
|
295
387
|
// The minimum `offset` ever passed to TryReadFromCache().
|
|
296
388
|
size_t min_offset_read_;
|
|
297
389
|
// if false, TryReadFromCache() always return false, and we only take stats
|
|
@@ -306,20 +398,16 @@ class FilePrefetchBuffer {
|
|
|
306
398
|
bool implicit_auto_readahead_;
|
|
307
399
|
uint64_t prev_offset_;
|
|
308
400
|
size_t prev_len_;
|
|
309
|
-
// num_file_reads_ is only used when
|
|
401
|
+
// num_file_reads_ and num_file_reads_for_auto_readahead_ is only used when
|
|
402
|
+
// implicit_auto_readahead_ is set.
|
|
403
|
+
uint64_t num_file_reads_for_auto_readahead_;
|
|
310
404
|
uint64_t num_file_reads_;
|
|
311
405
|
|
|
312
|
-
//
|
|
313
|
-
//
|
|
314
|
-
|
|
315
|
-
IOHandleDeleter del_fn_;
|
|
316
|
-
bool async_read_in_progress_;
|
|
317
|
-
|
|
318
|
-
// If async_request_submitted_ is set then it indicates RocksDB called
|
|
319
|
-
// PrefetchAsync to submit request. It needs to TryReadFromCacheAsync to poll
|
|
320
|
-
// the submitted request without checking if data is sequential and
|
|
406
|
+
// If explicit_prefetch_submitted_ is set then it indicates RocksDB called
|
|
407
|
+
// PrefetchAsync to submit request. It needs to call TryReadFromCacheAsync to
|
|
408
|
+
// poll the submitted request without checking if data is sequential and
|
|
321
409
|
// num_file_reads_.
|
|
322
|
-
bool
|
|
410
|
+
bool explicit_prefetch_submitted_;
|
|
323
411
|
|
|
324
412
|
FileSystem* fs_;
|
|
325
413
|
SystemClock* clock_;
|
|
@@ -388,7 +388,7 @@ bool ParseFileName(const std::string& fname, uint64_t* number,
|
|
|
388
388
|
|
|
389
389
|
IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
|
|
390
390
|
uint64_t descriptor_number,
|
|
391
|
-
FSDirectory*
|
|
391
|
+
FSDirectory* dir_contains_current_file) {
|
|
392
392
|
// Remove leading "dbname/" and add newline to manifest file name
|
|
393
393
|
std::string manifest = DescriptorFileName(dbname, descriptor_number);
|
|
394
394
|
Slice contents = manifest;
|
|
@@ -404,8 +404,8 @@ IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
|
|
|
404
404
|
TEST_SYNC_POINT_CALLBACK("SetCurrentFile:AfterRename", &s);
|
|
405
405
|
}
|
|
406
406
|
if (s.ok()) {
|
|
407
|
-
if (
|
|
408
|
-
s =
|
|
407
|
+
if (dir_contains_current_file != nullptr) {
|
|
408
|
+
s = dir_contains_current_file->FsyncWithDirOptions(
|
|
409
409
|
IOOptions(), nullptr, DirFsyncOptions(CurrentFileName(dbname)));
|
|
410
410
|
}
|
|
411
411
|
} else {
|
|
@@ -160,10 +160,12 @@ extern bool ParseFileName(const std::string& filename, uint64_t* number,
|
|
|
160
160
|
FileType* type, WalFileType* log_type = nullptr);
|
|
161
161
|
|
|
162
162
|
// Make the CURRENT file point to the descriptor file with the
|
|
163
|
-
// specified number.
|
|
163
|
+
// specified number. On its success and when dir_contains_current_file is not
|
|
164
|
+
// nullptr, the function will fsync the directory containing the CURRENT file
|
|
165
|
+
// when
|
|
164
166
|
extern IOStatus SetCurrentFile(FileSystem* fs, const std::string& dbname,
|
|
165
167
|
uint64_t descriptor_number,
|
|
166
|
-
FSDirectory*
|
|
168
|
+
FSDirectory* dir_contains_current_file);
|
|
167
169
|
|
|
168
170
|
// Make the IDENTITY file for the db
|
|
169
171
|
extern Status SetIdentityFile(Env* env, const std::string& dbname,
|