@nxtedition/rocksdb 10.1.5 → 10.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +19 -11
- package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -5
- package/deps/rocksdb/rocksdb/Makefile +38 -15
- package/deps/rocksdb/rocksdb/TARGETS +10 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +4 -2
- package/deps/rocksdb/rocksdb/db/builder.cc +2 -2
- package/deps/rocksdb/rocksdb/db/builder.h +1 -1
- package/deps/rocksdb/rocksdb/db/c.cc +205 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +189 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +28 -0
- package/deps/rocksdb/rocksdb/db/column_family.h +17 -0
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +234 -60
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +11 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +22 -25
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +112 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +72 -21
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +77 -0
- package/deps/rocksdb/rocksdb/db/convenience.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +269 -112
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +107 -43
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +93 -24
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +157 -68
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +56 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -105
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +39 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +21 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +107 -63
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +43 -2
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -6
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +92 -2
- package/deps/rocksdb/rocksdb/db/error_handler.cc +34 -39
- package/deps/rocksdb/rocksdb/db/error_handler.h +3 -4
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +6 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +71 -15
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +383 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +88 -72
- package/deps/rocksdb/rocksdb/db/flush_job.cc +30 -3
- package/deps/rocksdb/rocksdb/db/flush_job.h +14 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +60 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +20 -1
- package/deps/rocksdb/rocksdb/db/log_writer.cc +24 -0
- package/deps/rocksdb/rocksdb/db/log_writer.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
- package/deps/rocksdb/rocksdb/db/memtable.h +10 -10
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +4 -4
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +10 -3
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +8 -10
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +30 -0
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +9 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +17 -2
- package/deps/rocksdb/rocksdb/db/table_cache.h +9 -1
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +9 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +3 -3
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_edit.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -6
- package/deps/rocksdb/rocksdb/db/version_set.cc +54 -31
- package/deps/rocksdb/rocksdb/db/version_set.h +14 -7
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +37 -29
- package/deps/rocksdb/rocksdb/db/wal_manager.h +6 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +54 -23
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +46 -5
- package/deps/rocksdb/rocksdb/db/write_thread.cc +53 -5
- package/deps/rocksdb/rocksdb/db/write_thread.h +36 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +57 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +11 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +10 -25
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +25 -88
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.cc +93 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.h +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +43 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +109 -21
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +666 -205
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +55 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -16
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +782 -494
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +21 -0
- package/deps/rocksdb/rocksdb/env/env.cc +6 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +0 -1
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +34 -19
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +29 -32
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +41 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +63 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +16 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +0 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +76 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +12 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +31 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/user_write_callback.h +29 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +17 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -2
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +9 -5
- package/deps/rocksdb/rocksdb/options/options.cc +3 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +2 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +1 -0
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +3 -2
- package/deps/rocksdb/rocksdb/src.mk +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +15 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +15 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +31 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +10 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +11 -15
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -21
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +9 -11
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -16
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +14 -9
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +82 -41
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +13 -14
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +18 -22
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +9 -10
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +3 -2
- package/deps/rocksdb/rocksdb/table/format.cc +1 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +18 -13
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +5 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +8 -7
- package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +1 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +19 -0
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +434 -110
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -1
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +3 -0
- package/deps/rocksdb/rocksdb/util/aligned_storage.h +24 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/util/random.cc +2 -1
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +1 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +33 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +7 -0
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +33 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +5 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +10 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +13 -13
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +104 -48
- package/deps/rocksdb/rocksdb/utilities/debug.cc +16 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +647 -235
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -157
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector_test.cc +139 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +105 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +64 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +43 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +154 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +158 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +16 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +9 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +43 -7
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/.tap/test-results/node_modules/abstract-level/test/chained-batch-test.js.tap +0 -0
- package/.tap/test-results/node_modules/abstract-level/test/get-test.js.tap +0 -0
- package/.tap/test-results/test/abstract-level-test.js.tap +0 -1077
- package/.tap/test-results/test/batch-test.js.tap +0 -12
- package/.tap/test-results/test/chained-batch-gc-test.js.tap +0 -11
- package/.tap/test-results/test/cleanup-hanging-iterators-test.js.tap +0 -135
- package/.tap/test-results/test/clear-gc-test.js.tap +0 -13
- package/.tap/test-results/test/column-test.js.tap +0 -55
- package/.tap/test-results/test/common.js.tap +0 -0
- package/.tap/test-results/test/compression-test.js.tap +0 -30
- package/.tap/test-results/test/db-identity.js.tap +0 -12
- package/.tap/test-results/test/electron.js.tap +0 -0
- package/.tap/test-results/test/env-cleanup-hook-test.js.tap +0 -40
- package/.tap/test-results/test/env-cleanup-hook.js.tap +0 -0
- package/.tap/test-results/test/gc.js.tap +0 -0
- package/.tap/test-results/test/getproperty-test.js.tap +0 -29
- package/.tap/test-results/test/iterator-gc-test.js.tap +0 -15
- package/.tap/test-results/test/iterator-hwm-test.js.tap +0 -131
- package/.tap/test-results/test/iterator-recursion-test.js.tap +0 -12
- package/.tap/test-results/test/iterator-starvation-test.js.tap +0 -73
- package/.tap/test-results/test/iterator-test.js.tap +0 -6
- package/.tap/test-results/test/leak-tester-batch.js.tap +0 -0
- package/.tap/test-results/test/leak-tester-iterator.js.tap +0 -0
- package/.tap/test-results/test/leak-tester.js.tap +0 -0
- package/.tap/test-results/test/lock-test.js.tap +0 -18
- package/.tap/test-results/test/lock.js.tap +0 -0
- package/.tap/test-results/test/make.js.tap +0 -0
- package/.tap/test-results/test/max-rev-merge.js.tap +0 -0
- package/.tap/test-results/test/merge-operator-test.js.tap +0 -12
- package/.tap/test-results/test/mkdir-test.js.tap +0 -15
- package/.tap/test-results/test/segfault-test.js.tap +0 -76
- package/.tap/test-results/test/stack-blower.js.tap +0 -0
- package/deps/rocksdb/rocksdb/README.md +0 -29
- package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
- package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
- package/deps/rocksdb/rocksdb/port/README +0 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
#pragma once
|
|
10
10
|
|
|
11
11
|
#include <atomic>
|
|
12
|
+
#include <cstdint>
|
|
12
13
|
#include <deque>
|
|
13
14
|
#include <functional>
|
|
14
15
|
#include <limits>
|
|
@@ -57,6 +58,7 @@
|
|
|
57
58
|
#include "rocksdb/status.h"
|
|
58
59
|
#include "rocksdb/trace_reader_writer.h"
|
|
59
60
|
#include "rocksdb/transaction_log.h"
|
|
61
|
+
#include "rocksdb/user_write_callback.h"
|
|
60
62
|
#include "rocksdb/utilities/replayer.h"
|
|
61
63
|
#include "rocksdb/write_buffer_manager.h"
|
|
62
64
|
#include "table/merging_iterator.h"
|
|
@@ -231,6 +233,10 @@ class DBImpl : public DB {
|
|
|
231
233
|
using DB::Write;
|
|
232
234
|
Status Write(const WriteOptions& options, WriteBatch* updates) override;
|
|
233
235
|
|
|
236
|
+
using DB::WriteWithCallback;
|
|
237
|
+
Status WriteWithCallback(const WriteOptions& options, WriteBatch* updates,
|
|
238
|
+
UserWriteCallback* user_write_cb) override;
|
|
239
|
+
|
|
234
240
|
using DB::Get;
|
|
235
241
|
Status Get(const ReadOptions& _read_options,
|
|
236
242
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
@@ -501,8 +507,16 @@ class DBImpl : public DB {
|
|
|
501
507
|
// All the returned filenames start with "/"
|
|
502
508
|
Status GetLiveFiles(std::vector<std::string>&, uint64_t* manifest_file_size,
|
|
503
509
|
bool flush_memtable = true) override;
|
|
504
|
-
Status GetSortedWalFiles(
|
|
505
|
-
Status
|
|
510
|
+
Status GetSortedWalFiles(VectorWalPtr& files) override;
|
|
511
|
+
Status GetSortedWalFilesImpl(VectorWalPtr& files, bool need_seqnos);
|
|
512
|
+
|
|
513
|
+
// Get the known flushed sizes of WALs that might still be written to
|
|
514
|
+
// or have pending sync.
|
|
515
|
+
// NOTE: unlike alive_log_files_, this function includes WALs that might
|
|
516
|
+
// be obsolete (but not obsolete to a pending Checkpoint) and not yet fully
|
|
517
|
+
// synced.
|
|
518
|
+
Status GetOpenWalSizes(std::map<uint64_t, uint64_t>& number_to_size);
|
|
519
|
+
Status GetCurrentWalFile(std::unique_ptr<WalFile>* current_log_file) override;
|
|
506
520
|
Status GetCreationTimeOfOldestFile(uint64_t* creation_time) override;
|
|
507
521
|
|
|
508
522
|
Status GetUpdatesSince(
|
|
@@ -688,7 +702,8 @@ class DBImpl : public DB {
|
|
|
688
702
|
// thread to determine whether it is safe to perform the write.
|
|
689
703
|
virtual Status WriteWithCallback(const WriteOptions& write_options,
|
|
690
704
|
WriteBatch* my_batch,
|
|
691
|
-
WriteCallback* callback
|
|
705
|
+
WriteCallback* callback,
|
|
706
|
+
UserWriteCallback* user_write_cb = nullptr);
|
|
692
707
|
|
|
693
708
|
// Returns the sequence number that is guaranteed to be smaller than or equal
|
|
694
709
|
// to the sequence number of any key that could be inserted into the current
|
|
@@ -1447,6 +1462,9 @@ class DBImpl : public DB {
|
|
|
1447
1462
|
Status RenameTempFileToOptionsFile(const std::string& file_name);
|
|
1448
1463
|
Status DeleteObsoleteOptionsFiles();
|
|
1449
1464
|
|
|
1465
|
+
void NotifyOnManualFlushScheduled(autovector<ColumnFamilyData*> cfds,
|
|
1466
|
+
FlushReason flush_reason);
|
|
1467
|
+
|
|
1450
1468
|
void NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
|
|
1451
1469
|
const MutableCFOptions& mutable_cf_options,
|
|
1452
1470
|
int job_id, FlushReason flush_reason);
|
|
@@ -1497,6 +1515,7 @@ class DBImpl : public DB {
|
|
|
1497
1515
|
// batch that does not have duplicate keys.
|
|
1498
1516
|
Status WriteImpl(const WriteOptions& options, WriteBatch* updates,
|
|
1499
1517
|
WriteCallback* callback = nullptr,
|
|
1518
|
+
UserWriteCallback* user_write_cb = nullptr,
|
|
1500
1519
|
uint64_t* log_used = nullptr, uint64_t log_ref = 0,
|
|
1501
1520
|
bool disable_memtable = false, uint64_t* seq_used = nullptr,
|
|
1502
1521
|
size_t batch_cnt = 0,
|
|
@@ -1505,6 +1524,7 @@ class DBImpl : public DB {
|
|
|
1505
1524
|
|
|
1506
1525
|
Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
|
|
1507
1526
|
WriteCallback* callback = nullptr,
|
|
1527
|
+
UserWriteCallback* user_write_cb = nullptr,
|
|
1508
1528
|
uint64_t* log_used = nullptr, uint64_t log_ref = 0,
|
|
1509
1529
|
bool disable_memtable = false,
|
|
1510
1530
|
uint64_t* seq_used = nullptr);
|
|
@@ -1531,7 +1551,8 @@ class DBImpl : public DB {
|
|
|
1531
1551
|
// marks start of a new sub-batch.
|
|
1532
1552
|
Status WriteImplWALOnly(
|
|
1533
1553
|
WriteThread* write_thread, const WriteOptions& options,
|
|
1534
|
-
WriteBatch* updates, WriteCallback* callback,
|
|
1554
|
+
WriteBatch* updates, WriteCallback* callback,
|
|
1555
|
+
UserWriteCallback* user_write_cb, uint64_t* log_used,
|
|
1535
1556
|
const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
|
|
1536
1557
|
PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
|
|
1537
1558
|
const PublishLastSeq publish_last_seq, const bool disable_memtable);
|
|
@@ -1703,8 +1724,11 @@ class DBImpl : public DB {
|
|
|
1703
1724
|
return w;
|
|
1704
1725
|
}
|
|
1705
1726
|
Status ClearWriter() {
|
|
1706
|
-
|
|
1707
|
-
|
|
1727
|
+
Status s;
|
|
1728
|
+
if (writer->file()) {
|
|
1729
|
+
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
1730
|
+
s = writer->WriteBuffer(WriteOptions());
|
|
1731
|
+
}
|
|
1708
1732
|
delete writer;
|
|
1709
1733
|
writer = nullptr;
|
|
1710
1734
|
return s;
|
|
@@ -1719,10 +1743,16 @@ class DBImpl : public DB {
|
|
|
1719
1743
|
|
|
1720
1744
|
void PrepareForSync() {
|
|
1721
1745
|
assert(!getting_synced);
|
|
1722
|
-
//
|
|
1723
|
-
assert(writer->file()->GetFlushedSize() >= pre_sync_size);
|
|
1746
|
+
// Ensure the head of logs_ is marked as getting_synced if any is.
|
|
1724
1747
|
getting_synced = true;
|
|
1725
|
-
|
|
1748
|
+
// If last sync failed on a later WAL, this could be a fully synced
|
|
1749
|
+
// and closed WAL that just needs to be recorded as synced in the
|
|
1750
|
+
// manifest.
|
|
1751
|
+
if (writer->file()) {
|
|
1752
|
+
// Size is expected to be monotonically increasing.
|
|
1753
|
+
assert(writer->file()->GetFlushedSize() >= pre_sync_size);
|
|
1754
|
+
pre_sync_size = writer->file()->GetFlushedSize();
|
|
1755
|
+
}
|
|
1726
1756
|
}
|
|
1727
1757
|
|
|
1728
1758
|
void FinishSync() {
|
|
@@ -1920,7 +1950,7 @@ class DBImpl : public DB {
|
|
|
1920
1950
|
void ReleaseFileNumberFromPendingOutputs(
|
|
1921
1951
|
std::unique_ptr<std::list<uint64_t>::iterator>& v);
|
|
1922
1952
|
|
|
1923
|
-
IOStatus
|
|
1953
|
+
IOStatus SyncClosedWals(const WriteOptions& write_options,
|
|
1924
1954
|
JobContext* job_context, VersionEdit* synced_wals,
|
|
1925
1955
|
bool error_recovery_in_prog);
|
|
1926
1956
|
|
|
@@ -2179,7 +2209,8 @@ class DBImpl : public DB {
|
|
|
2179
2209
|
void GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
|
|
2180
2210
|
FlushReason flush_reason, FlushRequest* req);
|
|
2181
2211
|
|
|
2182
|
-
|
|
2212
|
+
// Returns true if `req` is successfully enqueued.
|
|
2213
|
+
bool SchedulePendingFlush(const FlushRequest& req);
|
|
2183
2214
|
|
|
2184
2215
|
void SchedulePendingCompaction(ColumnFamilyData* cfd);
|
|
2185
2216
|
void SchedulePendingPurge(std::string fname, std::string dir_to_sync,
|
|
@@ -2255,6 +2286,11 @@ class DBImpl : public DB {
|
|
|
2255
2286
|
ColumnFamilyData* PickCompactionFromQueue(
|
|
2256
2287
|
std::unique_ptr<TaskLimiterToken>* token, LogBuffer* log_buffer);
|
|
2257
2288
|
|
|
2289
|
+
IOStatus SyncWalImpl(bool include_current_wal,
|
|
2290
|
+
const WriteOptions& write_options,
|
|
2291
|
+
JobContext* job_context, VersionEdit* synced_wals,
|
|
2292
|
+
bool error_recovery_in_prog);
|
|
2293
|
+
|
|
2258
2294
|
// helper function to call after some of the logs_ were synced
|
|
2259
2295
|
void MarkLogsSynced(uint64_t up_to, bool synced_dir, VersionEdit* edit);
|
|
2260
2296
|
Status ApplyWALToManifest(const ReadOptions& read_options,
|
|
@@ -2312,6 +2348,9 @@ class DBImpl : public DB {
|
|
|
2312
2348
|
bool HaveManualCompaction(ColumnFamilyData* cfd);
|
|
2313
2349
|
bool MCOverlap(ManualCompactionState* m, ManualCompactionState* m1);
|
|
2314
2350
|
void UpdateDeletionCompactionStats(const std::unique_ptr<Compaction>& c);
|
|
2351
|
+
|
|
2352
|
+
// May open and read table files for table property.
|
|
2353
|
+
// Should not be called while holding mutex_.
|
|
2315
2354
|
void BuildCompactionJobInfo(const ColumnFamilyData* cfd, Compaction* c,
|
|
2316
2355
|
const Status& st,
|
|
2317
2356
|
const CompactionJobStats& compaction_job_stats,
|
|
@@ -2527,8 +2566,10 @@ class DBImpl : public DB {
|
|
|
2527
2566
|
|
|
2528
2567
|
bool persistent_stats_cfd_exists_ = true;
|
|
2529
2568
|
|
|
2530
|
-
//
|
|
2531
|
-
//
|
|
2569
|
+
// The current WAL file and those that have not been found obsolete from
|
|
2570
|
+
// memtable flushes. A WAL not on this list might still be pending writer
|
|
2571
|
+
// flush and/or sync and close and might still be in logs_. alive_log_files_
|
|
2572
|
+
// is protected by mutex_ and log_write_mutex_ with details as follows:
|
|
2532
2573
|
// 1. read by FindObsoleteFiles() which can be called in either application
|
|
2533
2574
|
// thread or RocksDB bg threads, both mutex_ and log_write_mutex_ are
|
|
2534
2575
|
// held.
|
|
@@ -2579,7 +2620,7 @@ class DBImpl : public DB {
|
|
|
2579
2620
|
// 8. read by MarkLogsNotSynced() and MarkLogsSynced() are protected by
|
|
2580
2621
|
// log_write_mutex_.
|
|
2581
2622
|
// 9. erase() by MarkLogsSynced() protected by log_write_mutex_.
|
|
2582
|
-
// 10. read by
|
|
2623
|
+
// 10. read by SyncClosedWals() protected by only log_write_mutex_. This can
|
|
2583
2624
|
// happen in bg flush threads after DB::Open() returns success to
|
|
2584
2625
|
// applications.
|
|
2585
2626
|
// 11. reads, e.g. front(), iteration, and back() called by PreprocessWrite()
|
|
@@ -2592,7 +2633,7 @@ class DBImpl : public DB {
|
|
|
2592
2633
|
// 13. emplace_back() by SwitchMemtable() hold both mutex_ and
|
|
2593
2634
|
// log_write_mutex_. This happens in the write group leader. Can conflict
|
|
2594
2635
|
// with bg threads calling FindObsoleteFiles(), MarkLogsSynced(),
|
|
2595
|
-
//
|
|
2636
|
+
// SyncClosedWals(), etc. as well as application threads calling
|
|
2596
2637
|
// FlushWAL(), SyncWAL(), LockWAL(). This is fine because all parties
|
|
2597
2638
|
// require at least log_write_mutex_.
|
|
2598
2639
|
// 14. iteration called in WriteToWAL(write_group) protected by
|
|
@@ -87,6 +87,9 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
|
|
|
87
87
|
mutex_.AssertHeld();
|
|
88
88
|
assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
89
89
|
ColumnFamilyData* cfd = flush_req.cfd_to_max_mem_id_to_persist.begin()->first;
|
|
90
|
+
if (cfd->GetAndClearFlushSkipReschedule()) {
|
|
91
|
+
return false;
|
|
92
|
+
}
|
|
90
93
|
uint64_t max_memtable_id =
|
|
91
94
|
flush_req.cfd_to_max_mem_id_to_persist.begin()->second;
|
|
92
95
|
if (cfd->IsDropped() ||
|
|
@@ -98,15 +101,20 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
|
|
|
98
101
|
// alleviated if we continue with the flush instead of postponing it.
|
|
99
102
|
const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions();
|
|
100
103
|
|
|
101
|
-
//
|
|
102
|
-
//
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
104
|
+
// Use the same criteria as WaitUntilFlushWouldNotStallWrites does w.r.t
|
|
105
|
+
// defining what a write stall is about to happen means. If this uses a
|
|
106
|
+
// stricter criteria, for example, a write stall is about to happen if the
|
|
107
|
+
// last memtable is 10% full, there is a possibility that manual flush could
|
|
108
|
+
// be waiting in `WaitUntilFlushWouldNotStallWrites` with the incorrect
|
|
109
|
+
// expectation that others will clear up the excessive memtables and
|
|
110
|
+
// eventually let it proceed. The others in this case won't start clearing
|
|
111
|
+
// until the last memtable is 10% full. To avoid that scenario, the criteria
|
|
112
|
+
// this uses should be the same or less strict than
|
|
113
|
+
// `WaitUntilFlushWouldNotStallWrites` does.
|
|
107
114
|
WriteStallCondition write_stall =
|
|
108
115
|
ColumnFamilyData::GetWriteStallConditionAndCause(
|
|
109
|
-
cfd->
|
|
116
|
+
cfd->GetUnflushedMemTableCountForWriteStallCheck(),
|
|
117
|
+
/*num_l0_files=*/0,
|
|
110
118
|
/*num_compaction_needed_bytes=*/0, mutable_cf_options,
|
|
111
119
|
*cfd->ioptions())
|
|
112
120
|
.first;
|
|
@@ -116,89 +124,19 @@ bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
|
|
|
116
124
|
return true;
|
|
117
125
|
}
|
|
118
126
|
|
|
119
|
-
IOStatus DBImpl::
|
|
127
|
+
IOStatus DBImpl::SyncClosedWals(const WriteOptions& write_options,
|
|
120
128
|
JobContext* job_context,
|
|
121
129
|
VersionEdit* synced_wals,
|
|
122
130
|
bool error_recovery_in_prog) {
|
|
123
|
-
TEST_SYNC_POINT("DBImpl::
|
|
124
|
-
InstrumentedMutexLock l(&log_write_mutex_);
|
|
125
|
-
autovector<log::Writer*, 1> logs_to_sync;
|
|
126
|
-
uint64_t current_log_number = logfile_number_;
|
|
127
|
-
while (logs_.front().number < current_log_number &&
|
|
128
|
-
logs_.front().IsSyncing()) {
|
|
129
|
-
log_sync_cv_.Wait();
|
|
130
|
-
}
|
|
131
|
-
for (auto it = logs_.begin();
|
|
132
|
-
it != logs_.end() && it->number < current_log_number; ++it) {
|
|
133
|
-
auto& log = *it;
|
|
134
|
-
log.PrepareForSync();
|
|
135
|
-
logs_to_sync.push_back(log.writer);
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
IOStatus io_s;
|
|
139
|
-
if (!logs_to_sync.empty()) {
|
|
140
|
-
log_write_mutex_.Unlock();
|
|
141
|
-
|
|
142
|
-
assert(job_context);
|
|
143
|
-
|
|
144
|
-
for (log::Writer* log : logs_to_sync) {
|
|
145
|
-
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
|
146
|
-
"[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
|
|
147
|
-
log->get_log_number());
|
|
148
|
-
if (error_recovery_in_prog) {
|
|
149
|
-
log->file()->reset_seen_error();
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
IOOptions io_options;
|
|
153
|
-
io_s = WritableFileWriter::PrepareIOOptions(write_options, io_options);
|
|
154
|
-
if (!io_s.ok()) {
|
|
155
|
-
break;
|
|
156
|
-
}
|
|
157
|
-
io_s = log->file()->Sync(io_options, immutable_db_options_.use_fsync);
|
|
158
|
-
if (!io_s.ok()) {
|
|
159
|
-
break;
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
if (immutable_db_options_.recycle_log_file_num > 0) {
|
|
163
|
-
if (error_recovery_in_prog) {
|
|
164
|
-
log->file()->reset_seen_error();
|
|
165
|
-
}
|
|
166
|
-
// Normally the log file is closed when purging obsolete file, but if
|
|
167
|
-
// log recycling is enabled, the log file is closed here so that it
|
|
168
|
-
// can be reused.
|
|
169
|
-
io_s = log->Close(write_options);
|
|
170
|
-
if (!io_s.ok()) {
|
|
171
|
-
break;
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
}
|
|
175
|
-
if (io_s.ok()) {
|
|
176
|
-
IOOptions io_options;
|
|
177
|
-
io_s = WritableFileWriter::PrepareIOOptions(write_options, io_options);
|
|
178
|
-
if (io_s.ok()) {
|
|
179
|
-
io_s = directories_.GetWalDir()->FsyncWithDirOptions(
|
|
180
|
-
io_options, nullptr,
|
|
181
|
-
DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
|
|
185
|
-
TEST_SYNC_POINT_CALLBACK("DBImpl::SyncClosedLogs:BeforeReLock",
|
|
186
|
-
/*arg=*/nullptr);
|
|
187
|
-
log_write_mutex_.Lock();
|
|
131
|
+
TEST_SYNC_POINT("DBImpl::SyncClosedWals:Start");
|
|
188
132
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
}
|
|
196
|
-
if (!io_s.ok()) {
|
|
197
|
-
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed");
|
|
198
|
-
return io_s;
|
|
199
|
-
}
|
|
133
|
+
IOStatus io_s = SyncWalImpl(/*include_current_wal*/ false, write_options,
|
|
134
|
+
job_context, synced_wals, error_recovery_in_prog);
|
|
135
|
+
if (!io_s.ok()) {
|
|
136
|
+
TEST_SYNC_POINT("DBImpl::SyncClosedWals:Failed");
|
|
137
|
+
} else {
|
|
138
|
+
TEST_SYNC_POINT("DBImpl::SyncClosedWals:end");
|
|
200
139
|
}
|
|
201
|
-
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:end");
|
|
202
140
|
return io_s;
|
|
203
141
|
}
|
|
204
142
|
|
|
@@ -237,12 +175,12 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
237
175
|
// If needs_to_sync_closed_wals is true, we need to record the current
|
|
238
176
|
// maximum memtable ID of this column family so that a later PickMemtables()
|
|
239
177
|
// call will not pick memtables whose IDs are higher. This is due to the fact
|
|
240
|
-
// that
|
|
178
|
+
// that SyncClosedWals() may release the db mutex, and memtable switch can
|
|
241
179
|
// happen for this column family in the meantime. The newly created memtables
|
|
242
180
|
// have their data backed by unsynced WALs, thus they cannot be included in
|
|
243
181
|
// this flush job.
|
|
244
182
|
// Another reason why we must record the current maximum memtable ID of this
|
|
245
|
-
// column family:
|
|
183
|
+
// column family: SyncClosedWals() may release db mutex, thus it's possible
|
|
246
184
|
// for application to continue to insert into memtables increasing db's
|
|
247
185
|
// sequence number. The application may take a snapshot, but this snapshot is
|
|
248
186
|
// not included in `snapshot_seqs` which will be passed to flush job because
|
|
@@ -256,7 +194,7 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
256
194
|
|
|
257
195
|
// If needs_to_sync_closed_wals is false, then the flush job will pick ALL
|
|
258
196
|
// existing memtables of the column family when PickMemTable() is called
|
|
259
|
-
// later. Although we won't call
|
|
197
|
+
// later. Although we won't call SyncClosedWals() in this case, we may still
|
|
260
198
|
// call the callbacks of the listeners, i.e. NotifyOnFlushBegin() which also
|
|
261
199
|
// releases and re-acquires the db mutex. In the meantime, the application
|
|
262
200
|
// can still insert into the memtables and increase the db's sequence number.
|
|
@@ -286,12 +224,12 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
286
224
|
bool need_cancel = false;
|
|
287
225
|
IOStatus log_io_s = IOStatus::OK();
|
|
288
226
|
if (needs_to_sync_closed_wals) {
|
|
289
|
-
//
|
|
227
|
+
// SyncClosedWals() may unlock and re-lock the log_write_mutex multiple
|
|
290
228
|
// times.
|
|
291
229
|
VersionEdit synced_wals;
|
|
292
230
|
bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
|
|
293
231
|
mutex_.Unlock();
|
|
294
|
-
log_io_s =
|
|
232
|
+
log_io_s = SyncClosedWals(write_options, job_context, &synced_wals,
|
|
295
233
|
error_recovery_in_prog);
|
|
296
234
|
mutex_.Lock();
|
|
297
235
|
if (log_io_s.ok() && synced_wals.IsWalAddition()) {
|
|
@@ -306,7 +244,7 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
306
244
|
error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
|
|
307
245
|
}
|
|
308
246
|
} else {
|
|
309
|
-
TEST_SYNC_POINT("DBImpl::
|
|
247
|
+
TEST_SYNC_POINT("DBImpl::SyncClosedWals:Skip");
|
|
310
248
|
}
|
|
311
249
|
s = log_io_s;
|
|
312
250
|
|
|
@@ -580,7 +518,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
580
518
|
VersionEdit synced_wals;
|
|
581
519
|
bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
|
|
582
520
|
mutex_.Unlock();
|
|
583
|
-
log_io_s =
|
|
521
|
+
log_io_s = SyncClosedWals(write_options, job_context, &synced_wals,
|
|
584
522
|
error_recovery_in_prog);
|
|
585
523
|
mutex_.Lock();
|
|
586
524
|
if (log_io_s.ok() && synced_wals.IsWalAddition()) {
|
|
@@ -1657,10 +1595,12 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1657
1595
|
|
|
1658
1596
|
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
|
|
1659
1597
|
|
|
1598
|
+
mutex_.Unlock();
|
|
1660
1599
|
if (compaction_job_info != nullptr) {
|
|
1661
1600
|
BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats,
|
|
1662
1601
|
job_context->job_id, compaction_job_info);
|
|
1663
1602
|
}
|
|
1603
|
+
mutex_.Lock();
|
|
1664
1604
|
|
|
1665
1605
|
if (status.ok()) {
|
|
1666
1606
|
// Done
|
|
@@ -2310,6 +2250,23 @@ void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
|
|
|
2310
2250
|
}
|
|
2311
2251
|
}
|
|
2312
2252
|
|
|
2253
|
+
void DBImpl::NotifyOnManualFlushScheduled(autovector<ColumnFamilyData*> cfds,
|
|
2254
|
+
FlushReason flush_reason) {
|
|
2255
|
+
if (immutable_db_options_.listeners.size() == 0U) {
|
|
2256
|
+
return;
|
|
2257
|
+
}
|
|
2258
|
+
if (shutting_down_.load(std::memory_order_acquire)) {
|
|
2259
|
+
return;
|
|
2260
|
+
}
|
|
2261
|
+
std::vector<ManualFlushInfo> info;
|
|
2262
|
+
for (ColumnFamilyData* cfd : cfds) {
|
|
2263
|
+
info.push_back({cfd->GetID(), cfd->GetName(), flush_reason});
|
|
2264
|
+
}
|
|
2265
|
+
for (const auto& listener : immutable_db_options_.listeners) {
|
|
2266
|
+
listener->OnManualFlushScheduled(this, info);
|
|
2267
|
+
}
|
|
2268
|
+
}
|
|
2269
|
+
|
|
2313
2270
|
Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
2314
2271
|
const FlushOptions& flush_options,
|
|
2315
2272
|
FlushReason flush_reason,
|
|
@@ -2414,7 +2371,14 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2414
2371
|
}
|
|
2415
2372
|
}
|
|
2416
2373
|
for (const auto& req : flush_reqs) {
|
|
2417
|
-
|
|
2374
|
+
assert(req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
2375
|
+
ColumnFamilyData* loop_cfd =
|
|
2376
|
+
req.cfd_to_max_mem_id_to_persist.begin()->first;
|
|
2377
|
+
bool already_queued_for_flush = loop_cfd->queued_for_flush();
|
|
2378
|
+
bool flush_req_enqueued = SchedulePendingFlush(req);
|
|
2379
|
+
if (already_queued_for_flush || flush_req_enqueued) {
|
|
2380
|
+
loop_cfd->SetFlushSkipReschedule();
|
|
2381
|
+
}
|
|
2418
2382
|
}
|
|
2419
2383
|
MaybeScheduleFlushOrCompaction();
|
|
2420
2384
|
}
|
|
@@ -2426,6 +2390,8 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2426
2390
|
}
|
|
2427
2391
|
}
|
|
2428
2392
|
}
|
|
2393
|
+
|
|
2394
|
+
NotifyOnManualFlushScheduled({cfd}, flush_reason);
|
|
2429
2395
|
TEST_SYNC_POINT("DBImpl::FlushMemTable:AfterScheduleFlush");
|
|
2430
2396
|
TEST_SYNC_POINT("DBImpl::FlushMemTable:BeforeWaitForBgFlush");
|
|
2431
2397
|
if (s.ok() && flush_options.wait) {
|
|
@@ -2570,6 +2536,7 @@ Status DBImpl::AtomicFlushMemTables(
|
|
|
2570
2536
|
}
|
|
2571
2537
|
}
|
|
2572
2538
|
}
|
|
2539
|
+
NotifyOnManualFlushScheduled(cfds, flush_reason);
|
|
2573
2540
|
TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:AfterScheduleFlush");
|
|
2574
2541
|
TEST_SYNC_POINT("DBImpl::AtomicFlushMemTables:BeforeWaitForBgFlush");
|
|
2575
2542
|
if (s.ok() && flush_options.wait) {
|
|
@@ -2627,7 +2594,9 @@ Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason,
|
|
|
2627
2594
|
flush_reason,
|
|
2628
2595
|
{{cfd,
|
|
2629
2596
|
std::numeric_limits<uint64_t>::max() /* max_mem_id_to_persist */}}};
|
|
2630
|
-
SchedulePendingFlush(flush_req)
|
|
2597
|
+
if (SchedulePendingFlush(flush_req)) {
|
|
2598
|
+
cfd->SetFlushSkipReschedule();
|
|
2599
|
+
};
|
|
2631
2600
|
}
|
|
2632
2601
|
}
|
|
2633
2602
|
MaybeScheduleFlushOrCompaction();
|
|
@@ -2715,13 +2684,13 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|
|
2715
2684
|
// mode due to pending compaction bytes, but that's less common
|
|
2716
2685
|
// No extra immutable Memtable will be created if the current Memtable is
|
|
2717
2686
|
// empty.
|
|
2718
|
-
|
|
2719
|
-
|
|
2720
|
-
|
|
2721
|
-
|
|
2722
|
-
|
|
2723
|
-
|
|
2724
|
-
|
|
2687
|
+
write_stall_condition =
|
|
2688
|
+
ColumnFamilyData::GetWriteStallConditionAndCause(
|
|
2689
|
+
cfd->GetUnflushedMemTableCountForWriteStallCheck(),
|
|
2690
|
+
vstorage->l0_delay_trigger_count() + 1,
|
|
2691
|
+
vstorage->estimated_compaction_needed_bytes(), mutable_cf_options,
|
|
2692
|
+
*cfd->ioptions())
|
|
2693
|
+
.first;
|
|
2725
2694
|
} while (write_stall_condition != WriteStallCondition::kNormal);
|
|
2726
2695
|
}
|
|
2727
2696
|
return Status::OK();
|
|
@@ -3033,13 +3002,14 @@ ColumnFamilyData* DBImpl::PickCompactionFromQueue(
|
|
|
3033
3002
|
return cfd;
|
|
3034
3003
|
}
|
|
3035
3004
|
|
|
3036
|
-
|
|
3005
|
+
bool DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
3037
3006
|
mutex_.AssertHeld();
|
|
3007
|
+
bool enqueued = false;
|
|
3038
3008
|
if (reject_new_background_jobs_) {
|
|
3039
|
-
return;
|
|
3009
|
+
return enqueued;
|
|
3040
3010
|
}
|
|
3041
3011
|
if (flush_req.cfd_to_max_mem_id_to_persist.empty()) {
|
|
3042
|
-
return;
|
|
3012
|
+
return enqueued;
|
|
3043
3013
|
}
|
|
3044
3014
|
if (!immutable_db_options_.atomic_flush) {
|
|
3045
3015
|
// For the non-atomic flush case, we never schedule multiple column
|
|
@@ -3054,6 +3024,7 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
|
3054
3024
|
cfd->set_queued_for_flush(true);
|
|
3055
3025
|
++unscheduled_flushes_;
|
|
3056
3026
|
flush_queue_.push_back(flush_req);
|
|
3027
|
+
enqueued = true;
|
|
3057
3028
|
}
|
|
3058
3029
|
} else {
|
|
3059
3030
|
for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
|
|
@@ -3062,7 +3033,9 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
|
3062
3033
|
}
|
|
3063
3034
|
++unscheduled_flushes_;
|
|
3064
3035
|
flush_queue_.push_back(flush_req);
|
|
3036
|
+
enqueued = true;
|
|
3065
3037
|
}
|
|
3038
|
+
return enqueued;
|
|
3066
3039
|
}
|
|
3067
3040
|
|
|
3068
3041
|
void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
|
|
@@ -4190,7 +4163,7 @@ void DBImpl::BuildCompactionJobInfo(
|
|
|
4190
4163
|
compaction_job_info->base_input_level = c->start_level();
|
|
4191
4164
|
compaction_job_info->output_level = c->output_level();
|
|
4192
4165
|
compaction_job_info->stats = compaction_job_stats;
|
|
4193
|
-
const auto& input_table_properties = c->
|
|
4166
|
+
const auto& input_table_properties = c->GetOrInitInputTableProperties();
|
|
4194
4167
|
const auto& output_table_properties = c->GetOutputTableProperties();
|
|
4195
4168
|
compaction_job_info->table_properties.insert(input_table_properties.begin(),
|
|
4196
4169
|
input_table_properties.end());
|
|
@@ -312,6 +312,26 @@ void DBImpl::FindObsoleteFiles(JobContext* job_context, bool force,
|
|
|
312
312
|
// logs_ could have changed while we were waiting.
|
|
313
313
|
continue;
|
|
314
314
|
}
|
|
315
|
+
// This WAL file is not live, so it's OK if we never sync the rest of it.
|
|
316
|
+
// If it's already closed, then it's been fully synced. If
|
|
317
|
+
// !background_close_inactive_wals then we need to Close it before
|
|
318
|
+
// removing from logs_ but not blocking while holding log_write_mutex_.
|
|
319
|
+
if (!immutable_db_options_.background_close_inactive_wals &&
|
|
320
|
+
log.writer->file()) {
|
|
321
|
+
// We are taking ownership of and pinning the front entry, so we can
|
|
322
|
+
// expect it to be the same after releasing and re-acquiring the lock
|
|
323
|
+
log.PrepareForSync();
|
|
324
|
+
log_write_mutex_.Unlock();
|
|
325
|
+
// TODO: maybe check the return value of Close.
|
|
326
|
+
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
327
|
+
auto s = log.writer->file()->Close({});
|
|
328
|
+
s.PermitUncheckedError();
|
|
329
|
+
log_write_mutex_.Lock();
|
|
330
|
+
log.writer->PublishIfClosed();
|
|
331
|
+
assert(&log == &logs_.front());
|
|
332
|
+
log.FinishSync();
|
|
333
|
+
log_sync_cv_.SignalAll();
|
|
334
|
+
}
|
|
315
335
|
logs_to_free_.push_back(log.ReleaseWriter());
|
|
316
336
|
logs_.pop_front();
|
|
317
337
|
}
|
|
@@ -410,12 +430,24 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
|
|
|
410
430
|
state.manifest_delete_files.size());
|
|
411
431
|
// We may ignore the dbname when generating the file names.
|
|
412
432
|
for (auto& file : state.sst_delete_files) {
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
433
|
+
auto* handle = file.metadata->table_reader_handle;
|
|
434
|
+
if (file.only_delete_metadata) {
|
|
435
|
+
if (handle) {
|
|
436
|
+
// Simply release handle of file that is not being deleted
|
|
437
|
+
table_cache_->Release(handle);
|
|
438
|
+
}
|
|
439
|
+
} else {
|
|
440
|
+
// File is being deleted (actually obsolete)
|
|
441
|
+
auto number = file.metadata->fd.GetNumber();
|
|
442
|
+
candidate_files.emplace_back(MakeTableFileName(number), file.path);
|
|
443
|
+
if (handle == nullptr) {
|
|
444
|
+
// For files not "pinned" in table cache
|
|
445
|
+
handle = TableCache::Lookup(table_cache_.get(), number);
|
|
446
|
+
}
|
|
447
|
+
if (handle) {
|
|
448
|
+
TableCache::ReleaseObsolete(table_cache_.get(), handle,
|
|
449
|
+
file.uncache_aggressiveness);
|
|
450
|
+
}
|
|
419
451
|
}
|
|
420
452
|
file.DeleteMetadata();
|
|
421
453
|
}
|
|
@@ -491,7 +523,7 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
|
|
|
491
523
|
for (const auto w : state.logs_to_free) {
|
|
492
524
|
// TODO: maybe check the return value of Close.
|
|
493
525
|
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
494
|
-
auto s = w->Close(
|
|
526
|
+
auto s = w->Close({});
|
|
495
527
|
s.PermitUncheckedError();
|
|
496
528
|
}
|
|
497
529
|
|
|
@@ -577,8 +609,6 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
|
|
|
577
609
|
std::string fname;
|
|
578
610
|
std::string dir_to_sync;
|
|
579
611
|
if (type == kTableFile) {
|
|
580
|
-
// evict from cache
|
|
581
|
-
TableCache::Evict(table_cache_.get(), number);
|
|
582
612
|
fname = MakeTableFileName(candidate_file.file_path, number);
|
|
583
613
|
dir_to_sync = candidate_file.file_path;
|
|
584
614
|
} else if (type == kBlobFile) {
|