@nxtedition/rocksdb 13.1.4 → 13.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +43 -16
- package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
- package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
- package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
- package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
- package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
- package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
- package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
- package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
- package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
- package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
- package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
- package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
- package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
- package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
- package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
- package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
- package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
- package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
- package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
- package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
- package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
- package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
- package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
- package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
- package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
- package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
- package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
- package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
- package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
- package/deps/rocksdb/rocksdb/port/port.h +5 -9
- package/deps/rocksdb/rocksdb/src.mk +8 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/format.cc +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
- package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
- package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
- package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
- package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -48,6 +48,7 @@
|
|
|
48
48
|
#include "db/write_controller.h"
|
|
49
49
|
#include "db/write_thread.h"
|
|
50
50
|
#include "logging/event_logger.h"
|
|
51
|
+
#include "memtable/wbwi_memtable.h"
|
|
51
52
|
#include "monitoring/instrumented_mutex.h"
|
|
52
53
|
#include "options/db_options.h"
|
|
53
54
|
#include "port/port.h"
|
|
@@ -60,6 +61,7 @@
|
|
|
60
61
|
#include "rocksdb/transaction_log.h"
|
|
61
62
|
#include "rocksdb/user_write_callback.h"
|
|
62
63
|
#include "rocksdb/utilities/replayer.h"
|
|
64
|
+
#include "rocksdb/utilities/write_batch_with_index.h"
|
|
63
65
|
#include "rocksdb/write_buffer_manager.h"
|
|
64
66
|
#include "table/merging_iterator.h"
|
|
65
67
|
#include "util/autovector.h"
|
|
@@ -363,12 +365,10 @@ class DBImpl : public DB {
|
|
|
363
365
|
const Snapshot* GetSnapshot() override;
|
|
364
366
|
void ReleaseSnapshot(const Snapshot* snapshot) override;
|
|
365
367
|
|
|
366
|
-
// EXPERIMENTAL
|
|
367
368
|
std::unique_ptr<Iterator> NewCoalescingIterator(
|
|
368
369
|
const ReadOptions& options,
|
|
369
370
|
const std::vector<ColumnFamilyHandle*>& column_families) override;
|
|
370
371
|
|
|
371
|
-
// EXPERIMENTAL
|
|
372
372
|
std::unique_ptr<AttributeGroupIterator> NewAttributeGroupIterator(
|
|
373
373
|
const ReadOptions& options,
|
|
374
374
|
const std::vector<ColumnFamilyHandle*>& column_families) override;
|
|
@@ -482,7 +482,8 @@ class DBImpl : public DB {
|
|
|
482
482
|
|
|
483
483
|
Status GetDbIdentity(std::string& identity) const override;
|
|
484
484
|
|
|
485
|
-
virtual Status GetDbIdentityFromIdentityFile(std::string* identity) const;
|
|
485
|
+
virtual Status GetDbIdentityFromIdentityFile(const IOOptions& opts,
|
|
486
|
+
std::string* identity) const;
|
|
486
487
|
|
|
487
488
|
Status GetDbSessionId(std::string& session_id) const override;
|
|
488
489
|
|
|
@@ -1199,9 +1200,7 @@ class DBImpl : public DB {
|
|
|
1199
1200
|
|
|
1200
1201
|
uint64_t TEST_total_log_size() const { return total_log_size_; }
|
|
1201
1202
|
|
|
1202
|
-
|
|
1203
|
-
Status TEST_GetAllImmutableCFOptions(
|
|
1204
|
-
std::unordered_map<std::string, const ImmutableCFOptions*>* iopts_map);
|
|
1203
|
+
void TEST_GetAllBlockCaches(std::unordered_set<const Cache*>* cache_set);
|
|
1205
1204
|
|
|
1206
1205
|
// Return the lastest MutableCFOptions of a column family
|
|
1207
1206
|
Status TEST_GetLatestMutableCFOptions(ColumnFamilyHandle* column_family,
|
|
@@ -1470,7 +1469,8 @@ class DBImpl : public DB {
|
|
|
1470
1469
|
// The following two functions can only be called when:
|
|
1471
1470
|
// 1. WriteThread::Writer::EnterUnbatched() is used.
|
|
1472
1471
|
// 2. db_mutex is NOT held
|
|
1473
|
-
Status RenameTempFileToOptionsFile(const std::string& file_name);
|
|
1472
|
+
Status RenameTempFileToOptionsFile(const std::string& file_name,
|
|
1473
|
+
bool is_remote_compaction_enabled);
|
|
1474
1474
|
Status DeleteObsoleteOptionsFiles();
|
|
1475
1475
|
|
|
1476
1476
|
void NotifyOnManualFlushScheduled(autovector<ColumnFamilyData*> cfds,
|
|
@@ -1509,6 +1509,23 @@ class DBImpl : public DB {
|
|
|
1509
1509
|
|
|
1510
1510
|
void EraseThreadStatusDbInfo() const;
|
|
1511
1511
|
|
|
1512
|
+
// For CFs that has updates in `wbwi`, their memtable will be switched,
|
|
1513
|
+
// and `wbwi` will be added as the latest immutable memtable.
|
|
1514
|
+
//
|
|
1515
|
+
// REQUIRES: this thread is currently at the front of the main writer queue.
|
|
1516
|
+
// @param prep_log refers to the WAL that contains prepare record
|
|
1517
|
+
// for the transaction based on wbwi.
|
|
1518
|
+
// @param assigned_seqno Sequence numbers for the ingested memtable.
|
|
1519
|
+
// @param last_seqno the value of versions_->LastSequence() after the write
|
|
1520
|
+
// ingests `wbwi` is done.
|
|
1521
|
+
// @param memtable_updated Whether the same write that ingests wbwi has
|
|
1522
|
+
// updated memtable. This is useful for determining whether to set bg
|
|
1523
|
+
// error when IngestWBWI fails.
|
|
1524
|
+
Status IngestWBWI(std::shared_ptr<WriteBatchWithIndex> wbwi,
|
|
1525
|
+
const WBWIMemTable::SeqnoRange& assigned_seqno,
|
|
1526
|
+
uint64_t min_prep_log, SequenceNumber last_seqno,
|
|
1527
|
+
bool memtable_updated, bool ignore_missing_cf);
|
|
1528
|
+
|
|
1512
1529
|
// If disable_memtable is set the application logic must guarantee that the
|
|
1513
1530
|
// batch will still be skipped from memtable during the recovery. An excption
|
|
1514
1531
|
// to this is seq_per_batch_ mode, in which since each batch already takes one
|
|
@@ -1524,6 +1541,16 @@ class DBImpl : public DB {
|
|
|
1524
1541
|
// batch_cnt is expected to be non-zero in seq_per_batch mode and
|
|
1525
1542
|
// indicates the number of sub-patches. A sub-patch is a subset of the write
|
|
1526
1543
|
// batch that does not have duplicate keys.
|
|
1544
|
+
// `callback` is called before WAL write.
|
|
1545
|
+
// See more in comment above WriteCallback::Callback().
|
|
1546
|
+
// pre_release_callback is called after WAL write and before memtable write.
|
|
1547
|
+
// See more in comment above PreReleaseCallback::Callback().
|
|
1548
|
+
// post_memtable_callback is called after memtable write but before publishing
|
|
1549
|
+
// the sequence number to readers.
|
|
1550
|
+
//
|
|
1551
|
+
// The main write queue. This is the only write queue that updates
|
|
1552
|
+
// LastSequence. When using one write queue, the same sequence also indicates
|
|
1553
|
+
// the last published sequence.
|
|
1527
1554
|
Status WriteImpl(const WriteOptions& options, WriteBatch* updates,
|
|
1528
1555
|
WriteCallback* callback = nullptr,
|
|
1529
1556
|
UserWriteCallback* user_write_cb = nullptr,
|
|
@@ -1531,7 +1558,9 @@ class DBImpl : public DB {
|
|
|
1531
1558
|
bool disable_memtable = false, uint64_t* seq_used = nullptr,
|
|
1532
1559
|
size_t batch_cnt = 0,
|
|
1533
1560
|
PreReleaseCallback* pre_release_callback = nullptr,
|
|
1534
|
-
PostMemTableCallback* post_memtable_callback = nullptr);
|
|
1561
|
+
PostMemTableCallback* post_memtable_callback = nullptr,
|
|
1562
|
+
std::shared_ptr<WriteBatchWithIndex> wbwi = nullptr,
|
|
1563
|
+
uint64_t min_prep_log = 0);
|
|
1535
1564
|
|
|
1536
1565
|
Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
|
|
1537
1566
|
WriteCallback* callback = nullptr,
|
|
@@ -1594,7 +1623,7 @@ class DBImpl : public DB {
|
|
|
1594
1623
|
// Read/create DB identity file (as appropriate), and write DB ID to
|
|
1595
1624
|
// version_edit if provided.
|
|
1596
1625
|
Status SetupDBId(const WriteOptions& write_options, bool read_only,
|
|
1597
|
-
bool is_new_db, VersionEdit* version_edit);
|
|
1626
|
+
bool is_new_db, bool is_retry, VersionEdit* version_edit);
|
|
1598
1627
|
// Assign db_id_ and write DB ID to version_edit if provided.
|
|
1599
1628
|
void SetDBId(std::string&& id, bool read_only, VersionEdit* version_edit);
|
|
1600
1629
|
|
|
@@ -1711,7 +1740,7 @@ class DBImpl : public DB {
|
|
|
1711
1740
|
|
|
1712
1741
|
struct WriteContext {
|
|
1713
1742
|
SuperVersionContext superversion_context;
|
|
1714
|
-
autovector<MemTable*> memtables_to_free_;
|
|
1743
|
+
autovector<ReadOnlyMemTable*> memtables_to_free_;
|
|
1715
1744
|
|
|
1716
1745
|
explicit WriteContext(bool create_superversion = false)
|
|
1717
1746
|
: superversion_context(create_superversion) {}
|
|
@@ -2053,7 +2082,21 @@ class DBImpl : public DB {
|
|
|
2053
2082
|
|
|
2054
2083
|
Status TrimMemtableHistory(WriteContext* context);
|
|
2055
2084
|
|
|
2056
|
-
|
|
2085
|
+
// Switches the current live memtable to immutable/read-only memtable.
|
|
2086
|
+
// A new WAL is created if the current WAL is not empty.
|
|
2087
|
+
// If `new_imm` is not nullptr, it will be added as the newest immutable
|
|
2088
|
+
// memtable, if and only if OK status is returned.
|
|
2089
|
+
// `last_seqno` needs to be provided if `new_imm` is not nullptr. It is
|
|
2090
|
+
// the value of versions_->LastSequence() after the write that ingests new_imm
|
|
2091
|
+
// is done.
|
|
2092
|
+
//
|
|
2093
|
+
// REQUIRES: mutex_ is held
|
|
2094
|
+
// REQUIRES: this thread is currently at the front of the writer queue
|
|
2095
|
+
// REQUIRES: this thread is currently at the front of the 2nd writer queue if
|
|
2096
|
+
// two_write_queues_ is true (This is to simplify the reasoning.)
|
|
2097
|
+
Status SwitchMemtable(ColumnFamilyData* cfd, WriteContext* context,
|
|
2098
|
+
ReadOnlyMemTable* new_imm = nullptr,
|
|
2099
|
+
SequenceNumber last_seqno = 0);
|
|
2057
2100
|
|
|
2058
2101
|
// Select and output column families qualified for atomic flush in
|
|
2059
2102
|
// `selected_cfds`. If `provided_candidate_cfds` is non-empty, it will be used
|
|
@@ -2091,17 +2134,18 @@ class DBImpl : public DB {
|
|
|
2091
2134
|
// memtable pending flush.
|
|
2092
2135
|
// resuming_from_bg_err indicates whether the caller is attempting to resume
|
|
2093
2136
|
// from background error.
|
|
2094
|
-
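Status WaitForFlushMemTable(ColumnFamilyData* cfd,
|
|
2095
|
-
const uint64_t* flush_memtable_id = nullptr,
|
|
2096
|
-
bool resuming_from_bg_err = false) {
|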
|
2137
|
+
Status WaitForFlushMemTable(
|
|
2138
|
+
ColumnFamilyData* cfd, const uint64_t* flush_memtable_id = nullptr,
|
|
2139
|
+
bool resuming_from_bg_err = false,
|
|
2140
|
+
std::optional<FlushReason> flush_reason = std::nullopt) {
|
|
2097
2141
|
return WaitForFlushMemTables({cfd}, {flush_memtable_id},
|
|
2098
|
-
resuming_from_bg_err);
|
|
2142
|
+
resuming_from_bg_err, flush_reason);
|
|
2099
2143
|
}
|
|
2100
2144
|
// Wait for memtables to be flushed for multiple column families.
|
|
2101
2145
|
Status WaitForFlushMemTables(
|
|
2102
2146
|
const autovector<ColumnFamilyData*>& cfds,
|
|
2103
2147
|
const autovector<const uint64_t*>& flush_memtable_ids,
|
|
2104
|
-
bool resuming_from_bg_err);
|
|
2148
|
+
bool resuming_from_bg_err, std::optional<FlushReason> flush_reason);
|
|
2105
2149
|
|
|
2106
2150
|
inline void WaitForPendingWrites() {
|
|
2107
2151
|
mutex_.AssertHeld();
|
|
@@ -2216,8 +2260,6 @@ class DBImpl : public DB {
|
|
|
2216
2260
|
void TrackOrUntrackFiles(const std::vector<std::string>& existing_data_files,
|
|
2217
2261
|
bool track);
|
|
2218
2262
|
|
|
2219
|
-
ColumnFamilyData* GetColumnFamilyDataByName(const std::string& cf_name);
|
|
2220
|
-
|
|
2221
2263
|
void MaybeScheduleFlushOrCompaction();
|
|
2222
2264
|
|
|
2223
2265
|
struct FlushRequest {
|
|
@@ -2897,6 +2939,11 @@ class DBImpl : public DB {
|
|
|
2897
2939
|
// garbages, among all column families.
|
|
2898
2940
|
SequenceNumber bottommost_files_mark_threshold_ = kMaxSequenceNumber;
|
|
2899
2941
|
|
|
2942
|
+
// The min threshold to trigger compactions for standalone range deletion
|
|
2943
|
+
// files that are marked for compaction.
|
|
2944
|
+
SequenceNumber standalone_range_deletion_files_mark_threshold_ =
|
|
2945
|
+
kMaxSequenceNumber;
|
|
2946
|
+
|
|
2900
2947
|
LogsWithPrepTracker logs_with_prep_tracker_;
|
|
2901
2948
|
|
|
2902
2949
|
// Callback for compaction to check if a key is visible to a snapshot.
|
|
@@ -3003,7 +3050,8 @@ CompressionType GetCompressionFlush(const ImmutableCFOptions& ioptions,
|
|
|
3003
3050
|
VersionEdit GetDBRecoveryEditForObsoletingMemTables(
|
|
3004
3051
|
VersionSet* vset, const ColumnFamilyData& cfd,
|
|
3005
3052
|
const autovector<VersionEdit*>& edit_list,
|
|
3006
|
-
const autovector<MemTable*>& memtables, LogsWithPrepTracker* prep_tracker);
|
|
3053
|
+
const autovector<ReadOnlyMemTable*>& memtables,
|
|
3054
|
+
LogsWithPrepTracker* prep_tracker);
|
|
3007
3055
|
|
|
3008
3056
|
// Return the earliest log file to keep after the memtable flush is
|
|
3009
3057
|
// finalized.
|
|
@@ -3014,13 +3062,13 @@ VersionEdit GetDBRecoveryEditForObsoletingMemTables(
|
|
|
3014
3062
|
uint64_t PrecomputeMinLogNumberToKeep2PC(
|
|
3015
3063
|
VersionSet* vset, const ColumnFamilyData& cfd_to_flush,
|
|
3016
3064
|
const autovector<VersionEdit*>& edit_list,
|
|
3017
|
-
const autovector<MemTable*>& memtables_to_flush,
|
|
3065
|
+
const autovector<ReadOnlyMemTable*>& memtables_to_flush,
|
|
3018
3066
|
LogsWithPrepTracker* prep_tracker);
|
|
3019
3067
|
// For atomic flush.
|
|
3020
3068
|
uint64_t PrecomputeMinLogNumberToKeep2PC(
|
|
3021
3069
|
VersionSet* vset, const autovector<ColumnFamilyData*>& cfds_to_flush,
|
|
3022
3070
|
const autovector<autovector<VersionEdit*>>& edit_lists,
|
|
3023
|
-
const autovector<const autovector<MemTable*>*>& memtables_to_flush,
|
|
3071
|
+
const autovector<const autovector<ReadOnlyMemTable*>*>& memtables_to_flush,
|
|
3024
3072
|
LogsWithPrepTracker* prep_tracker);
|
|
3025
3073
|
|
|
3026
3074
|
// In non-2PC mode, WALs with log number < the returned number can be
|
|
@@ -3037,11 +3085,11 @@ uint64_t PrecomputeMinLogNumberToKeepNon2PC(
|
|
|
3037
3085
|
// will not depend on any WAL file. nullptr means no memtable is being flushed.
|
|
3038
3086
|
// The function is only applicable to 2pc mode.
|
|
3039
3087
|
uint64_t FindMinPrepLogReferencedByMemTable(
|
|
3040
|
-
VersionSet* vset, const autovector<MemTable*>& memtables_to_flush);
|
|
3088
|
+
VersionSet* vset, const autovector<ReadOnlyMemTable*>& memtables_to_flush);
|
|
3041
3089
|
// For atomic flush.
|
|
3042
3090
|
uint64_t FindMinPrepLogReferencedByMemTable(
|
|
3043
3091
|
VersionSet* vset,
|
|
3044
|
-
const autovector<const autovector<MemTable*>*>& memtables_to_flush);
|
|
3092
|
+
const autovector<const autovector<ReadOnlyMemTable*>*>& memtables_to_flush);
|
|
3045
3093
|
|
|
3046
3094
|
// Fix user-supplied options to be reasonable
|
|
3047
3095
|
template <class T, class V>
|
|
@@ -753,7 +753,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
753
753
|
|
|
754
754
|
if (s.ok()) {
|
|
755
755
|
autovector<ColumnFamilyData*> tmp_cfds;
|
|
756
|
-
autovector<const autovector<MemTable*>*> mems_list;
|
|
756
|
+
autovector<const autovector<ReadOnlyMemTable*>*> mems_list;
|
|
757
757
|
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
|
758
758
|
autovector<FileMetaData*> tmp_file_meta;
|
|
759
759
|
autovector<std::list<std::unique_ptr<FlushJobInfo>>*>
|
|
@@ -1457,11 +1457,6 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1457
1457
|
input_set.insert(TableFileNameToNumber(file_name));
|
|
1458
1458
|
}
|
|
1459
1459
|
|
|
1460
|
-
ColumnFamilyMetaData cf_meta;
|
|
1461
|
-
// TODO(yhchiang): can directly use version here if none of the
|
|
1462
|
-
// following functions call is pluggable to external developers.
|
|
1463
|
-
version->GetColumnFamilyMetaData(&cf_meta);
|
|
1464
|
-
|
|
1465
1460
|
if (output_path_id < 0) {
|
|
1466
1461
|
if (cfd->ioptions()->cf_paths.size() == 1U) {
|
|
1467
1462
|
output_path_id = 0;
|
|
@@ -1482,7 +1477,7 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1482
1477
|
|
|
1483
1478
|
std::vector<CompactionInputFiles> input_files;
|
|
1484
1479
|
Status s = cfd->compaction_picker()->SanitizeAndConvertCompactionInputFiles(
|
|
1485
|
-
&input_set,
|
|
1480
|
+
&input_set, output_level, version, &input_files);
|
|
1486
1481
|
TEST_SYNC_POINT(
|
|
1487
1482
|
"DBImpl::CompactFilesImpl::PostSanitizeAndConvertCompactionInputFiles");
|
|
1488
1483
|
if (!s.ok()) {
|
|
@@ -1862,8 +1857,9 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|
|
1862
1857
|
mutable_cf_options.compression_opts,
|
|
1863
1858
|
mutable_cf_options.default_write_temperature,
|
|
1864
1859
|
0 /* max_subcompactions, not applicable */,
|
|
1865
|
-
{} /* grandparents, not applicable */,
|
|
1866
|
-
|
|
1860
|
+
{} /* grandparents, not applicable */,
|
|
1861
|
+
std::nullopt /* earliest_snapshot */, nullptr /* snapshot_checker */,
|
|
1862
|
+
false /* is manual */, "" /* trim_ts */, -1 /* score, not applicable */,
|
|
1867
1863
|
false /* is deletion compaction, not applicable */,
|
|
1868
1864
|
false /* l0_files_might_overlap, not applicable */,
|
|
1869
1865
|
CompactionReason::kRefitLevel));
|
|
@@ -2417,7 +2413,8 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2417
2413
|
}
|
|
2418
2414
|
s = WaitForFlushMemTables(
|
|
2419
2415
|
cfds, flush_memtable_ids,
|
|
2420
|
-
flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err
|
|
2416
|
+
flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */,
|
|
2417
|
+
flush_reason);
|
|
2421
2418
|
InstrumentedMutexLock lock_guard(&mutex_);
|
|
2422
2419
|
for (auto* tmp_cfd : cfds) {
|
|
2423
2420
|
tmp_cfd->UnrefAndTryDelete();
|
|
@@ -2559,7 +2556,8 @@ Status DBImpl::AtomicFlushMemTables(
|
|
|
2559
2556
|
}
|
|
2560
2557
|
s = WaitForFlushMemTables(
|
|
2561
2558
|
cfds, flush_memtable_ids,
|
|
2562
|
-
flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err
|
|
2559
|
+
flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */,
|
|
2560
|
+
flush_reason);
|
|
2563
2561
|
InstrumentedMutexLock lock_guard(&mutex_);
|
|
2564
2562
|
for (auto* cfd : cfds) {
|
|
2565
2563
|
cfd->UnrefAndTryDelete();
|
|
@@ -2622,7 +2620,7 @@ Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason,
|
|
|
2622
2620
|
flush_memtable_id_ptrs.push_back(&flush_memtable_id);
|
|
2623
2621
|
}
|
|
2624
2622
|
s = WaitForFlushMemTables(cfds, flush_memtable_id_ptrs,
|
|
2625
|
-
true /* resuming_from_bg_err
|
|
2623
|
+
true /* resuming_from_bg_err */, flush_reason);
|
|
2626
2624
|
mutex_.Lock();
|
|
2627
2625
|
}
|
|
2628
2626
|
|
|
@@ -2722,7 +2720,7 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|
|
2722
2720
|
Status DBImpl::WaitForFlushMemTables(
|
|
2723
2721
|
const autovector<ColumnFamilyData*>& cfds,
|
|
2724
2722
|
const autovector<const uint64_t*>& flush_memtable_ids,
|
|
2725
|
-
bool resuming_from_bg_err) {
|
|
2723
|
+
bool resuming_from_bg_err, std::optional<FlushReason> flush_reason) {
|
|
2726
2724
|
int num = static_cast<int>(cfds.size());
|
|
2727
2725
|
// Wait until the compaction completes
|
|
2728
2726
|
InstrumentedMutexLock l(&mutex_);
|
|
@@ -2760,7 +2758,15 @@ Status DBImpl::WaitForFlushMemTables(
|
|
|
2760
2758
|
(flush_memtable_ids[i] != nullptr &&
|
|
2761
2759
|
cfds[i]->imm()->GetEarliestMemTableID() >
|
|
2762
2760
|
*flush_memtable_ids[i])) {
|
|
2763
|
-
|
|
2761
|
+
// Make file ingestion's flush wait until SuperVersion is also updated
|
|
2762
|
+
// since after flush, it does range overlapping check and file level
|
|
2763
|
+
// assignment with the current SuperVersion.
|
|
2764
|
+
if (!flush_reason.has_value() ||
|
|
2765
|
+
flush_reason.value() != FlushReason::kExternalFileIngestion ||
|
|
2766
|
+
cfds[i]->GetSuperVersion()->imm->GetID() ==
|
|
2767
|
+
cfds[i]->imm()->current()->GetID()) {
|
|
2768
|
+
++num_finished;
|
|
2769
|
+
}
|
|
2764
2770
|
}
|
|
2765
2771
|
}
|
|
2766
2772
|
if (1 == num_dropped && 1 == num) {
|
|
@@ -3679,8 +3685,20 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3679
3685
|
// compaction is not necessary. Need to make sure mutex is held
|
|
3680
3686
|
// until we make a copy in the following code
|
|
3681
3687
|
TEST_SYNC_POINT("DBImpl::BackgroundCompaction():BeforePickCompaction");
|
|
3688
|
+
SnapshotChecker* snapshot_checker = nullptr;
|
|
3689
|
+
std::vector<SequenceNumber> snapshot_seqs;
|
|
3690
|
+
// This info is not useful for other scenarios, so save querying existing
|
|
3691
|
+
// snapshots for those cases.
|
|
3692
|
+
if (cfd->ioptions()->compaction_style == kCompactionStyleUniversal &&
|
|
3693
|
+
cfd->user_comparator()->timestamp_size() == 0) {
|
|
3694
|
+
SequenceNumber earliest_write_conflict_snapshot;
|
|
3695
|
+
GetSnapshotContext(job_context, &snapshot_seqs,
|
|
3696
|
+
&earliest_write_conflict_snapshot,
|
|
3697
|
+
&snapshot_checker);
|
|
3698
|
+
assert(is_snapshot_supported_ || snapshots_.empty());
|
|
3699
|
+
}
|
|
3682
3700
|
c.reset(cfd->PickCompaction(*mutable_cf_options, mutable_db_options_,
|
|
3683
|
-
log_buffer));
|
|
3701
|
+
snapshot_seqs, snapshot_checker, log_buffer));
|
|
3684
3702
|
TEST_SYNC_POINT("DBImpl::BackgroundCompaction():AfterPickCompaction");
|
|
3685
3703
|
|
|
3686
3704
|
if (c != nullptr) {
|
|
@@ -3968,7 +3986,10 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3968
3986
|
// Sanity checking that compaction files are freed.
|
|
3969
3987
|
for (size_t i = 0; i < c->num_input_levels(); i++) {
|
|
3970
3988
|
for (size_t j = 0; j < c->inputs(i)->size(); j++) {
|
|
3971
|
-
|
|
3989
|
+
// When status is not OK, compaction's result installation failed and
|
|
3990
|
+
// no new Version installed. The files could have been released and
|
|
3991
|
+
// picked up again by other compaction attempts.
|
|
3992
|
+
assert(!c->input(i, j)->being_compacted || !status.ok());
|
|
3972
3993
|
}
|
|
3973
3994
|
}
|
|
3974
3995
|
std::unordered_set<Compaction*>* cip = c->column_family_data()
|
|
@@ -4287,12 +4308,18 @@ void DBImpl::InstallSuperVersionAndScheduleWork(
|
|
|
4287
4308
|
// newer snapshot created and released frequently, the compaction will be
|
|
4288
4309
|
// triggered soon anyway.
|
|
4289
4310
|
bottommost_files_mark_threshold_ = kMaxSequenceNumber;
|
|
4311
|
+
standalone_range_deletion_files_mark_threshold_ = kMaxSequenceNumber;
|
|
4290
4312
|
for (auto* my_cfd : *versions_->GetColumnFamilySet()) {
|
|
4291
4313
|
if (!my_cfd->ioptions()->allow_ingest_behind) {
|
|
4292
4314
|
bottommost_files_mark_threshold_ = std::min(
|
|
4293
4315
|
bottommost_files_mark_threshold_,
|
|
4294
4316
|
my_cfd->current()->storage_info()->bottommost_files_mark_threshold());
|
|
4295
4317
|
}
|
|
4318
|
+
standalone_range_deletion_files_mark_threshold_ =
|
|
4319
|
+
std::min(standalone_range_deletion_files_mark_threshold_,
|
|
4320
|
+
cfd->current()
|
|
4321
|
+
->storage_info()
|
|
4322
|
+
->standalone_range_tombstone_files_mark_threshold());
|
|
4296
4323
|
}
|
|
4297
4324
|
|
|
4298
4325
|
// Whenever we install new SuperVersion, we might need to issue new flushes or
|
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
7
7
|
// Use of this source code is governed by a BSD-style license that can be
|
|
8
8
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
9
|
-
|
|
10
9
|
#ifndef NDEBUG
|
|
10
|
+
#include <iostream>
|
|
11
11
|
|
|
12
12
|
#include "db/blob/blob_file_cache.h"
|
|
13
13
|
#include "db/column_family.h"
|
|
@@ -233,23 +233,16 @@ uint64_t DBImpl::TEST_LogfileNumber() {
|
|
|
233
233
|
return logfile_number_;
|
|
234
234
|
}
|
|
235
235
|
|
|
236
|
-
|
|
237
|
-
std::
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
iopts.push_back(cfd->ioptions());
|
|
236
|
+
void DBImpl::TEST_GetAllBlockCaches(
|
|
237
|
+
std::unordered_set<const Cache*>* cache_set) {
|
|
238
|
+
InstrumentedMutexLock l(&mutex_);
|
|
239
|
+
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
|
240
|
+
if (const auto bbto =
|
|
241
|
+
cfd->GetCurrentMutableCFOptions()
|
|
242
|
+
->table_factory->GetOptions<BlockBasedTableOptions>()) {
|
|
243
|
+
cache_set->insert(bbto->block_cache.get());
|
|
245
244
|
}
|
|
246
245
|
}
|
|
247
|
-
iopts_map->clear();
|
|
248
|
-
for (size_t i = 0; i < cf_names.size(); ++i) {
|
|
249
|
-
iopts_map->insert({cf_names[i], iopts[i]});
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
return Status::OK();
|
|
253
246
|
}
|
|
254
247
|
|
|
255
248
|
uint64_t DBImpl::TEST_FindMinLogContainingOutstandingPrep() {
|
|
@@ -265,7 +258,7 @@ size_t DBImpl::TEST_LogsWithPrepSize() {
|
|
|
265
258
|
}
|
|
266
259
|
|
|
267
260
|
uint64_t DBImpl::TEST_FindMinPrepLogReferencedByMemTable() {
|
|
268
|
-
autovector<
|
|
261
|
+
autovector<ReadOnlyMemTable*> empty_list;
|
|
269
262
|
return FindMinPrepLogReferencedByMemTable(versions_.get(), empty_list);
|
|
270
263
|
}
|
|
271
264
|
|
|
@@ -345,31 +338,52 @@ void DBImpl::TEST_VerifyNoObsoleteFilesCached(
|
|
|
345
338
|
l.emplace(&mutex_);
|
|
346
339
|
}
|
|
347
340
|
|
|
348
|
-
|
|
341
|
+
if (!opened_successfully_) {
|
|
342
|
+
// We don't need to pro-actively clean up open files during DB::Open()
|
|
343
|
+
// if we know we are about to fail and clean up in Close().
|
|
344
|
+
return;
|
|
345
|
+
}
|
|
346
|
+
if (disable_delete_obsolete_files_ > 0) {
|
|
347
|
+
// For better or worse, DB::Close() is allowed with deletions disabled.
|
|
348
|
+
// Since we generally associate clean-up of open files with deleting them,
|
|
349
|
+
// we allow "obsolete" open files when deletions are disabled.
|
|
350
|
+
return;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
// Live and "quarantined" files are allowed to be open in table cache
|
|
354
|
+
std::set<uint64_t> live_and_quar_files;
|
|
349
355
|
for (auto cfd : *versions_->GetColumnFamilySet()) {
|
|
350
356
|
if (cfd->IsDropped()) {
|
|
351
357
|
continue;
|
|
352
358
|
}
|
|
353
|
-
//
|
|
354
|
-
cfd->current()
|
|
359
|
+
// Iterate over live versions
|
|
360
|
+
Version* current = cfd->current();
|
|
361
|
+
Version* ver = current;
|
|
362
|
+
do {
|
|
363
|
+
// Sneakily add both SST and blob files to the same list
|
|
364
|
+
std::vector<uint64_t> live_files_vec;
|
|
365
|
+
ver->AddLiveFiles(&live_files_vec, &live_files_vec);
|
|
366
|
+
live_and_quar_files.insert(live_files_vec.begin(), live_files_vec.end());
|
|
367
|
+
|
|
368
|
+
ver = ver->Next();
|
|
369
|
+
} while (ver != current);
|
|
355
370
|
}
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
// FIXME: diagnose and fix the leaks of obsolete SST files revealed in
|
|
363
|
-
// unit tests.
|
|
364
|
-
return;
|
|
365
|
-
}
|
|
371
|
+
{
|
|
372
|
+
const auto& quar_files = error_handler_.GetFilesToQuarantine();
|
|
373
|
+
live_and_quar_files.insert(quar_files.begin(), quar_files.end());
|
|
374
|
+
}
|
|
375
|
+
auto fn = [&live_and_quar_files](const Slice& key, Cache::ObjectPtr, size_t,
|
|
376
|
+
const Cache::CacheItemHelper*) {
|
|
366
377
|
// See TableCache and BlobFileCache
|
|
367
378
|
assert(key.size() == sizeof(uint64_t));
|
|
368
379
|
uint64_t file_number;
|
|
369
380
|
GetUnaligned(reinterpret_cast<const uint64_t*>(key.data()), &file_number);
|
|
370
|
-
// Assert file is in
|
|
371
|
-
|
|
372
|
-
|
|
381
|
+
// Assert file is in live/quarantined set
|
|
382
|
+
if (live_and_quar_files.find(file_number) == live_and_quar_files.end()) {
|
|
383
|
+
std::cerr << "File " << file_number << " is not live nor quarantined"
|
|
384
|
+
<< std::endl;
|
|
385
|
+
assert(false);
|
|
386
|
+
}
|
|
373
387
|
};
|
|
374
388
|
table_cache_->ApplyToAllEntries(fn, {});
|
|
375
389
|
}
|
|
@@ -449,14 +449,8 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
|
|
|
449
449
|
// File is being deleted (actually obsolete)
|
|
450
450
|
auto number = file.metadata->fd.GetNumber();
|
|
451
451
|
candidate_files.emplace_back(MakeTableFileName(number), file.path);
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
handle = TableCache::Lookup(table_cache_.get(), number);
|
|
455
|
-
}
|
|
456
|
-
if (handle) {
|
|
457
|
-
TableCache::ReleaseObsolete(table_cache_.get(), handle,
|
|
458
|
-
file.uncache_aggressiveness);
|
|
459
|
-
}
|
|
452
|
+
TableCache::ReleaseObsolete(table_cache_.get(), number, handle,
|
|
453
|
+
file.uncache_aggressiveness);
|
|
460
454
|
}
|
|
461
455
|
file.DeleteMetadata();
|
|
462
456
|
}
|
|
@@ -572,9 +566,17 @@ void DBImpl::PurgeObsoleteFiles(JobContext& state, bool schedule_only) {
|
|
|
572
566
|
case kTableFile:
|
|
573
567
|
// If the second condition is not there, this makes
|
|
574
568
|
// DontDeletePendingOutputs fail
|
|
569
|
+
// FIXME: but should NOT keep if it came from sst_delete_files?
|
|
575
570
|
keep = (sst_live_set.find(number) != sst_live_set.end()) ||
|
|
576
571
|
number >= state.min_pending_output;
|
|
577
572
|
if (!keep) {
|
|
573
|
+
// NOTE: sometimes redundant (if came from sst_delete_files)
|
|
574
|
+
// We don't know which column family is applicable here so we don't
|
|
575
|
+
// know what uncache_aggressiveness would be used with
|
|
576
|
+
// ReleaseObsolete(). Anyway, obsolete files ideally go into
|
|
577
|
+
// sst_delete_files for better/quicker handling, and this is just a
|
|
578
|
+
// backstop.
|
|
579
|
+
TableCache::Evict(table_cache_.get(), number);
|
|
578
580
|
files_to_del.insert(number);
|
|
579
581
|
}
|
|
580
582
|
break;
|
|
@@ -739,7 +741,8 @@ void DBImpl::DeleteObsoleteFiles() {
|
|
|
739
741
|
VersionEdit GetDBRecoveryEditForObsoletingMemTables(
|
|
740
742
|
VersionSet* vset, const ColumnFamilyData& cfd,
|
|
741
743
|
const autovector<VersionEdit*>& edit_list,
|
|
742
|
-
const autovector<
|
|
744
|
+
const autovector<ReadOnlyMemTable*>& memtables,
|
|
745
|
+
LogsWithPrepTracker* prep_tracker) {
|
|
743
746
|
VersionEdit wal_deletion_edit;
|
|
744
747
|
uint64_t min_wal_number_to_keep = 0;
|
|
745
748
|
assert(edit_list.size() > 0);
|
|
@@ -769,12 +772,12 @@ VersionEdit GetDBRecoveryEditForObsoletingMemTables(
|
|
|
769
772
|
}
|
|
770
773
|
|
|
771
774
|
uint64_t FindMinPrepLogReferencedByMemTable(
|
|
772
|
-
VersionSet* vset, const autovector<
|
|
775
|
+
VersionSet* vset, const autovector<ReadOnlyMemTable*>& memtables_to_flush) {
|
|
773
776
|
uint64_t min_log = 0;
|
|
774
777
|
|
|
775
778
|
// we must look through the memtables for two phase transactions
|
|
776
779
|
// that have been committed but not yet flushed
|
|
777
|
-
std::unordered_set<
|
|
780
|
+
std::unordered_set<ReadOnlyMemTable*> memtables_to_flush_set(
|
|
778
781
|
memtables_to_flush.begin(), memtables_to_flush.end());
|
|
779
782
|
for (auto loop_cfd : *vset->GetColumnFamilySet()) {
|
|
780
783
|
if (loop_cfd->IsDropped()) {
|
|
@@ -799,12 +802,12 @@ uint64_t FindMinPrepLogReferencedByMemTable(
|
|
|
799
802
|
}
|
|
800
803
|
|
|
801
804
|
uint64_t FindMinPrepLogReferencedByMemTable(
|
|
802
|
-
VersionSet* vset,
|
|
803
|
-
|
|
805
|
+
VersionSet* vset, const autovector<const autovector<ReadOnlyMemTable*>*>&
|
|
806
|
+
memtables_to_flush) {
|
|
804
807
|
uint64_t min_log = 0;
|
|
805
808
|
|
|
806
|
-
std::unordered_set<
|
|
807
|
-
for (const autovector<
|
|
809
|
+
std::unordered_set<ReadOnlyMemTable*> memtables_to_flush_set;
|
|
810
|
+
for (const autovector<ReadOnlyMemTable*>* memtables : memtables_to_flush) {
|
|
808
811
|
memtables_to_flush_set.insert(memtables->begin(), memtables->end());
|
|
809
812
|
}
|
|
810
813
|
for (auto loop_cfd : *vset->GetColumnFamilySet()) {
|
|
@@ -896,7 +899,7 @@ uint64_t PrecomputeMinLogNumberToKeepNon2PC(
|
|
|
896
899
|
uint64_t PrecomputeMinLogNumberToKeep2PC(
|
|
897
900
|
VersionSet* vset, const ColumnFamilyData& cfd_to_flush,
|
|
898
901
|
const autovector<VersionEdit*>& edit_list,
|
|
899
|
-
const autovector<
|
|
902
|
+
const autovector<ReadOnlyMemTable*>& memtables_to_flush,
|
|
900
903
|
LogsWithPrepTracker* prep_tracker) {
|
|
901
904
|
assert(vset != nullptr);
|
|
902
905
|
assert(prep_tracker != nullptr);
|
|
@@ -937,7 +940,7 @@ uint64_t PrecomputeMinLogNumberToKeep2PC(
|
|
|
937
940
|
uint64_t PrecomputeMinLogNumberToKeep2PC(
|
|
938
941
|
VersionSet* vset, const autovector<ColumnFamilyData*>& cfds_to_flush,
|
|
939
942
|
const autovector<autovector<VersionEdit*>>& edit_lists,
|
|
940
|
-
const autovector<const autovector<
|
|
943
|
+
const autovector<const autovector<ReadOnlyMemTable*>*>& memtables_to_flush,
|
|
941
944
|
LogsWithPrepTracker* prep_tracker) {
|
|
942
945
|
assert(vset != nullptr);
|
|
943
946
|
assert(prep_tracker != nullptr);
|
|
@@ -980,7 +983,8 @@ void DBImpl::SetDBId(std::string&& id, bool read_only,
|
|
|
980
983
|
}
|
|
981
984
|
|
|
982
985
|
Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
|
|
983
|
-
bool is_new_db,
|
|
986
|
+
bool is_new_db, bool is_retry,
|
|
987
|
+
VersionEdit* version_edit) {
|
|
984
988
|
Status s;
|
|
985
989
|
if (!is_new_db) {
|
|
986
990
|
// Check for the IDENTITY file and create it if not there or
|
|
@@ -988,7 +992,11 @@ Status DBImpl::SetupDBId(const WriteOptions& write_options, bool read_only,
|
|
|
988
992
|
std::string db_id_in_file;
|
|
989
993
|
s = fs_->FileExists(IdentityFileName(dbname_), IOOptions(), nullptr);
|
|
990
994
|
if (s.ok()) {
|
|
991
|
-
|
|
995
|
+
IOOptions opts;
|
|
996
|
+
if (is_retry) {
|
|
997
|
+
opts.verify_and_reconstruct_read = true;
|
|
998
|
+
}
|
|
999
|
+
s = GetDbIdentityFromIdentityFile(opts, &db_id_in_file);
|
|
992
1000
|
if (s.ok() && !db_id_in_file.empty()) {
|
|
993
1001
|
if (db_id_.empty()) {
|
|
994
1002
|
// Loaded from file and wasn't already known from manifest
|