@nxtedition/rocksdb 8.2.8 → 9.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +0 -21
- package/deps/rocksdb/rocksdb/CMakeLists.txt +20 -10
- package/deps/rocksdb/rocksdb/Makefile +37 -25
- package/deps/rocksdb/rocksdb/README.md +29 -0
- package/deps/rocksdb/rocksdb/TARGETS +25 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +35 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +229 -74
- package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +4 -3
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -95
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +4 -2
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -3
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +2683 -496
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +580 -159
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +145 -42
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +20 -1
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +391 -17
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +309 -212
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +0 -32
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +439 -12
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +44 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +11 -1
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -3
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.cc +119 -0
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.h +155 -0
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +711 -0
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +17 -11
- package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +28 -12
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +20 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +7 -3
- package/deps/rocksdb/rocksdb/db/builder.cc +35 -10
- package/deps/rocksdb/rocksdb/db/c.cc +233 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +140 -6
- package/deps/rocksdb/rocksdb/db/column_family.cc +110 -51
- package/deps/rocksdb/rocksdb/db/column_family.h +34 -2
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +314 -7
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +106 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +47 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +10 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +148 -60
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +22 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +33 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +14 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +90 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +170 -95
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +32 -58
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +46 -10
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +13 -3
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +74 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +27 -3
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +850 -44
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +275 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +52 -19
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +733 -320
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +155 -66
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +516 -155
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +8 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +100 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +95 -50
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +136 -79
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +6 -95
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +31 -22
- package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +85 -57
- package/deps/rocksdb/rocksdb/db/db_iter.h +11 -2
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +29 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +276 -21
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +193 -7
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +294 -26
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +364 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +13 -3
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +52 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +74 -1
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +22 -4
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +282 -167
- package/deps/rocksdb/rocksdb/db/db_test.cc +180 -49
- package/deps/rocksdb/rocksdb/db/db_test2.cc +84 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +25 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.h +45 -2
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +14 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +245 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +480 -1
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/deletefile_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +67 -34
- package/deps/rocksdb/rocksdb/db/error_handler.h +13 -9
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +144 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
- package/deps/rocksdb/rocksdb/db/flush_job.cc +105 -17
- package/deps/rocksdb/rocksdb/db/flush_job.h +27 -4
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +90 -12
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
- package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
- package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -3
- package/deps/rocksdb/rocksdb/db/memtable.cc +70 -83
- package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +45 -11
- package/deps/rocksdb/rocksdb/db/memtable_list.h +43 -2
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +91 -5
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +330 -115
- package/deps/rocksdb/rocksdb/db/merge_helper.h +100 -12
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +82 -0
- package/deps/rocksdb/rocksdb/db/merge_test.cc +267 -0
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -2
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +4 -4
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +4 -0
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +4 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +25 -7
- package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +459 -74
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +105 -69
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +83 -46
- package/deps/rocksdb/rocksdb/db/table_cache.cc +76 -54
- package/deps/rocksdb/rocksdb/db/table_cache.h +18 -12
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
- package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
- package/deps/rocksdb/rocksdb/db/version_edit.h +58 -10
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
- package/deps/rocksdb/rocksdb/db/version_set.cc +207 -110
- package/deps/rocksdb/rocksdb/db/version_set.h +36 -15
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -5
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +47 -26
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +525 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -22
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -20
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +0 -29
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +46 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +40 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper_test.cc +39 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +55 -20
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +4 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +88 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +37 -13
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +110 -58
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +68 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +34 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +429 -237
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +13 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +21 -14
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.cc +51 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.h +27 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +3 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +29 -38
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +302 -101
- package/deps/rocksdb/rocksdb/env/env.cc +6 -2
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
- package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
- package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +79 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +100 -70
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +64 -18
- package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
- package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1225 -97
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +72 -33
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +40 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +163 -91
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +112 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +108 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +42 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +92 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +34 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +91 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +8 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +10 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +55 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +130 -22
- package/deps/rocksdb/rocksdb/include/rocksdb/port_defs.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +92 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +37 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +35 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +20 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +42 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +53 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -2
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +4 -3
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +1 -1
- package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +22 -1
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +18 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +14 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +6 -1
- package/deps/rocksdb/rocksdb/options/db_options.cc +54 -2
- package/deps/rocksdb/rocksdb/options/db_options.h +4 -0
- package/deps/rocksdb/rocksdb/options/options.cc +15 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +18 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +14 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -1
- package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
- package/deps/rocksdb/rocksdb/port/README +10 -0
- package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.cc +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.h +7 -4
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +32 -12
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
- package/deps/rocksdb/rocksdb/port/win/port_win.h +5 -2
- package/deps/rocksdb/rocksdb/src.mk +10 -1
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
- package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +116 -43
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +9 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +321 -49
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +98 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +233 -98
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +58 -23
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +12 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +52 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +26 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -18
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +20 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +232 -71
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -6
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +44 -26
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +36 -19
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +11 -7
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +14 -13
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +9 -2
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/format.cc +175 -33
- package/deps/rocksdb/rocksdb/table/format.h +63 -10
- package/deps/rocksdb/rocksdb/table/get_context.cc +52 -89
- package/deps/rocksdb/rocksdb/table/get_context.h +12 -3
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +11 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +29 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +22 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +45 -9
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +24 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +6 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +31 -0
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +3 -3
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +87 -65
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +221 -33
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +36 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +33 -11
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
- package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
- package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +32 -11
- package/deps/rocksdb/rocksdb/util/cast_util.h +24 -0
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +55 -8
- package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
- package/deps/rocksdb/rocksdb/util/compression.h +119 -35
- package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
- package/deps/rocksdb/rocksdb/util/crc32c.cc +7 -1
- package/deps/rocksdb/rocksdb/util/distributed_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
- package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
- package/deps/rocksdb/rocksdb/util/hash.h +7 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
- package/deps/rocksdb/rocksdb/util/math.h +58 -6
- package/deps/rocksdb/rocksdb/util/math128.h +29 -7
- package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
- package/deps/rocksdb/rocksdb/util/overload.h +23 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +53 -18
- package/deps/rocksdb/rocksdb/util/rate_limiter_impl.h +6 -1
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +90 -19
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
- package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
- package/deps/rocksdb/rocksdb/util/string_util.cc +39 -0
- package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +10 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +385 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +192 -1
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +461 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +0 -3
- package/deps/rocksdb/rocksdb/util/xxph3.h +0 -4
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +71 -26
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +20 -16
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +11 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +7 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +12 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +13 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +272 -33
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +15 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +4 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +195 -23
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +19 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +88 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +43 -17
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +73 -24
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +41 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +15 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +59 -28
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +127 -120
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +129 -59
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +111 -14
- package/deps/rocksdb/rocksdb.gyp +6 -2
- package/index.js +0 -8
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +0 -7
- package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +0 -33
- package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +0 -26
- package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +0 -10
|
@@ -21,7 +21,9 @@
|
|
|
21
21
|
#include "monitoring/thread_status_util.h"
|
|
22
22
|
#include "test_util/sync_point.h"
|
|
23
23
|
#include "util/cast_util.h"
|
|
24
|
+
#include "util/coding.h"
|
|
24
25
|
#include "util/concurrent_task_limiter_impl.h"
|
|
26
|
+
#include "util/udt_util.h"
|
|
25
27
|
|
|
26
28
|
namespace ROCKSDB_NAMESPACE {
|
|
27
29
|
|
|
@@ -76,8 +78,43 @@ bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force,
|
|
|
76
78
|
return false;
|
|
77
79
|
}
|
|
78
80
|
|
|
81
|
+
bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
|
|
82
|
+
const FlushRequest& flush_req) {
|
|
83
|
+
mutex_.AssertHeld();
|
|
84
|
+
assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
85
|
+
ColumnFamilyData* cfd = flush_req.cfd_to_max_mem_id_to_persist.begin()->first;
|
|
86
|
+
uint64_t max_memtable_id =
|
|
87
|
+
flush_req.cfd_to_max_mem_id_to_persist.begin()->second;
|
|
88
|
+
if (cfd->IsDropped() ||
|
|
89
|
+
!cfd->ShouldPostponeFlushToRetainUDT(max_memtable_id)) {
|
|
90
|
+
return false;
|
|
91
|
+
}
|
|
92
|
+
// Check if holding on the flush will cause entering write stall mode.
|
|
93
|
+
// Write stall entered because of the accumulation of write buffers can be
|
|
94
|
+
// alleviated if we continue with the flush instead of postponing it.
|
|
95
|
+
const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions();
|
|
96
|
+
|
|
97
|
+
// Taking the status of the active Memtable into consideration so that we are
|
|
98
|
+
// not just checking if DB is currently already in write stall mode.
|
|
99
|
+
int mem_to_flush = cfd->mem()->ApproximateMemoryUsageFast() >=
|
|
100
|
+
cfd->mem()->write_buffer_size() / 2
|
|
101
|
+
? 1
|
|
102
|
+
: 0;
|
|
103
|
+
WriteStallCondition write_stall =
|
|
104
|
+
ColumnFamilyData::GetWriteStallConditionAndCause(
|
|
105
|
+
cfd->imm()->NumNotFlushed() + mem_to_flush, /*num_l0_files=*/0,
|
|
106
|
+
/*num_compaction_needed_bytes=*/0, mutable_cf_options,
|
|
107
|
+
*cfd->ioptions())
|
|
108
|
+
.first;
|
|
109
|
+
if (write_stall != WriteStallCondition::kNormal) {
|
|
110
|
+
return false;
|
|
111
|
+
}
|
|
112
|
+
return true;
|
|
113
|
+
}
|
|
114
|
+
|
|
79
115
|
IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
|
|
80
|
-
VersionEdit* synced_wals
|
|
116
|
+
VersionEdit* synced_wals,
|
|
117
|
+
bool error_recovery_in_prog) {
|
|
81
118
|
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
|
|
82
119
|
InstrumentedMutexLock l(&log_write_mutex_);
|
|
83
120
|
autovector<log::Writer*, 1> logs_to_sync;
|
|
@@ -103,7 +140,7 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
|
|
|
103
140
|
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
|
104
141
|
"[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
|
|
105
142
|
log->get_log_number());
|
|
106
|
-
if (
|
|
143
|
+
if (error_recovery_in_prog) {
|
|
107
144
|
log->file()->reset_seen_error();
|
|
108
145
|
}
|
|
109
146
|
io_s = log->file()->Sync(immutable_db_options_.use_fsync);
|
|
@@ -112,7 +149,7 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
|
|
|
112
149
|
}
|
|
113
150
|
|
|
114
151
|
if (immutable_db_options_.recycle_log_file_num > 0) {
|
|
115
|
-
if (
|
|
152
|
+
if (error_recovery_in_prog) {
|
|
116
153
|
log->file()->reset_seen_error();
|
|
117
154
|
}
|
|
118
155
|
io_s = log->Close();
|
|
@@ -186,9 +223,10 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
186
223
|
// `snapshot_seqs` has already been computed before this function starts.
|
|
187
224
|
// Recording the max memtable ID ensures that the flush job does not flush
|
|
188
225
|
// a memtable without knowing such snapshot(s).
|
|
189
|
-
uint64_t max_memtable_id =
|
|
190
|
-
|
|
191
|
-
|
|
226
|
+
uint64_t max_memtable_id =
|
|
227
|
+
needs_to_sync_closed_wals
|
|
228
|
+
? cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */)
|
|
229
|
+
: std::numeric_limits<uint64_t>::max();
|
|
192
230
|
|
|
193
231
|
// If needs_to_sync_closed_wals is false, then the flush job will pick ALL
|
|
194
232
|
// existing memtables of the column family when PickMemTable() is called
|
|
@@ -197,7 +235,7 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
197
235
|
// releases and re-acquires the db mutex. In the meantime, the application
|
|
198
236
|
// can still insert into the memtables and increase the db's sequence number.
|
|
199
237
|
// The application can take a snapshot, hoping that the latest visible state
|
|
200
|
-
// to this
|
|
238
|
+
// to this snapshot is preserved. This is hard to guarantee since db mutex
|
|
201
239
|
// not held. This newly-created snapshot is not included in `snapshot_seqs`
|
|
202
240
|
// and the flush job is unaware of its presence. Consequently, the flush job
|
|
203
241
|
// may drop certain keys when generating the L0, causing incorrect data to be
|
|
@@ -214,7 +252,7 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
214
252
|
GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
|
|
215
253
|
&event_logger_, mutable_cf_options.report_bg_io_stats,
|
|
216
254
|
true /* sync_output_directory */, true /* write_manifest */, thread_pri,
|
|
217
|
-
io_tracer_,
|
|
255
|
+
io_tracer_, seqno_to_time_mapping_, db_id_, db_session_id_,
|
|
218
256
|
cfd->GetFullHistoryTsLow(), &blob_callback_);
|
|
219
257
|
FileMetaData file_meta;
|
|
220
258
|
|
|
@@ -225,8 +263,10 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
225
263
|
// SyncClosedLogs() may unlock and re-lock the log_write_mutex multiple
|
|
226
264
|
// times.
|
|
227
265
|
VersionEdit synced_wals;
|
|
266
|
+
bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
|
|
228
267
|
mutex_.Unlock();
|
|
229
|
-
log_io_s =
|
|
268
|
+
log_io_s =
|
|
269
|
+
SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog);
|
|
230
270
|
mutex_.Lock();
|
|
231
271
|
if (log_io_s.ok() && synced_wals.IsWalAddition()) {
|
|
232
272
|
const ReadOptions read_options(Env::IOActivity::kFlush);
|
|
@@ -248,6 +288,24 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
248
288
|
// If the log sync failed, we do not need to pick memtable. Otherwise,
|
|
249
289
|
// num_flush_not_started_ needs to be rollback.
|
|
250
290
|
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
|
|
291
|
+
// Exit a flush due to bg error should not set bg error again.
|
|
292
|
+
bool skip_set_bg_error = false;
|
|
293
|
+
if (s.ok() && !error_handler_.GetBGError().ok() &&
|
|
294
|
+
error_handler_.IsBGWorkStopped() &&
|
|
295
|
+
flush_reason != FlushReason::kErrorRecovery &&
|
|
296
|
+
flush_reason != FlushReason::kErrorRecoveryRetryFlush) {
|
|
297
|
+
// Error recovery in progress, should not pick memtable which excludes
|
|
298
|
+
// them from being picked up by recovery flush.
|
|
299
|
+
// This ensures that when bg error is set, no new flush can pick
|
|
300
|
+
// memtables.
|
|
301
|
+
skip_set_bg_error = true;
|
|
302
|
+
s = error_handler_.GetBGError();
|
|
303
|
+
assert(!s.ok());
|
|
304
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
305
|
+
"[JOB %d] Skip flush due to background error %s",
|
|
306
|
+
job_context->job_id, s.ToString().c_str());
|
|
307
|
+
}
|
|
308
|
+
|
|
251
309
|
if (s.ok()) {
|
|
252
310
|
flush_job.PickMemTable();
|
|
253
311
|
need_cancel = true;
|
|
@@ -268,7 +326,8 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
268
326
|
// is unlocked by the current thread.
|
|
269
327
|
if (s.ok()) {
|
|
270
328
|
s = flush_job.Run(&logs_with_prep_tracker_, &file_meta,
|
|
271
|
-
&switched_to_mempurge
|
|
329
|
+
&switched_to_mempurge, &skip_set_bg_error,
|
|
330
|
+
&error_handler_);
|
|
272
331
|
need_cancel = false;
|
|
273
332
|
}
|
|
274
333
|
|
|
@@ -309,7 +368,8 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
309
368
|
}
|
|
310
369
|
}
|
|
311
370
|
|
|
312
|
-
if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()
|
|
371
|
+
if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped() &&
|
|
372
|
+
!skip_set_bg_error) {
|
|
313
373
|
if (log_io_s.ok()) {
|
|
314
374
|
// Error while writing to MANIFEST.
|
|
315
375
|
// In fact, versions_->io_status() can also be the result of renaming
|
|
@@ -466,7 +526,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
466
526
|
GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
|
|
467
527
|
&event_logger_, mutable_cf_options.report_bg_io_stats,
|
|
468
528
|
false /* sync_output_directory */, false /* write_manifest */,
|
|
469
|
-
thread_pri, io_tracer_,
|
|
529
|
+
thread_pri, io_tracer_, seqno_to_time_mapping_, db_id_, db_session_id_,
|
|
470
530
|
cfd->GetFullHistoryTsLow(), &blob_callback_));
|
|
471
531
|
}
|
|
472
532
|
|
|
@@ -490,8 +550,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
490
550
|
// TODO (yanqin) investigate whether we should sync the closed logs for
|
|
491
551
|
// single column family case.
|
|
492
552
|
VersionEdit synced_wals;
|
|
553
|
+
bool error_recovery_in_prog = error_handler_.IsRecoveryInProgress();
|
|
493
554
|
mutex_.Unlock();
|
|
494
|
-
log_io_s =
|
|
555
|
+
log_io_s =
|
|
556
|
+
SyncClosedLogs(job_context, &synced_wals, error_recovery_in_prog);
|
|
495
557
|
mutex_.Lock();
|
|
496
558
|
if (log_io_s.ok() && synced_wals.IsWalAddition()) {
|
|
497
559
|
const ReadOptions read_options(Env::IOActivity::kFlush);
|
|
@@ -521,6 +583,21 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
521
583
|
pick_status.push_back(false);
|
|
522
584
|
}
|
|
523
585
|
|
|
586
|
+
bool flush_for_recovery =
|
|
587
|
+
bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
|
|
588
|
+
bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecoveryRetryFlush;
|
|
589
|
+
bool skip_set_bg_error = false;
|
|
590
|
+
|
|
591
|
+
if (s.ok() && !error_handler_.GetBGError().ok() &&
|
|
592
|
+
error_handler_.IsBGWorkStopped() && !flush_for_recovery) {
|
|
593
|
+
s = error_handler_.GetBGError();
|
|
594
|
+
skip_set_bg_error = true;
|
|
595
|
+
assert(!s.ok());
|
|
596
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
597
|
+
"[JOB %d] Skip flush due to background error %s",
|
|
598
|
+
job_context->job_id, s.ToString().c_str());
|
|
599
|
+
}
|
|
600
|
+
|
|
524
601
|
if (s.ok()) {
|
|
525
602
|
for (int i = 0; i != num_cfs; ++i) {
|
|
526
603
|
jobs[i]->PickMemTable();
|
|
@@ -585,7 +662,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
585
662
|
}
|
|
586
663
|
}
|
|
587
664
|
}
|
|
588
|
-
} else {
|
|
665
|
+
} else if (!skip_set_bg_error) {
|
|
666
|
+
// When `skip_set_bg_error` is true, no memtable is picked so
|
|
667
|
+
// there is no need to call Cancel() or RollbackMemtableFlush().
|
|
668
|
+
//
|
|
589
669
|
// Need to undo atomic flush if something went wrong, i.e. s is not OK and
|
|
590
670
|
// it is not because of CF drop.
|
|
591
671
|
// Have to cancel the flush jobs that have NOT executed because we need to
|
|
@@ -598,8 +678,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
598
678
|
for (int i = 0; i != num_cfs; ++i) {
|
|
599
679
|
if (exec_status[i].second.ok() && exec_status[i].first) {
|
|
600
680
|
auto& mems = jobs[i]->GetMemTables();
|
|
601
|
-
cfds[i]->imm()->RollbackMemtableFlush(
|
|
602
|
-
|
|
681
|
+
cfds[i]->imm()->RollbackMemtableFlush(
|
|
682
|
+
mems, /*rollback_succeeding_memtables=*/false);
|
|
603
683
|
}
|
|
604
684
|
}
|
|
605
685
|
}
|
|
@@ -641,10 +721,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
641
721
|
};
|
|
642
722
|
|
|
643
723
|
bool resuming_from_bg_err =
|
|
644
|
-
error_handler_.IsDBStopped() ||
|
|
645
|
-
(bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
|
|
646
|
-
bg_flush_args[0].flush_reason_ ==
|
|
647
|
-
FlushReason::kErrorRecoveryRetryFlush);
|
|
724
|
+
error_handler_.IsDBStopped() || flush_for_recovery;
|
|
648
725
|
while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) {
|
|
649
726
|
std::pair<Status, bool> res = wait_to_install_func();
|
|
650
727
|
|
|
@@ -655,15 +732,27 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
655
732
|
s = res.first;
|
|
656
733
|
break;
|
|
657
734
|
} else if (!res.second) {
|
|
735
|
+
// we are the oldest immutable memtable
|
|
736
|
+
break;
|
|
737
|
+
}
|
|
738
|
+
// We are not the oldest immutable memtable
|
|
739
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
740
|
+
"DBImpl::AtomicFlushMemTablesToOutputFiles:WaitCV", &res);
|
|
741
|
+
//
|
|
742
|
+
// If bg work is stopped, recovery thread first calls
|
|
743
|
+
// WaitForBackgroundWork() before proceeding to flush for recovery. This
|
|
744
|
+
// flush can block WaitForBackgroundWork() while waiting for recovery
|
|
745
|
+
// flush to install result. To avoid this deadlock, we should abort here
|
|
746
|
+
// if there is background error.
|
|
747
|
+
if (!flush_for_recovery && error_handler_.IsBGWorkStopped() &&
|
|
748
|
+
!error_handler_.GetBGError().ok()) {
|
|
749
|
+
s = error_handler_.GetBGError();
|
|
750
|
+
assert(!s.ok());
|
|
658
751
|
break;
|
|
659
752
|
}
|
|
660
753
|
atomic_flush_install_cv_.Wait();
|
|
661
754
|
|
|
662
|
-
resuming_from_bg_err =
|
|
663
|
-
error_handler_.IsDBStopped() ||
|
|
664
|
-
(bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
|
|
665
|
-
bg_flush_args[0].flush_reason_ ==
|
|
666
|
-
FlushReason::kErrorRecoveryRetryFlush);
|
|
755
|
+
resuming_from_bg_err = error_handler_.IsDBStopped() || flush_for_recovery;
|
|
667
756
|
}
|
|
668
757
|
|
|
669
758
|
if (!resuming_from_bg_err) {
|
|
@@ -679,6 +768,17 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
679
768
|
// installation.
|
|
680
769
|
s = error_handler_.GetRecoveryError();
|
|
681
770
|
}
|
|
771
|
+
// Since we are not installing these memtables, need to rollback
|
|
772
|
+
// to allow future flush job to pick up these memtables.
|
|
773
|
+
if (!s.ok()) {
|
|
774
|
+
for (int i = 0; i != num_cfs; ++i) {
|
|
775
|
+
assert(exec_status[i].first);
|
|
776
|
+
assert(exec_status[i].second.ok());
|
|
777
|
+
auto& mems = jobs[i]->GetMemTables();
|
|
778
|
+
cfds[i]->imm()->RollbackMemtableFlush(
|
|
779
|
+
mems, /*rollback_succeeding_memtables=*/false);
|
|
780
|
+
}
|
|
781
|
+
}
|
|
682
782
|
}
|
|
683
783
|
|
|
684
784
|
if (s.ok()) {
|
|
@@ -782,7 +882,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
782
882
|
|
|
783
883
|
// Need to undo atomic flush if something went wrong, i.e. s is not OK and
|
|
784
884
|
// it is not because of CF drop.
|
|
785
|
-
if (!s.ok() && !s.IsColumnFamilyDropped()) {
|
|
885
|
+
if (!s.ok() && !s.IsColumnFamilyDropped() && !skip_set_bg_error) {
|
|
786
886
|
if (log_io_s.ok()) {
|
|
787
887
|
// Error while writing to MANIFEST.
|
|
788
888
|
// In fact, versions_->io_status() can also be the result of renaming
|
|
@@ -852,8 +952,8 @@ void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
|
|
|
852
952
|
}
|
|
853
953
|
}
|
|
854
954
|
mutex_.Lock();
|
|
855
|
-
// no need to signal bg_cv_ as it will be signaled at the end of the
|
|
856
|
-
// flush process.
|
|
955
|
+
// no need to signal bg_cv_ as it will be signaled at the end of the
|
|
956
|
+
// flush process.
|
|
857
957
|
}
|
|
858
958
|
|
|
859
959
|
void DBImpl::NotifyOnFlushCompleted(
|
|
@@ -912,26 +1012,14 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
|
|
|
912
1012
|
end_without_ts, "" /*trim_ts*/);
|
|
913
1013
|
}
|
|
914
1014
|
|
|
915
|
-
std::string begin_str;
|
|
916
|
-
|
|
1015
|
+
std::string begin_str, end_str;
|
|
1016
|
+
auto [begin, end] =
|
|
1017
|
+
MaybeAddTimestampsToRange(begin_without_ts, end_without_ts, ts_sz,
|
|
1018
|
+
&begin_str, &end_str, false /*exclusive_end*/);
|
|
917
1019
|
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
if (begin_without_ts != nullptr) {
|
|
922
|
-
AppendKeyWithMaxTimestamp(&begin_str, *begin_without_ts, ts_sz);
|
|
923
|
-
}
|
|
924
|
-
if (end_without_ts != nullptr) {
|
|
925
|
-
AppendKeyWithMinTimestamp(&end_str, *end_without_ts, ts_sz);
|
|
926
|
-
}
|
|
927
|
-
Slice begin(begin_str);
|
|
928
|
-
Slice end(end_str);
|
|
929
|
-
|
|
930
|
-
Slice* begin_with_ts = begin_without_ts ? &begin : nullptr;
|
|
931
|
-
Slice* end_with_ts = end_without_ts ? &end : nullptr;
|
|
932
|
-
|
|
933
|
-
return CompactRangeInternal(options, column_family, begin_with_ts,
|
|
934
|
-
end_with_ts, "" /*trim_ts*/);
|
|
1020
|
+
return CompactRangeInternal(
|
|
1021
|
+
options, column_family, begin.has_value() ? &begin.value() : nullptr,
|
|
1022
|
+
end.has_value() ? &end.value() : nullptr, "" /*trim_ts*/);
|
|
935
1023
|
}
|
|
936
1024
|
|
|
937
1025
|
Status DBImpl::IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family,
|
|
@@ -1066,7 +1154,6 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1066
1154
|
std::numeric_limits<uint64_t>::max(), trim_ts);
|
|
1067
1155
|
} else {
|
|
1068
1156
|
int first_overlapped_level = kInvalidLevel;
|
|
1069
|
-
int max_overlapped_level = kInvalidLevel;
|
|
1070
1157
|
{
|
|
1071
1158
|
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
|
|
1072
1159
|
Version* current_version = super_version->current;
|
|
@@ -1142,10 +1229,8 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1142
1229
|
begin, end);
|
|
1143
1230
|
}
|
|
1144
1231
|
if (overlap) {
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
}
|
|
1148
|
-
max_overlapped_level = level;
|
|
1232
|
+
first_overlapped_level = level;
|
|
1233
|
+
break;
|
|
1149
1234
|
}
|
|
1150
1235
|
}
|
|
1151
1236
|
CleanupSuperVersion(super_version);
|
|
@@ -1159,7 +1244,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1159
1244
|
end, exclusive, true /* disallow_trivial_move */,
|
|
1160
1245
|
std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
|
|
1161
1246
|
trim_ts);
|
|
1162
|
-
final_output_level =
|
|
1247
|
+
final_output_level = first_overlapped_level;
|
|
1163
1248
|
} else {
|
|
1164
1249
|
assert(cfd->ioptions()->compaction_style == kCompactionStyleLevel);
|
|
1165
1250
|
uint64_t next_file_number = versions_->current_next_file_number();
|
|
@@ -1171,7 +1256,29 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1171
1256
|
int level = first_overlapped_level;
|
|
1172
1257
|
final_output_level = level;
|
|
1173
1258
|
int output_level = 0, base_level = 0;
|
|
1174
|
-
|
|
1259
|
+
for (;;) {
|
|
1260
|
+
// Always allow L0 -> L1 compaction
|
|
1261
|
+
if (level > 0) {
|
|
1262
|
+
if (cfd->ioptions()->level_compaction_dynamic_level_bytes) {
|
|
1263
|
+
assert(final_output_level < cfd->ioptions()->num_levels);
|
|
1264
|
+
if (final_output_level + 1 == cfd->ioptions()->num_levels) {
|
|
1265
|
+
break;
|
|
1266
|
+
}
|
|
1267
|
+
} else {
|
|
1268
|
+
// TODO(cbi): there is still a race condition here where
|
|
1269
|
+
// if a background compaction compacts some file beyond
|
|
1270
|
+
// current()->storage_info()->num_non_empty_levels() right after
|
|
1271
|
+
// the check here.This should happen very infrequently and should
|
|
1272
|
+
// not happen once a user populates the last level of the LSM.
|
|
1273
|
+
InstrumentedMutexLock l(&mutex_);
|
|
1274
|
+
// num_non_empty_levels may be lower after a compaction, so
|
|
1275
|
+
// we check for >= here.
|
|
1276
|
+
if (final_output_level + 1 >=
|
|
1277
|
+
cfd->current()->storage_info()->num_non_empty_levels()) {
|
|
1278
|
+
break;
|
|
1279
|
+
}
|
|
1280
|
+
}
|
|
1281
|
+
}
|
|
1175
1282
|
output_level = level + 1;
|
|
1176
1283
|
if (cfd->ioptions()->level_compaction_dynamic_level_bytes &&
|
|
1177
1284
|
level == 0) {
|
|
@@ -1203,17 +1310,8 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1203
1310
|
if (s.ok()) {
|
|
1204
1311
|
assert(final_output_level > 0);
|
|
1205
1312
|
// bottommost level intra-level compaction
|
|
1206
|
-
// TODO(cbi): this preserves earlier behavior where if
|
|
1207
|
-
// max_overlapped_level = 0 and bottommost_level_compaction is
|
|
1208
|
-
// kIfHaveCompactionFilter, we only do a L0 -> LBase compaction
|
|
1209
|
-
// and do not do intra-LBase compaction even when user configures
|
|
1210
|
-
// compaction filter. We may want to still do a LBase -> LBase
|
|
1211
|
-
// compaction in case there is some file in LBase that did not go
|
|
1212
|
-
// through L0 -> LBase compaction, and hence did not go through
|
|
1213
|
-
// compaction filter.
|
|
1214
1313
|
if ((options.bottommost_level_compaction ==
|
|
1215
1314
|
BottommostLevelCompaction::kIfHaveCompactionFilter &&
|
|
1216
|
-
max_overlapped_level != 0 &&
|
|
1217
1315
|
(cfd->ioptions()->compaction_filter != nullptr ||
|
|
1218
1316
|
cfd->ioptions()->compaction_filter_factory != nullptr)) ||
|
|
1219
1317
|
options.bottommost_level_compaction ==
|
|
@@ -1221,10 +1319,11 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1221
1319
|
options.bottommost_level_compaction ==
|
|
1222
1320
|
BottommostLevelCompaction::kForce) {
|
|
1223
1321
|
// Use `next_file_number` as `max_file_num_to_ignore` to avoid
|
|
1224
|
-
// rewriting newly compacted files when it is kForceOptimized
|
|
1322
|
+
// rewriting newly compacted files when it is kForceOptimized
|
|
1323
|
+
// or kIfHaveCompactionFilter with compaction filter set.
|
|
1225
1324
|
s = RunManualCompaction(
|
|
1226
1325
|
cfd, final_output_level, final_output_level, options, begin,
|
|
1227
|
-
end, exclusive,
|
|
1326
|
+
end, exclusive, true /* disallow_trivial_move */,
|
|
1228
1327
|
next_file_number /* max_file_num_to_ignore */, trim_ts);
|
|
1229
1328
|
}
|
|
1230
1329
|
}
|
|
@@ -1375,6 +1474,14 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1375
1474
|
}
|
|
1376
1475
|
}
|
|
1377
1476
|
|
|
1477
|
+
if (cfd->ioptions()->allow_ingest_behind &&
|
|
1478
|
+
output_level >= cfd->ioptions()->num_levels - 1) {
|
|
1479
|
+
return Status::InvalidArgument(
|
|
1480
|
+
"Exceed the maximum output level defined by "
|
|
1481
|
+
"the current compaction algorithm with ingest_behind --- " +
|
|
1482
|
+
std::to_string(cfd->ioptions()->num_levels - 1));
|
|
1483
|
+
}
|
|
1484
|
+
|
|
1378
1485
|
Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles(
|
|
1379
1486
|
&input_set, cf_meta, output_level);
|
|
1380
1487
|
TEST_SYNC_POINT("DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles");
|
|
@@ -1419,7 +1526,8 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1419
1526
|
// without releasing the lock, so we're guaranteed a compaction can be formed.
|
|
1420
1527
|
assert(c != nullptr);
|
|
1421
1528
|
|
|
1422
|
-
c->
|
|
1529
|
+
c->FinalizeInputInfo(version);
|
|
1530
|
+
|
|
1423
1531
|
// deletion compaction currently not allowed in CompactFiles.
|
|
1424
1532
|
assert(!c->deletion_compaction());
|
|
1425
1533
|
|
|
@@ -1469,7 +1577,12 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1469
1577
|
TEST_SYNC_POINT("CompactFilesImpl:3");
|
|
1470
1578
|
mutex_.Lock();
|
|
1471
1579
|
|
|
1472
|
-
|
|
1580
|
+
bool compaction_released = false;
|
|
1581
|
+
Status status =
|
|
1582
|
+
compaction_job.Install(*c->mutable_cf_options(), &compaction_released);
|
|
1583
|
+
if (!compaction_released) {
|
|
1584
|
+
c->ReleaseCompactionFiles(s);
|
|
1585
|
+
}
|
|
1473
1586
|
if (status.ok()) {
|
|
1474
1587
|
assert(compaction_job.io_status().ok());
|
|
1475
1588
|
InstallSuperVersionAndScheduleWork(c->column_family_data(),
|
|
@@ -1480,7 +1593,6 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1480
1593
|
// not check compaction_job.io_status() explicitly if we're not calling
|
|
1481
1594
|
// SetBGError
|
|
1482
1595
|
compaction_job.io_status().PermitUncheckedError();
|
|
1483
|
-
c->ReleaseCompactionFiles(s);
|
|
1484
1596
|
// Need to make sure SstFileManager does its bookkeeping
|
|
1485
1597
|
auto sfm = static_cast<SstFileManagerImpl*>(
|
|
1486
1598
|
immutable_db_options_.sst_file_manager.get());
|
|
@@ -1492,7 +1604,7 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1492
1604
|
|
|
1493
1605
|
if (compaction_job_info != nullptr) {
|
|
1494
1606
|
BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats,
|
|
1495
|
-
job_context->job_id,
|
|
1607
|
+
job_context->job_id, compaction_job_info);
|
|
1496
1608
|
}
|
|
1497
1609
|
|
|
1498
1610
|
if (status.ok()) {
|
|
@@ -1589,21 +1701,18 @@ void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c,
|
|
|
1589
1701
|
}
|
|
1590
1702
|
|
|
1591
1703
|
c->SetNotifyOnCompactionCompleted();
|
|
1592
|
-
Version* current = cfd->current();
|
|
1593
|
-
current->Ref();
|
|
1594
1704
|
// release lock while notifying events
|
|
1595
1705
|
mutex_.Unlock();
|
|
1596
1706
|
TEST_SYNC_POINT("DBImpl::NotifyOnCompactionBegin::UnlockMutex");
|
|
1597
1707
|
{
|
|
1598
1708
|
CompactionJobInfo info{};
|
|
1599
|
-
BuildCompactionJobInfo(cfd, c, st, job_stats, job_id,
|
|
1709
|
+
BuildCompactionJobInfo(cfd, c, st, job_stats, job_id, &info);
|
|
1600
1710
|
for (auto listener : immutable_db_options_.listeners) {
|
|
1601
1711
|
listener->OnCompactionBegin(this, info);
|
|
1602
1712
|
}
|
|
1603
1713
|
info.status.PermitUncheckedError();
|
|
1604
1714
|
}
|
|
1605
1715
|
mutex_.Lock();
|
|
1606
|
-
current->Unref();
|
|
1607
1716
|
}
|
|
1608
1717
|
|
|
1609
1718
|
void DBImpl::NotifyOnCompactionCompleted(
|
|
@@ -1621,21 +1730,17 @@ void DBImpl::NotifyOnCompactionCompleted(
|
|
|
1621
1730
|
return;
|
|
1622
1731
|
}
|
|
1623
1732
|
|
|
1624
|
-
Version* current = cfd->current();
|
|
1625
|
-
current->Ref();
|
|
1626
1733
|
// release lock while notifying events
|
|
1627
1734
|
mutex_.Unlock();
|
|
1628
1735
|
TEST_SYNC_POINT("DBImpl::NotifyOnCompactionCompleted::UnlockMutex");
|
|
1629
1736
|
{
|
|
1630
1737
|
CompactionJobInfo info{};
|
|
1631
|
-
BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id,
|
|
1632
|
-
&info);
|
|
1738
|
+
BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id, &info);
|
|
1633
1739
|
for (auto listener : immutable_db_options_.listeners) {
|
|
1634
1740
|
listener->OnCompactionCompleted(this, info);
|
|
1635
1741
|
}
|
|
1636
1742
|
}
|
|
1637
1743
|
mutex_.Lock();
|
|
1638
|
-
current->Unref();
|
|
1639
1744
|
// no need to signal bg_cv_ as it will be signaled at the end of the
|
|
1640
1745
|
// flush process.
|
|
1641
1746
|
}
|
|
@@ -1758,7 +1863,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|
|
1758
1863
|
f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
|
|
1759
1864
|
f->oldest_ancester_time, f->file_creation_time, f->epoch_number,
|
|
1760
1865
|
f->file_checksum, f->file_checksum_func_name, f->unique_id,
|
|
1761
|
-
f->compensated_range_deletion_size, f->tail_size
|
|
1866
|
+
f->compensated_range_deletion_size, f->tail_size,
|
|
1867
|
+
f->user_defined_timestamps_persisted);
|
|
1762
1868
|
}
|
|
1763
1869
|
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
|
|
1764
1870
|
"[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
|
|
@@ -1808,6 +1914,37 @@ int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) {
|
|
|
1808
1914
|
->mutable_cf_options.level0_stop_writes_trigger;
|
|
1809
1915
|
}
|
|
1810
1916
|
|
|
1917
|
+
Status DBImpl::FlushAllColumnFamilies(const FlushOptions& flush_options,
|
|
1918
|
+
FlushReason flush_reason) {
|
|
1919
|
+
mutex_.AssertHeld();
|
|
1920
|
+
Status status;
|
|
1921
|
+
if (immutable_db_options_.atomic_flush) {
|
|
1922
|
+
mutex_.Unlock();
|
|
1923
|
+
status = AtomicFlushMemTables(flush_options, flush_reason);
|
|
1924
|
+
if (status.IsColumnFamilyDropped()) {
|
|
1925
|
+
status = Status::OK();
|
|
1926
|
+
}
|
|
1927
|
+
mutex_.Lock();
|
|
1928
|
+
} else {
|
|
1929
|
+
for (auto cfd : versions_->GetRefedColumnFamilySet()) {
|
|
1930
|
+
if (cfd->IsDropped()) {
|
|
1931
|
+
continue;
|
|
1932
|
+
}
|
|
1933
|
+
mutex_.Unlock();
|
|
1934
|
+
status = FlushMemTable(cfd, flush_options, flush_reason);
|
|
1935
|
+
TEST_SYNC_POINT("DBImpl::FlushAllColumnFamilies:1");
|
|
1936
|
+
TEST_SYNC_POINT("DBImpl::FlushAllColumnFamilies:2");
|
|
1937
|
+
mutex_.Lock();
|
|
1938
|
+
if (!status.ok() && !status.IsColumnFamilyDropped()) {
|
|
1939
|
+
break;
|
|
1940
|
+
} else if (status.IsColumnFamilyDropped()) {
|
|
1941
|
+
status = Status::OK();
|
|
1942
|
+
}
|
|
1943
|
+
}
|
|
1944
|
+
}
|
|
1945
|
+
return status;
|
|
1946
|
+
}
|
|
1947
|
+
|
|
1811
1948
|
Status DBImpl::Flush(const FlushOptions& flush_options,
|
|
1812
1949
|
ColumnFamilyHandle* column_family) {
|
|
1813
1950
|
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
|
|
@@ -2099,7 +2236,8 @@ void DBImpl::GenerateFlushRequest(const autovector<ColumnFamilyData*>& cfds,
|
|
|
2099
2236
|
// cfd may be null, see DBImpl::ScheduleFlushes
|
|
2100
2237
|
continue;
|
|
2101
2238
|
}
|
|
2102
|
-
uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID(
|
|
2239
|
+
uint64_t max_memtable_id = cfd->imm()->GetLatestMemTableID(
|
|
2240
|
+
immutable_db_options_.atomic_flush /* for_atomic_flush */);
|
|
2103
2241
|
req->cfd_to_max_mem_id_to_persist.emplace(cfd, max_memtable_id);
|
|
2104
2242
|
}
|
|
2105
2243
|
}
|
|
@@ -2143,15 +2281,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2143
2281
|
}
|
|
2144
2282
|
WaitForPendingWrites();
|
|
2145
2283
|
|
|
2146
|
-
if (
|
|
2147
|
-
(!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) {
|
|
2148
|
-
// Note that, when flush reason is kErrorRecoveryRetryFlush, during the
|
|
2149
|
-
// auto retry resume, we want to avoid creating new small memtables.
|
|
2150
|
-
// Therefore, SwitchMemtable will not be called. Also, since ResumeImpl
|
|
2151
|
-
// will iterate through all the CFs and call FlushMemtable during auto
|
|
2152
|
-
// retry resume, it is possible that in some CFs,
|
|
2153
|
-
// cfd->imm()->NumNotFlushed() = 0. In this case, so no flush request will
|
|
2154
|
-
// be created and scheduled, status::OK() will be returned.
|
|
2284
|
+
if (!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load()) {
|
|
2155
2285
|
s = SwitchMemtable(cfd, &context);
|
|
2156
2286
|
}
|
|
2157
2287
|
const uint64_t flush_memtable_id = std::numeric_limits<uint64_t>::max();
|
|
@@ -2160,10 +2290,10 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2160
2290
|
!cached_recoverable_state_empty_.load()) {
|
|
2161
2291
|
FlushRequest req{flush_reason, {{cfd, flush_memtable_id}}};
|
|
2162
2292
|
flush_reqs.emplace_back(std::move(req));
|
|
2163
|
-
memtable_ids_to_wait.emplace_back(
|
|
2293
|
+
memtable_ids_to_wait.emplace_back(
|
|
2294
|
+
cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */));
|
|
2164
2295
|
}
|
|
2165
|
-
if (immutable_db_options_.persist_stats_to_disk
|
|
2166
|
-
flush_reason != FlushReason::kErrorRecoveryRetryFlush) {
|
|
2296
|
+
if (immutable_db_options_.persist_stats_to_disk) {
|
|
2167
2297
|
ColumnFamilyData* cfd_stats =
|
|
2168
2298
|
versions_->GetColumnFamilySet()->GetColumnFamily(
|
|
2169
2299
|
kPersistentStatsColumnFamilyName);
|
|
@@ -2189,7 +2319,8 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2189
2319
|
FlushRequest req{flush_reason, {{cfd_stats, flush_memtable_id}}};
|
|
2190
2320
|
flush_reqs.emplace_back(std::move(req));
|
|
2191
2321
|
memtable_ids_to_wait.emplace_back(
|
|
2192
|
-
cfd_stats->imm()->GetLatestMemTableID(
|
|
2322
|
+
cfd_stats->imm()->GetLatestMemTableID(
|
|
2323
|
+
false /* for_atomic_flush */));
|
|
2193
2324
|
}
|
|
2194
2325
|
}
|
|
2195
2326
|
}
|
|
@@ -2240,8 +2371,7 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2240
2371
|
}
|
|
2241
2372
|
s = WaitForFlushMemTables(
|
|
2242
2373
|
cfds, flush_memtable_ids,
|
|
2243
|
-
|
|
2244
|
-
flush_reason == FlushReason::kErrorRecoveryRetryFlush));
|
|
2374
|
+
flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */);
|
|
2245
2375
|
InstrumentedMutexLock lock_guard(&mutex_);
|
|
2246
2376
|
for (auto* tmp_cfd : cfds) {
|
|
2247
2377
|
tmp_cfd->UnrefAndTryDelete();
|
|
@@ -2336,8 +2466,7 @@ Status DBImpl::AtomicFlushMemTables(
|
|
|
2336
2466
|
}
|
|
2337
2467
|
|
|
2338
2468
|
for (auto cfd : cfds) {
|
|
2339
|
-
if (
|
|
2340
|
-
flush_reason == FlushReason::kErrorRecoveryRetryFlush) {
|
|
2469
|
+
if (cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) {
|
|
2341
2470
|
continue;
|
|
2342
2471
|
}
|
|
2343
2472
|
cfd->Ref();
|
|
@@ -2382,8 +2511,7 @@ Status DBImpl::AtomicFlushMemTables(
|
|
|
2382
2511
|
}
|
|
2383
2512
|
s = WaitForFlushMemTables(
|
|
2384
2513
|
cfds, flush_memtable_ids,
|
|
2385
|
-
|
|
2386
|
-
flush_reason == FlushReason::kErrorRecoveryRetryFlush));
|
|
2514
|
+
flush_reason == FlushReason::kErrorRecovery /* resuming_from_bg_err */);
|
|
2387
2515
|
InstrumentedMutexLock lock_guard(&mutex_);
|
|
2388
2516
|
for (auto* cfd : cfds) {
|
|
2389
2517
|
cfd->UnrefAndTryDelete();
|
|
@@ -2392,6 +2520,68 @@ Status DBImpl::AtomicFlushMemTables(
|
|
|
2392
2520
|
return s;
|
|
2393
2521
|
}
|
|
2394
2522
|
|
|
2523
|
+
Status DBImpl::RetryFlushesForErrorRecovery(FlushReason flush_reason,
|
|
2524
|
+
bool wait) {
|
|
2525
|
+
mutex_.AssertHeld();
|
|
2526
|
+
assert(flush_reason == FlushReason::kErrorRecoveryRetryFlush ||
|
|
2527
|
+
flush_reason == FlushReason::kCatchUpAfterErrorRecovery);
|
|
2528
|
+
|
|
2529
|
+
// Collect referenced CFDs.
|
|
2530
|
+
autovector<ColumnFamilyData*> cfds;
|
|
2531
|
+
for (ColumnFamilyData* cfd : *versions_->GetColumnFamilySet()) {
|
|
2532
|
+
if (!cfd->IsDropped() && cfd->initialized() &&
|
|
2533
|
+
cfd->imm()->NumNotFlushed() != 0) {
|
|
2534
|
+
cfd->Ref();
|
|
2535
|
+
cfd->imm()->FlushRequested();
|
|
2536
|
+
cfds.push_back(cfd);
|
|
2537
|
+
}
|
|
2538
|
+
}
|
|
2539
|
+
|
|
2540
|
+
// Submit flush requests for all immutable memtables needing flush.
|
|
2541
|
+
// `flush_memtable_ids` will be populated such that all immutable
|
|
2542
|
+
// memtables eligible for flush are waited on before this function
|
|
2543
|
+
// returns.
|
|
2544
|
+
autovector<uint64_t> flush_memtable_ids;
|
|
2545
|
+
if (immutable_db_options_.atomic_flush) {
|
|
2546
|
+
FlushRequest flush_req;
|
|
2547
|
+
GenerateFlushRequest(cfds, flush_reason, &flush_req);
|
|
2548
|
+
SchedulePendingFlush(flush_req);
|
|
2549
|
+
for (auto& iter : flush_req.cfd_to_max_mem_id_to_persist) {
|
|
2550
|
+
flush_memtable_ids.push_back(iter.second);
|
|
2551
|
+
}
|
|
2552
|
+
} else {
|
|
2553
|
+
for (auto cfd : cfds) {
|
|
2554
|
+
flush_memtable_ids.push_back(
|
|
2555
|
+
cfd->imm()->GetLatestMemTableID(false /* for_atomic_flush */));
|
|
2556
|
+
// Impose no bound on the highest memtable ID flushed. There is no
|
|
2557
|
+
// reason to do so outside of atomic flush.
|
|
2558
|
+
FlushRequest flush_req{
|
|
2559
|
+
flush_reason,
|
|
2560
|
+
{{cfd,
|
|
2561
|
+
std::numeric_limits<uint64_t>::max() /* max_mem_id_to_persist */}}};
|
|
2562
|
+
SchedulePendingFlush(flush_req);
|
|
2563
|
+
}
|
|
2564
|
+
}
|
|
2565
|
+
MaybeScheduleFlushOrCompaction();
|
|
2566
|
+
|
|
2567
|
+
Status s;
|
|
2568
|
+
if (wait) {
|
|
2569
|
+
mutex_.Unlock();
|
|
2570
|
+
autovector<const uint64_t*> flush_memtable_id_ptrs;
|
|
2571
|
+
for (auto& flush_memtable_id : flush_memtable_ids) {
|
|
2572
|
+
flush_memtable_id_ptrs.push_back(&flush_memtable_id);
|
|
2573
|
+
}
|
|
2574
|
+
s = WaitForFlushMemTables(cfds, flush_memtable_id_ptrs,
|
|
2575
|
+
true /* resuming_from_bg_err */);
|
|
2576
|
+
mutex_.Lock();
|
|
2577
|
+
}
|
|
2578
|
+
|
|
2579
|
+
for (auto* cfd : cfds) {
|
|
2580
|
+
cfd->UnrefAndTryDelete();
|
|
2581
|
+
}
|
|
2582
|
+
return s;
|
|
2583
|
+
}
|
|
2584
|
+
|
|
2395
2585
|
// Calling FlushMemTable(), whether from DB::Flush() or from Backup Engine, can
|
|
2396
2586
|
// cause write stall, for example if one memtable is being flushed already.
|
|
2397
2587
|
// This method tries to avoid write stall (similar to CompactRange() behavior)
|
|
@@ -2455,8 +2645,11 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|
|
2455
2645
|
// check whether one extra immutable memtable or an extra L0 file would
|
|
2456
2646
|
// cause write stalling mode to be entered. It could still enter stall
|
|
2457
2647
|
// mode due to pending compaction bytes, but that's less common
|
|
2648
|
+
// No extra immutable Memtable will be created if the current Memtable is
|
|
2649
|
+
// empty.
|
|
2650
|
+
int mem_to_flush = cfd->mem()->IsEmpty() ? 0 : 1;
|
|
2458
2651
|
write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause(
|
|
2459
|
-
cfd->imm()->NumNotFlushed() +
|
|
2652
|
+
cfd->imm()->NumNotFlushed() + mem_to_flush,
|
|
2460
2653
|
vstorage->l0_delay_trigger_count() + 1,
|
|
2461
2654
|
vstorage->estimated_compaction_needed_bytes(),
|
|
2462
2655
|
mutable_cf_options, *cfd->ioptions())
|
|
@@ -2602,6 +2795,11 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
|
|
2602
2795
|
// There has been a hard error and this call is not part of the recovery
|
|
2603
2796
|
// sequence. Bail out here so we don't get into an endless loop of
|
|
2604
2797
|
// scheduling BG work which will again call this function
|
|
2798
|
+
//
|
|
2799
|
+
// Note that a non-recovery flush can still be scheduled if
|
|
2800
|
+
// error_handler_.IsRecoveryInProgress() returns true. We rely on
|
|
2801
|
+
// BackgroundCallFlush() to check flush reason and drop non-recovery
|
|
2802
|
+
// flushes.
|
|
2605
2803
|
return;
|
|
2606
2804
|
} else if (shutting_down_.load(std::memory_order_acquire)) {
|
|
2607
2805
|
// DB is being deleted; no more background compactions
|
|
@@ -2612,6 +2810,9 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
|
|
2612
2810
|
env_->GetBackgroundThreads(Env::Priority::HIGH) == 0;
|
|
2613
2811
|
while (!is_flush_pool_empty && unscheduled_flushes_ > 0 &&
|
|
2614
2812
|
bg_flush_scheduled_ < bg_job_limits.max_flushes) {
|
|
2813
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
2814
|
+
"DBImpl::MaybeScheduleFlushOrCompaction:BeforeSchedule",
|
|
2815
|
+
&unscheduled_flushes_);
|
|
2615
2816
|
bg_flush_scheduled_++;
|
|
2616
2817
|
FlushThreadArg* fta = new FlushThreadArg;
|
|
2617
2818
|
fta->db_ = this;
|
|
@@ -2721,7 +2922,7 @@ ColumnFamilyData* DBImpl::PopFirstFromCompactionQueue() {
|
|
|
2721
2922
|
|
|
2722
2923
|
DBImpl::FlushRequest DBImpl::PopFirstFromFlushQueue() {
|
|
2723
2924
|
assert(!flush_queue_.empty());
|
|
2724
|
-
FlushRequest flush_req = flush_queue_.front();
|
|
2925
|
+
FlushRequest flush_req = std::move(flush_queue_.front());
|
|
2725
2926
|
flush_queue_.pop_front();
|
|
2726
2927
|
if (!immutable_db_options_.atomic_flush) {
|
|
2727
2928
|
assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
@@ -2765,6 +2966,9 @@ ColumnFamilyData* DBImpl::PickCompactionFromQueue(
|
|
|
2765
2966
|
|
|
2766
2967
|
void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
2767
2968
|
mutex_.AssertHeld();
|
|
2969
|
+
if (reject_new_background_jobs_) {
|
|
2970
|
+
return;
|
|
2971
|
+
}
|
|
2768
2972
|
if (flush_req.cfd_to_max_mem_id_to_persist.empty()) {
|
|
2769
2973
|
return;
|
|
2770
2974
|
}
|
|
@@ -2794,6 +2998,9 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
|
2794
2998
|
|
|
2795
2999
|
void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
|
|
2796
3000
|
mutex_.AssertHeld();
|
|
3001
|
+
if (reject_new_background_jobs_) {
|
|
3002
|
+
return;
|
|
3003
|
+
}
|
|
2797
3004
|
if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) {
|
|
2798
3005
|
AddToCompactionQueue(cfd);
|
|
2799
3006
|
++unscheduled_compactions_;
|
|
@@ -2803,6 +3010,9 @@ void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
|
|
|
2803
3010
|
void DBImpl::SchedulePendingPurge(std::string fname, std::string dir_to_sync,
|
|
2804
3011
|
FileType type, uint64_t number, int job_id) {
|
|
2805
3012
|
mutex_.AssertHeld();
|
|
3013
|
+
if (reject_new_background_jobs_) {
|
|
3014
|
+
return;
|
|
3015
|
+
}
|
|
2806
3016
|
PurgeFileInfo file_info(fname, dir_to_sync, type, number, job_id);
|
|
2807
3017
|
purge_files_.insert({{number, std::move(file_info)}});
|
|
2808
3018
|
}
|
|
@@ -2891,6 +3101,7 @@ void DBImpl::UnscheduleFlushCallback(void* arg) {
|
|
|
2891
3101
|
|
|
2892
3102
|
Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
2893
3103
|
LogBuffer* log_buffer, FlushReason* reason,
|
|
3104
|
+
bool* flush_rescheduled_to_retain_udt,
|
|
2894
3105
|
Env::Priority thread_pri) {
|
|
2895
3106
|
mutex_.AssertHeld();
|
|
2896
3107
|
|
|
@@ -2916,14 +3127,61 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
|
2916
3127
|
autovector<ColumnFamilyData*> column_families_not_to_flush;
|
|
2917
3128
|
while (!flush_queue_.empty()) {
|
|
2918
3129
|
// This cfd is already referenced
|
|
2919
|
-
|
|
3130
|
+
FlushRequest flush_req = PopFirstFromFlushQueue();
|
|
2920
3131
|
FlushReason flush_reason = flush_req.flush_reason;
|
|
3132
|
+
if (!error_handler_.GetBGError().ok() && error_handler_.IsBGWorkStopped() &&
|
|
3133
|
+
flush_reason != FlushReason::kErrorRecovery &&
|
|
3134
|
+
flush_reason != FlushReason::kErrorRecoveryRetryFlush) {
|
|
3135
|
+
// Stop non-recovery flush when bg work is stopped
|
|
3136
|
+
// Note that we drop the flush request here.
|
|
3137
|
+
// Recovery thread should schedule further flushes after bg error
|
|
3138
|
+
// is cleared.
|
|
3139
|
+
status = error_handler_.GetBGError();
|
|
3140
|
+
assert(!status.ok());
|
|
3141
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
3142
|
+
"[JOB %d] Abort flush due to background error %s",
|
|
3143
|
+
job_context->job_id, status.ToString().c_str());
|
|
3144
|
+
*reason = flush_reason;
|
|
3145
|
+
for (auto item : flush_req.cfd_to_max_mem_id_to_persist) {
|
|
3146
|
+
item.first->UnrefAndTryDelete();
|
|
3147
|
+
}
|
|
3148
|
+
return status;
|
|
3149
|
+
}
|
|
3150
|
+
if (!immutable_db_options_.atomic_flush &&
|
|
3151
|
+
ShouldRescheduleFlushRequestToRetainUDT(flush_req)) {
|
|
3152
|
+
assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
3153
|
+
ColumnFamilyData* cfd =
|
|
3154
|
+
flush_req.cfd_to_max_mem_id_to_persist.begin()->first;
|
|
3155
|
+
if (cfd->UnrefAndTryDelete()) {
|
|
3156
|
+
return Status::OK();
|
|
3157
|
+
}
|
|
3158
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
3159
|
+
"FlushRequest for column family %s is re-scheduled to "
|
|
3160
|
+
"retain user-defined timestamps.",
|
|
3161
|
+
cfd->GetName().c_str());
|
|
3162
|
+
// Reschedule the `FlushRequest` as is without checking dropped column
|
|
3163
|
+
// family etc. The follow-up job will do the check anyways, so save the
|
|
3164
|
+
// duplication. Column family is deduplicated by `SchedulePendingFlush` and
|
|
3165
|
+
// `PopFirstFromFlushQueue` at flush request enqueueing and
|
|
3166
|
+
// dequeueing time.
|
|
3167
|
+
// This flush request is rescheduled right after it's popped from the
|
|
3168
|
+
// queue while the db mutex is held, so there should be no other
|
|
3169
|
+
// FlushRequest for the same column family with higher `max_memtable_id`
|
|
3170
|
+
// in the queue to block the reschedule from succeeding.
|
|
3171
|
+
#ifndef NDEBUG
|
|
3172
|
+
flush_req.reschedule_count += 1;
|
|
3173
|
+
#endif /* !NDEBUG */
|
|
3174
|
+
SchedulePendingFlush(flush_req);
|
|
3175
|
+
*reason = flush_reason;
|
|
3176
|
+
*flush_rescheduled_to_retain_udt = true;
|
|
3177
|
+
return Status::TryAgain();
|
|
3178
|
+
}
|
|
2921
3179
|
superversion_contexts.clear();
|
|
2922
3180
|
superversion_contexts.reserve(
|
|
2923
3181
|
flush_req.cfd_to_max_mem_id_to_persist.size());
|
|
2924
3182
|
|
|
2925
|
-
for (const auto&
|
|
2926
|
-
|
|
3183
|
+
for (const auto& [cfd, max_memtable_id] :
|
|
3184
|
+
flush_req.cfd_to_max_mem_id_to_persist) {
|
|
2927
3185
|
if (cfd->GetMempurgeUsed()) {
|
|
2928
3186
|
// If imm() contains silent memtables (e.g.: because
|
|
2929
3187
|
// MemPurge was activated), requesting a flush will
|
|
@@ -2937,10 +3195,16 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
|
2937
3195
|
continue;
|
|
2938
3196
|
}
|
|
2939
3197
|
superversion_contexts.emplace_back(SuperVersionContext(true));
|
|
2940
|
-
bg_flush_args.emplace_back(cfd,
|
|
3198
|
+
bg_flush_args.emplace_back(cfd, max_memtable_id,
|
|
2941
3199
|
&(superversion_contexts.back()), flush_reason);
|
|
2942
3200
|
}
|
|
2943
|
-
|
|
3201
|
+
// `MaybeScheduleFlushOrCompaction` schedules as many `BackgroundCallFlush`
|
|
3202
|
+
// jobs as the number of `FlushRequest` in the `flush_queue_`, a.k.a
|
|
3203
|
+
// `unscheduled_flushes_`. So it's sufficient to make each `BackgroundFlush`
|
|
3204
|
+
// handle one `FlushRequest` and return its own Status.
|
|
3205
|
+
if (!bg_flush_args.empty() || !column_families_not_to_flush.empty()) {
|
|
3206
|
+
TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundFlush:CheckFlushRequest:cb",
|
|
3207
|
+
const_cast<int*>(&flush_req.reschedule_count));
|
|
2944
3208
|
break;
|
|
2945
3209
|
}
|
|
2946
3210
|
}
|
|
@@ -3002,11 +3266,20 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
3002
3266
|
pending_outputs_inserted_elem(new std::list<uint64_t>::iterator(
|
|
3003
3267
|
CaptureCurrentFileNumberInPendingOutputs()));
|
|
3004
3268
|
FlushReason reason;
|
|
3005
|
-
|
|
3006
|
-
Status s =
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3269
|
+
bool flush_rescheduled_to_retain_udt = false;
|
|
3270
|
+
Status s =
|
|
3271
|
+
BackgroundFlush(&made_progress, &job_context, &log_buffer, &reason,
|
|
3272
|
+
&flush_rescheduled_to_retain_udt, thread_pri);
|
|
3273
|
+
if (s.IsTryAgain() && flush_rescheduled_to_retain_udt) {
|
|
3274
|
+
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
|
|
3275
|
+
mutex_.Unlock();
|
|
3276
|
+
TEST_SYNC_POINT_CALLBACK("DBImpl::AfterRetainUDTReschedule:cb", nullptr);
|
|
3277
|
+
immutable_db_options_.clock->SleepForMicroseconds(
|
|
3278
|
+
100000); // prevent hot loop
|
|
3279
|
+
mutex_.Lock();
|
|
3280
|
+
} else if (!s.ok() && !s.IsShutdownInProgress() &&
|
|
3281
|
+
!s.IsColumnFamilyDropped() &&
|
|
3282
|
+
reason != FlushReason::kErrorRecovery) {
|
|
3010
3283
|
// Wait a little bit before retrying background flush in
|
|
3011
3284
|
// case this is an environmental problem and we do not want to
|
|
3012
3285
|
// chew up resources for failed flushes for the duration of
|
|
@@ -3016,9 +3289,9 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
3016
3289
|
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
|
|
3017
3290
|
mutex_.Unlock();
|
|
3018
3291
|
ROCKS_LOG_ERROR(immutable_db_options_.info_log,
|
|
3019
|
-
"Waiting after background flush error: %s"
|
|
3292
|
+
"[JOB %d] Waiting after background flush error: %s"
|
|
3020
3293
|
"Accumulated background error counts: %" PRIu64,
|
|
3021
|
-
s.ToString().c_str(), error_cnt);
|
|
3294
|
+
job_context.job_id, s.ToString().c_str(), error_cnt);
|
|
3022
3295
|
log_buffer.FlushBufferToLog();
|
|
3023
3296
|
LogFlush(immutable_db_options_.info_log);
|
|
3024
3297
|
immutable_db_options_.clock->SleepForMicroseconds(1000000);
|
|
@@ -3027,29 +3300,33 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
3027
3300
|
|
|
3028
3301
|
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FlushFinish:0");
|
|
3029
3302
|
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
|
|
3030
|
-
|
|
3031
|
-
//
|
|
3032
|
-
//
|
|
3033
|
-
|
|
3034
|
-
|
|
3035
|
-
|
|
3036
|
-
|
|
3037
|
-
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
|
|
3041
|
-
|
|
3042
|
-
|
|
3043
|
-
|
|
3044
|
-
|
|
3045
|
-
|
|
3046
|
-
|
|
3047
|
-
|
|
3303
|
+
// There is no need to do this cleanup if the flush job is rescheduled
|
|
3304
|
+
// to retain user-defined timestamps because the job doesn't get to the
|
|
3305
|
+
// stage of actually flushing the MemTables.
|
|
3306
|
+
if (!flush_rescheduled_to_retain_udt) {
|
|
3307
|
+
// If flush failed, we want to delete all temporary files that we might
|
|
3308
|
+
// have created. Thus, we force full scan in FindObsoleteFiles()
|
|
3309
|
+
FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
|
|
3310
|
+
!s.IsColumnFamilyDropped());
|
|
3311
|
+
// delete unnecessary files if any, this is done outside the mutex
|
|
3312
|
+
if (job_context.HaveSomethingToClean() ||
|
|
3313
|
+
job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
|
|
3314
|
+
mutex_.Unlock();
|
|
3315
|
+
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound");
|
|
3316
|
+
// Have to flush the info logs before bg_flush_scheduled_--
|
|
3317
|
+
// because if bg_flush_scheduled_ becomes 0 and the lock is
|
|
3318
|
+
// released, the destructor of DB can kick in and destroy all the
|
|
3319
|
+
// states of DB so info_log might not be available after that point.
|
|
3320
|
+
// It also applies to access other states that DB owns.
|
|
3321
|
+
log_buffer.FlushBufferToLog();
|
|
3322
|
+
if (job_context.HaveSomethingToDelete()) {
|
|
3323
|
+
PurgeObsoleteFiles(job_context);
|
|
3324
|
+
}
|
|
3325
|
+
job_context.Clean();
|
|
3326
|
+
mutex_.Lock();
|
|
3048
3327
|
}
|
|
3049
|
-
|
|
3050
|
-
mutex_.Lock();
|
|
3328
|
+
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp");
|
|
3051
3329
|
}
|
|
3052
|
-
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp");
|
|
3053
3330
|
|
|
3054
3331
|
assert(num_running_flushes_ > 0);
|
|
3055
3332
|
num_running_flushes_--;
|
|
@@ -3256,8 +3533,6 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3256
3533
|
|
|
3257
3534
|
std::unique_ptr<TaskLimiterToken> task_token;
|
|
3258
3535
|
|
|
3259
|
-
// InternalKey manual_end_storage;
|
|
3260
|
-
// InternalKey* manual_end = &manual_end_storage;
|
|
3261
3536
|
bool sfm_reserved_compact_space = false;
|
|
3262
3537
|
if (is_manual) {
|
|
3263
3538
|
ManualCompactionState* m = manual_compaction;
|
|
@@ -3393,6 +3668,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3393
3668
|
}
|
|
3394
3669
|
|
|
3395
3670
|
IOStatus io_s;
|
|
3671
|
+
bool compaction_released = false;
|
|
3396
3672
|
if (!c) {
|
|
3397
3673
|
// Nothing to do
|
|
3398
3674
|
ROCKS_LOG_BUFFER(log_buffer, "Compaction nothing to do");
|
|
@@ -3415,7 +3691,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3415
3691
|
}
|
|
3416
3692
|
status = versions_->LogAndApply(
|
|
3417
3693
|
c->column_family_data(), *c->mutable_cf_options(), read_options,
|
|
3418
|
-
c->edit(), &mutex_, directories_.GetDbDir()
|
|
3694
|
+
c->edit(), &mutex_, directories_.GetDbDir(),
|
|
3695
|
+
/*new_descriptor_log=*/false, /*column_family_options=*/nullptr,
|
|
3696
|
+
[&c, &compaction_released](const Status& s) {
|
|
3697
|
+
c->ReleaseCompactionFiles(s);
|
|
3698
|
+
compaction_released = true;
|
|
3699
|
+
});
|
|
3419
3700
|
io_s = versions_->io_status();
|
|
3420
3701
|
InstallSuperVersionAndScheduleWork(c->column_family_data(),
|
|
3421
3702
|
&job_context->superversion_contexts[0],
|
|
@@ -3423,6 +3704,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3423
3704
|
ROCKS_LOG_BUFFER(log_buffer, "[%s] Deleted %d files\n",
|
|
3424
3705
|
c->column_family_data()->GetName().c_str(),
|
|
3425
3706
|
c->num_input_files(0));
|
|
3707
|
+
if (status.ok() && io_s.ok()) {
|
|
3708
|
+
UpdateDeletionCompactionStats(c);
|
|
3709
|
+
}
|
|
3426
3710
|
*made_progress = true;
|
|
3427
3711
|
TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCompaction:AfterCompaction",
|
|
3428
3712
|
c->column_family_data());
|
|
@@ -3457,7 +3741,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3457
3741
|
f->oldest_blob_file_number, f->oldest_ancester_time,
|
|
3458
3742
|
f->file_creation_time, f->epoch_number, f->file_checksum,
|
|
3459
3743
|
f->file_checksum_func_name, f->unique_id,
|
|
3460
|
-
f->compensated_range_deletion_size, f->tail_size
|
|
3744
|
+
f->compensated_range_deletion_size, f->tail_size,
|
|
3745
|
+
f->user_defined_timestamps_persisted);
|
|
3461
3746
|
|
|
3462
3747
|
ROCKS_LOG_BUFFER(
|
|
3463
3748
|
log_buffer,
|
|
@@ -3480,7 +3765,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3480
3765
|
}
|
|
3481
3766
|
status = versions_->LogAndApply(
|
|
3482
3767
|
c->column_family_data(), *c->mutable_cf_options(), read_options,
|
|
3483
|
-
c->edit(), &mutex_, directories_.GetDbDir()
|
|
3768
|
+
c->edit(), &mutex_, directories_.GetDbDir(),
|
|
3769
|
+
/*new_descriptor_log=*/false, /*column_family_options=*/nullptr,
|
|
3770
|
+
[&c, &compaction_released](const Status& s) {
|
|
3771
|
+
c->ReleaseCompactionFiles(s);
|
|
3772
|
+
compaction_released = true;
|
|
3773
|
+
});
|
|
3484
3774
|
io_s = versions_->io_status();
|
|
3485
3775
|
// Use latest MutableCFOptions
|
|
3486
3776
|
InstallSuperVersionAndScheduleWork(c->column_family_data(),
|
|
@@ -3530,6 +3820,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3530
3820
|
// Transfer requested token, so it doesn't need to do it again.
|
|
3531
3821
|
ca->prepicked_compaction->task_token = std::move(task_token);
|
|
3532
3822
|
++bg_bottom_compaction_scheduled_;
|
|
3823
|
+
assert(c == nullptr);
|
|
3533
3824
|
env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca, Env::Priority::BOTTOM,
|
|
3534
3825
|
this, &DBImpl::UnscheduleCompactionCallback);
|
|
3535
3826
|
} else {
|
|
@@ -3573,8 +3864,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3573
3864
|
compaction_job.Run().PermitUncheckedError();
|
|
3574
3865
|
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:NonTrivial:AfterRun");
|
|
3575
3866
|
mutex_.Lock();
|
|
3576
|
-
|
|
3577
|
-
|
|
3867
|
+
status =
|
|
3868
|
+
compaction_job.Install(*c->mutable_cf_options(), &compaction_released);
|
|
3578
3869
|
io_s = compaction_job.io_status();
|
|
3579
3870
|
if (status.ok()) {
|
|
3580
3871
|
InstallSuperVersionAndScheduleWork(c->column_family_data(),
|
|
@@ -3593,7 +3884,23 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3593
3884
|
}
|
|
3594
3885
|
|
|
3595
3886
|
if (c != nullptr) {
|
|
3596
|
-
|
|
3887
|
+
if (!compaction_released) {
|
|
3888
|
+
c->ReleaseCompactionFiles(status);
|
|
3889
|
+
} else {
|
|
3890
|
+
#ifndef NDEBUG
|
|
3891
|
+
// Sanity checking that compaction files are freed.
|
|
3892
|
+
for (size_t i = 0; i < c->num_input_levels(); i++) {
|
|
3893
|
+
for (size_t j = 0; j < c->inputs(i)->size(); j++) {
|
|
3894
|
+
assert(!c->input(i, j)->being_compacted);
|
|
3895
|
+
}
|
|
3896
|
+
}
|
|
3897
|
+
std::unordered_set<Compaction*>* cip = c->column_family_data()
|
|
3898
|
+
->compaction_picker()
|
|
3899
|
+
->compactions_in_progress();
|
|
3900
|
+
assert(cip->find(c.get()) == cip->end());
|
|
3901
|
+
#endif
|
|
3902
|
+
}
|
|
3903
|
+
|
|
3597
3904
|
*made_progress = true;
|
|
3598
3905
|
|
|
3599
3906
|
// Need to make sure SstFileManager does its bookkeeping
|
|
@@ -3778,10 +4085,31 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
|
|
|
3778
4085
|
return false;
|
|
3779
4086
|
}
|
|
3780
4087
|
|
|
4088
|
+
void DBImpl::UpdateDeletionCompactionStats(
|
|
4089
|
+
const std::unique_ptr<Compaction>& c) {
|
|
4090
|
+
if (c == nullptr) {
|
|
4091
|
+
return;
|
|
4092
|
+
}
|
|
4093
|
+
|
|
4094
|
+
CompactionReason reason = c->compaction_reason();
|
|
4095
|
+
|
|
4096
|
+
switch (reason) {
|
|
4097
|
+
case CompactionReason::kFIFOMaxSize:
|
|
4098
|
+
RecordTick(stats_, FIFO_MAX_SIZE_COMPACTIONS);
|
|
4099
|
+
break;
|
|
4100
|
+
case CompactionReason::kFIFOTtl:
|
|
4101
|
+
RecordTick(stats_, FIFO_TTL_COMPACTIONS);
|
|
4102
|
+
break;
|
|
4103
|
+
default:
|
|
4104
|
+
assert(false);
|
|
4105
|
+
break;
|
|
4106
|
+
}
|
|
4107
|
+
}
|
|
4108
|
+
|
|
3781
4109
|
void DBImpl::BuildCompactionJobInfo(
|
|
3782
4110
|
const ColumnFamilyData* cfd, Compaction* c, const Status& st,
|
|
3783
4111
|
const CompactionJobStats& compaction_job_stats, const int job_id,
|
|
3784
|
-
|
|
4112
|
+
CompactionJobInfo* compaction_job_info) const {
|
|
3785
4113
|
assert(compaction_job_info != nullptr);
|
|
3786
4114
|
compaction_job_info->cf_id = cfd->GetID();
|
|
3787
4115
|
compaction_job_info->cf_name = cfd->GetName();
|
|
@@ -3791,7 +4119,12 @@ void DBImpl::BuildCompactionJobInfo(
|
|
|
3791
4119
|
compaction_job_info->base_input_level = c->start_level();
|
|
3792
4120
|
compaction_job_info->output_level = c->output_level();
|
|
3793
4121
|
compaction_job_info->stats = compaction_job_stats;
|
|
3794
|
-
|
|
4122
|
+
const auto& input_table_properties = c->GetInputTableProperties();
|
|
4123
|
+
const auto& output_table_properties = c->GetOutputTableProperties();
|
|
4124
|
+
compaction_job_info->table_properties.insert(input_table_properties.begin(),
|
|
4125
|
+
input_table_properties.end());
|
|
4126
|
+
compaction_job_info->table_properties.insert(output_table_properties.begin(),
|
|
4127
|
+
output_table_properties.end());
|
|
3795
4128
|
compaction_job_info->compaction_reason = c->compaction_reason();
|
|
3796
4129
|
compaction_job_info->compression = c->output_compression();
|
|
3797
4130
|
|
|
@@ -3805,15 +4138,9 @@ void DBImpl::BuildCompactionJobInfo(
|
|
|
3805
4138
|
compaction_job_info->input_files.push_back(fn);
|
|
3806
4139
|
compaction_job_info->input_file_infos.push_back(CompactionFileInfo{
|
|
3807
4140
|
static_cast<int>(i), file_number, fmd->oldest_blob_file_number});
|
|
3808
|
-
if (compaction_job_info->table_properties.count(fn) == 0) {
|
|
3809
|
-
std::shared_ptr<const TableProperties> tp;
|
|
3810
|
-
auto s = current->GetTableProperties(read_options, &tp, fmd, &fn);
|
|
3811
|
-
if (s.ok()) {
|
|
3812
|
-
compaction_job_info->table_properties[fn] = tp;
|
|
3813
|
-
}
|
|
3814
|
-
}
|
|
3815
4141
|
}
|
|
3816
4142
|
}
|
|
4143
|
+
|
|
3817
4144
|
for (const auto& newf : c->edit()->GetNewFiles()) {
|
|
3818
4145
|
const FileMetaData& meta = newf.second;
|
|
3819
4146
|
const FileDescriptor& desc = meta.fd;
|
|
@@ -3957,20 +4284,54 @@ void DBImpl::GetSnapshotContext(
|
|
|
3957
4284
|
*snapshot_seqs = snapshots_.GetAll(earliest_write_conflict_snapshot);
|
|
3958
4285
|
}
|
|
3959
4286
|
|
|
3960
|
-
Status DBImpl::WaitForCompact(
|
|
4287
|
+
Status DBImpl::WaitForCompact(
|
|
4288
|
+
const WaitForCompactOptions& wait_for_compact_options) {
|
|
3961
4289
|
InstrumentedMutexLock l(&mutex_);
|
|
4290
|
+
if (wait_for_compact_options.flush) {
|
|
4291
|
+
Status s = DBImpl::FlushAllColumnFamilies(FlushOptions(),
|
|
4292
|
+
FlushReason::kManualFlush);
|
|
4293
|
+
if (!s.ok()) {
|
|
4294
|
+
return s;
|
|
4295
|
+
}
|
|
4296
|
+
} else if (wait_for_compact_options.close_db &&
|
|
4297
|
+
has_unpersisted_data_.load(std::memory_order_relaxed) &&
|
|
4298
|
+
!mutable_db_options_.avoid_flush_during_shutdown) {
|
|
4299
|
+
Status s =
|
|
4300
|
+
DBImpl::FlushAllColumnFamilies(FlushOptions(), FlushReason::kShutDown);
|
|
4301
|
+
if (!s.ok()) {
|
|
4302
|
+
return s;
|
|
4303
|
+
}
|
|
4304
|
+
}
|
|
4305
|
+
TEST_SYNC_POINT("DBImpl::WaitForCompact:StartWaiting");
|
|
4306
|
+
const auto deadline = immutable_db_options_.clock->NowMicros() +
|
|
4307
|
+
wait_for_compact_options.timeout.count();
|
|
3962
4308
|
for (;;) {
|
|
3963
4309
|
if (shutting_down_.load(std::memory_order_acquire)) {
|
|
3964
4310
|
return Status::ShutdownInProgress();
|
|
3965
4311
|
}
|
|
3966
|
-
if (bg_work_paused_ && abort_on_pause) {
|
|
4312
|
+
if (bg_work_paused_ && wait_for_compact_options.abort_on_pause) {
|
|
3967
4313
|
return Status::Aborted();
|
|
3968
4314
|
}
|
|
3969
4315
|
if ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
|
|
3970
4316
|
bg_flush_scheduled_ || unscheduled_compactions_ ||
|
|
3971
|
-
unscheduled_flushes_) &&
|
|
4317
|
+
unscheduled_flushes_ || error_handler_.IsRecoveryInProgress()) &&
|
|
3972
4318
|
(error_handler_.GetBGError().ok())) {
|
|
3973
|
-
|
|
4319
|
+
if (wait_for_compact_options.timeout.count()) {
|
|
4320
|
+
if (bg_cv_.TimedWait(deadline)) {
|
|
4321
|
+
return Status::TimedOut();
|
|
4322
|
+
}
|
|
4323
|
+
} else {
|
|
4324
|
+
bg_cv_.Wait();
|
|
4325
|
+
}
|
|
4326
|
+
} else if (wait_for_compact_options.close_db) {
|
|
4327
|
+
reject_new_background_jobs_ = true;
|
|
4328
|
+
mutex_.Unlock();
|
|
4329
|
+
Status s = Close();
|
|
4330
|
+
mutex_.Lock();
|
|
4331
|
+
if (!s.ok()) {
|
|
4332
|
+
reject_new_background_jobs_ = false;
|
|
4333
|
+
}
|
|
4334
|
+
return s;
|
|
3974
4335
|
} else {
|
|
3975
4336
|
return error_handler_.GetBGError();
|
|
3976
4337
|
}
|