@nxtedition/rocksdb 8.2.8 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
- package/deps/rocksdb/rocksdb/Makefile +22 -19
- package/deps/rocksdb/rocksdb/TARGETS +8 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
- package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
- package/deps/rocksdb/rocksdb/db/c.cc +169 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
- package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
- package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
- package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
- package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
- package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
- package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
- package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
- package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
- package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
- package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
- package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
- package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
- package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
- package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
- package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
- package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
- package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
- package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
- package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
- package/deps/rocksdb/rocksdb/env/env.cc +1 -2
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
- package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
- package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
- package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
- package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
- package/deps/rocksdb/rocksdb/options/options.cc +15 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
- package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
- package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
- package/deps/rocksdb/rocksdb/src.mk +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
- package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/format.cc +175 -33
- package/deps/rocksdb/rocksdb/table/format.h +63 -10
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
- package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
- package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
- package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
- package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
- package/deps/rocksdb/rocksdb/util/compression.h +110 -32
- package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
- package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
- package/deps/rocksdb/rocksdb/util/hash.h +7 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
- package/deps/rocksdb/rocksdb/util/math.h +58 -6
- package/deps/rocksdb/rocksdb/util/math128.h +29 -7
- package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
- package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
- package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#include "monitoring/thread_status_util.h"
|
|
22
22
|
#include "test_util/sync_point.h"
|
|
23
23
|
#include "util/cast_util.h"
|
|
24
|
+
#include "util/coding.h"
|
|
24
25
|
#include "util/concurrent_task_limiter_impl.h"
|
|
25
26
|
|
|
26
27
|
namespace ROCKSDB_NAMESPACE {
|
|
@@ -76,6 +77,40 @@ bool DBImpl::RequestCompactionToken(ColumnFamilyData* cfd, bool force,
|
|
|
76
77
|
return false;
|
|
77
78
|
}
|
|
78
79
|
|
|
80
|
+
bool DBImpl::ShouldRescheduleFlushRequestToRetainUDT(
|
|
81
|
+
const FlushRequest& flush_req) {
|
|
82
|
+
mutex_.AssertHeld();
|
|
83
|
+
assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
84
|
+
ColumnFamilyData* cfd = flush_req.cfd_to_max_mem_id_to_persist.begin()->first;
|
|
85
|
+
uint64_t max_memtable_id =
|
|
86
|
+
flush_req.cfd_to_max_mem_id_to_persist.begin()->second;
|
|
87
|
+
if (cfd->IsDropped() ||
|
|
88
|
+
!cfd->ShouldPostponeFlushToRetainUDT(max_memtable_id)) {
|
|
89
|
+
return false;
|
|
90
|
+
}
|
|
91
|
+
// Check if holding on the flush will cause entering write stall mode.
|
|
92
|
+
// Write stall entered because of the accumulation of write buffers can be
|
|
93
|
+
// alleviated if we continue with the flush instead of postponing it.
|
|
94
|
+
const auto& mutable_cf_options = *cfd->GetLatestMutableCFOptions();
|
|
95
|
+
|
|
96
|
+
// Taking the status of the active Memtable into consideration so that we are
|
|
97
|
+
// not just checking if DB is currently already in write stall mode.
|
|
98
|
+
int mem_to_flush = cfd->mem()->ApproximateMemoryUsageFast() >=
|
|
99
|
+
cfd->mem()->write_buffer_size() / 2
|
|
100
|
+
? 1
|
|
101
|
+
: 0;
|
|
102
|
+
WriteStallCondition write_stall =
|
|
103
|
+
ColumnFamilyData::GetWriteStallConditionAndCause(
|
|
104
|
+
cfd->imm()->NumNotFlushed() + mem_to_flush, /*num_l0_files=*/0,
|
|
105
|
+
/*num_compaction_needed_bytes=*/0, mutable_cf_options,
|
|
106
|
+
*cfd->ioptions())
|
|
107
|
+
.first;
|
|
108
|
+
if (write_stall != WriteStallCondition::kNormal) {
|
|
109
|
+
return false;
|
|
110
|
+
}
|
|
111
|
+
return true;
|
|
112
|
+
}
|
|
113
|
+
|
|
79
114
|
IOStatus DBImpl::SyncClosedLogs(JobContext* job_context,
|
|
80
115
|
VersionEdit* synced_wals) {
|
|
81
116
|
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Start");
|
|
@@ -248,6 +283,24 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
248
283
|
// If the log sync failed, we do not need to pick memtable. Otherwise,
|
|
249
284
|
// num_flush_not_started_ needs to be rollback.
|
|
250
285
|
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
|
|
286
|
+
// Exit a flush due to bg error should not set bg error again.
|
|
287
|
+
bool skip_set_bg_error = false;
|
|
288
|
+
if (s.ok() && !error_handler_.GetBGError().ok() &&
|
|
289
|
+
error_handler_.IsBGWorkStopped() &&
|
|
290
|
+
flush_reason != FlushReason::kErrorRecovery &&
|
|
291
|
+
flush_reason != FlushReason::kErrorRecoveryRetryFlush) {
|
|
292
|
+
// Error recovery in progress, should not pick memtable which excludes
|
|
293
|
+
// them from being picked up by recovery flush.
|
|
294
|
+
// This ensures that when bg error is set, no new flush can pick
|
|
295
|
+
// memtables.
|
|
296
|
+
skip_set_bg_error = true;
|
|
297
|
+
s = error_handler_.GetBGError();
|
|
298
|
+
assert(!s.ok());
|
|
299
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
300
|
+
"[JOB %d] Skip flush due to background error %s",
|
|
301
|
+
job_context->job_id, s.ToString().c_str());
|
|
302
|
+
}
|
|
303
|
+
|
|
251
304
|
if (s.ok()) {
|
|
252
305
|
flush_job.PickMemTable();
|
|
253
306
|
need_cancel = true;
|
|
@@ -268,7 +321,8 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
268
321
|
// is unlocked by the current thread.
|
|
269
322
|
if (s.ok()) {
|
|
270
323
|
s = flush_job.Run(&logs_with_prep_tracker_, &file_meta,
|
|
271
|
-
&switched_to_mempurge
|
|
324
|
+
&switched_to_mempurge, &skip_set_bg_error,
|
|
325
|
+
&error_handler_);
|
|
272
326
|
need_cancel = false;
|
|
273
327
|
}
|
|
274
328
|
|
|
@@ -309,7 +363,8 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
309
363
|
}
|
|
310
364
|
}
|
|
311
365
|
|
|
312
|
-
if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()
|
|
366
|
+
if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped() &&
|
|
367
|
+
!skip_set_bg_error) {
|
|
313
368
|
if (log_io_s.ok()) {
|
|
314
369
|
// Error while writing to MANIFEST.
|
|
315
370
|
// In fact, versions_->io_status() can also be the result of renaming
|
|
@@ -521,6 +576,21 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
521
576
|
pick_status.push_back(false);
|
|
522
577
|
}
|
|
523
578
|
|
|
579
|
+
bool flush_for_recovery =
|
|
580
|
+
bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
|
|
581
|
+
bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecoveryRetryFlush;
|
|
582
|
+
bool skip_set_bg_error = false;
|
|
583
|
+
|
|
584
|
+
if (s.ok() && !error_handler_.GetBGError().ok() &&
|
|
585
|
+
error_handler_.IsBGWorkStopped() && !flush_for_recovery) {
|
|
586
|
+
s = error_handler_.GetBGError();
|
|
587
|
+
skip_set_bg_error = true;
|
|
588
|
+
assert(!s.ok());
|
|
589
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
590
|
+
"[JOB %d] Skip flush due to background error %s",
|
|
591
|
+
job_context->job_id, s.ToString().c_str());
|
|
592
|
+
}
|
|
593
|
+
|
|
524
594
|
if (s.ok()) {
|
|
525
595
|
for (int i = 0; i != num_cfs; ++i) {
|
|
526
596
|
jobs[i]->PickMemTable();
|
|
@@ -585,7 +655,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
585
655
|
}
|
|
586
656
|
}
|
|
587
657
|
}
|
|
588
|
-
} else {
|
|
658
|
+
} else if (!skip_set_bg_error) {
|
|
659
|
+
// When `skip_set_bg_error` is true, no memtable is picked so
|
|
660
|
+
// there is no need to call Cancel() or RollbackMemtableFlush().
|
|
661
|
+
//
|
|
589
662
|
// Need to undo atomic flush if something went wrong, i.e. s is not OK and
|
|
590
663
|
// it is not because of CF drop.
|
|
591
664
|
// Have to cancel the flush jobs that have NOT executed because we need to
|
|
@@ -598,8 +671,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
598
671
|
for (int i = 0; i != num_cfs; ++i) {
|
|
599
672
|
if (exec_status[i].second.ok() && exec_status[i].first) {
|
|
600
673
|
auto& mems = jobs[i]->GetMemTables();
|
|
601
|
-
cfds[i]->imm()->RollbackMemtableFlush(
|
|
602
|
-
|
|
674
|
+
cfds[i]->imm()->RollbackMemtableFlush(
|
|
675
|
+
mems, /*rollback_succeeding_memtables=*/false);
|
|
603
676
|
}
|
|
604
677
|
}
|
|
605
678
|
}
|
|
@@ -641,10 +714,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
641
714
|
};
|
|
642
715
|
|
|
643
716
|
bool resuming_from_bg_err =
|
|
644
|
-
error_handler_.IsDBStopped() ||
|
|
645
|
-
(bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
|
|
646
|
-
bg_flush_args[0].flush_reason_ ==
|
|
647
|
-
FlushReason::kErrorRecoveryRetryFlush);
|
|
717
|
+
error_handler_.IsDBStopped() || flush_for_recovery;
|
|
648
718
|
while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) {
|
|
649
719
|
std::pair<Status, bool> res = wait_to_install_func();
|
|
650
720
|
|
|
@@ -655,15 +725,27 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
655
725
|
s = res.first;
|
|
656
726
|
break;
|
|
657
727
|
} else if (!res.second) {
|
|
728
|
+
// we are the oldest immutable memtable
|
|
729
|
+
break;
|
|
730
|
+
}
|
|
731
|
+
// We are not the oldest immutable memtable
|
|
732
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
733
|
+
"DBImpl::AtomicFlushMemTablesToOutputFiles:WaitCV", &res);
|
|
734
|
+
//
|
|
735
|
+
// If bg work is stopped, recovery thread first calls
|
|
736
|
+
// WaitForBackgroundWork() before proceeding to flush for recovery. This
|
|
737
|
+
// flush can block WaitForBackgroundWork() while waiting for recovery
|
|
738
|
+
// flush to install result. To avoid this deadlock, we should abort here
|
|
739
|
+
// if there is background error.
|
|
740
|
+
if (!flush_for_recovery && error_handler_.IsBGWorkStopped() &&
|
|
741
|
+
!error_handler_.GetBGError().ok()) {
|
|
742
|
+
s = error_handler_.GetBGError();
|
|
743
|
+
assert(!s.ok());
|
|
658
744
|
break;
|
|
659
745
|
}
|
|
660
746
|
atomic_flush_install_cv_.Wait();
|
|
661
747
|
|
|
662
|
-
resuming_from_bg_err =
|
|
663
|
-
error_handler_.IsDBStopped() ||
|
|
664
|
-
(bg_flush_args[0].flush_reason_ == FlushReason::kErrorRecovery ||
|
|
665
|
-
bg_flush_args[0].flush_reason_ ==
|
|
666
|
-
FlushReason::kErrorRecoveryRetryFlush);
|
|
748
|
+
resuming_from_bg_err = error_handler_.IsDBStopped() || flush_for_recovery;
|
|
667
749
|
}
|
|
668
750
|
|
|
669
751
|
if (!resuming_from_bg_err) {
|
|
@@ -679,6 +761,17 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
679
761
|
// installation.
|
|
680
762
|
s = error_handler_.GetRecoveryError();
|
|
681
763
|
}
|
|
764
|
+
// Since we are not installing these memtables, need to rollback
|
|
765
|
+
// to allow future flush job to pick up these memtables.
|
|
766
|
+
if (!s.ok()) {
|
|
767
|
+
for (int i = 0; i != num_cfs; ++i) {
|
|
768
|
+
assert(exec_status[i].first);
|
|
769
|
+
assert(exec_status[i].second.ok());
|
|
770
|
+
auto& mems = jobs[i]->GetMemTables();
|
|
771
|
+
cfds[i]->imm()->RollbackMemtableFlush(
|
|
772
|
+
mems, /*rollback_succeeding_memtables=*/false);
|
|
773
|
+
}
|
|
774
|
+
}
|
|
682
775
|
}
|
|
683
776
|
|
|
684
777
|
if (s.ok()) {
|
|
@@ -782,7 +875,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
782
875
|
|
|
783
876
|
// Need to undo atomic flush if something went wrong, i.e. s is not OK and
|
|
784
877
|
// it is not because of CF drop.
|
|
785
|
-
if (!s.ok() && !s.IsColumnFamilyDropped()) {
|
|
878
|
+
if (!s.ok() && !s.IsColumnFamilyDropped() && !skip_set_bg_error) {
|
|
786
879
|
if (log_io_s.ok()) {
|
|
787
880
|
// Error while writing to MANIFEST.
|
|
788
881
|
// In fact, versions_->io_status() can also be the result of renaming
|
|
@@ -852,8 +945,8 @@ void DBImpl::NotifyOnFlushBegin(ColumnFamilyData* cfd, FileMetaData* file_meta,
|
|
|
852
945
|
}
|
|
853
946
|
}
|
|
854
947
|
mutex_.Lock();
|
|
855
|
-
// no need to signal bg_cv_ as it will be signaled at the end of the
|
|
856
|
-
// flush process.
|
|
948
|
+
// no need to signal bg_cv_ as it will be signaled at the end of the
|
|
949
|
+
// flush process.
|
|
857
950
|
}
|
|
858
951
|
|
|
859
952
|
void DBImpl::NotifyOnFlushCompleted(
|
|
@@ -1066,7 +1159,6 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1066
1159
|
std::numeric_limits<uint64_t>::max(), trim_ts);
|
|
1067
1160
|
} else {
|
|
1068
1161
|
int first_overlapped_level = kInvalidLevel;
|
|
1069
|
-
int max_overlapped_level = kInvalidLevel;
|
|
1070
1162
|
{
|
|
1071
1163
|
SuperVersion* super_version = cfd->GetReferencedSuperVersion(this);
|
|
1072
1164
|
Version* current_version = super_version->current;
|
|
@@ -1142,10 +1234,8 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1142
1234
|
begin, end);
|
|
1143
1235
|
}
|
|
1144
1236
|
if (overlap) {
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
}
|
|
1148
|
-
max_overlapped_level = level;
|
|
1237
|
+
first_overlapped_level = level;
|
|
1238
|
+
break;
|
|
1149
1239
|
}
|
|
1150
1240
|
}
|
|
1151
1241
|
CleanupSuperVersion(super_version);
|
|
@@ -1159,7 +1249,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1159
1249
|
end, exclusive, true /* disallow_trivial_move */,
|
|
1160
1250
|
std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
|
|
1161
1251
|
trim_ts);
|
|
1162
|
-
final_output_level =
|
|
1252
|
+
final_output_level = first_overlapped_level;
|
|
1163
1253
|
} else {
|
|
1164
1254
|
assert(cfd->ioptions()->compaction_style == kCompactionStyleLevel);
|
|
1165
1255
|
uint64_t next_file_number = versions_->current_next_file_number();
|
|
@@ -1171,7 +1261,29 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1171
1261
|
int level = first_overlapped_level;
|
|
1172
1262
|
final_output_level = level;
|
|
1173
1263
|
int output_level = 0, base_level = 0;
|
|
1174
|
-
|
|
1264
|
+
for (;;) {
|
|
1265
|
+
// Always allow L0 -> L1 compaction
|
|
1266
|
+
if (level > 0) {
|
|
1267
|
+
if (cfd->ioptions()->level_compaction_dynamic_level_bytes) {
|
|
1268
|
+
assert(final_output_level < cfd->ioptions()->num_levels);
|
|
1269
|
+
if (final_output_level + 1 == cfd->ioptions()->num_levels) {
|
|
1270
|
+
break;
|
|
1271
|
+
}
|
|
1272
|
+
} else {
|
|
1273
|
+
// TODO(cbi): there is still a race condition here where
|
|
1274
|
+
// if a background compaction compacts some file beyond
|
|
1275
|
+
// current()->storage_info()->num_non_empty_levels() right after
|
|
1276
|
+
// the check here.This should happen very infrequently and should
|
|
1277
|
+
// not happen once a user populates the last level of the LSM.
|
|
1278
|
+
InstrumentedMutexLock l(&mutex_);
|
|
1279
|
+
// num_non_empty_levels may be lower after a compaction, so
|
|
1280
|
+
// we check for >= here.
|
|
1281
|
+
if (final_output_level + 1 >=
|
|
1282
|
+
cfd->current()->storage_info()->num_non_empty_levels()) {
|
|
1283
|
+
break;
|
|
1284
|
+
}
|
|
1285
|
+
}
|
|
1286
|
+
}
|
|
1175
1287
|
output_level = level + 1;
|
|
1176
1288
|
if (cfd->ioptions()->level_compaction_dynamic_level_bytes &&
|
|
1177
1289
|
level == 0) {
|
|
@@ -1203,17 +1315,8 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1203
1315
|
if (s.ok()) {
|
|
1204
1316
|
assert(final_output_level > 0);
|
|
1205
1317
|
// bottommost level intra-level compaction
|
|
1206
|
-
// TODO(cbi): this preserves earlier behavior where if
|
|
1207
|
-
// max_overlapped_level = 0 and bottommost_level_compaction is
|
|
1208
|
-
// kIfHaveCompactionFilter, we only do a L0 -> LBase compaction
|
|
1209
|
-
// and do not do intra-LBase compaction even when user configures
|
|
1210
|
-
// compaction filter. We may want to still do a LBase -> LBase
|
|
1211
|
-
// compaction in case there is some file in LBase that did not go
|
|
1212
|
-
// through L0 -> LBase compaction, and hence did not go through
|
|
1213
|
-
// compaction filter.
|
|
1214
1318
|
if ((options.bottommost_level_compaction ==
|
|
1215
1319
|
BottommostLevelCompaction::kIfHaveCompactionFilter &&
|
|
1216
|
-
max_overlapped_level != 0 &&
|
|
1217
1320
|
(cfd->ioptions()->compaction_filter != nullptr ||
|
|
1218
1321
|
cfd->ioptions()->compaction_filter_factory != nullptr)) ||
|
|
1219
1322
|
options.bottommost_level_compaction ==
|
|
@@ -1221,10 +1324,11 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
1221
1324
|
options.bottommost_level_compaction ==
|
|
1222
1325
|
BottommostLevelCompaction::kForce) {
|
|
1223
1326
|
// Use `next_file_number` as `max_file_num_to_ignore` to avoid
|
|
1224
|
-
// rewriting newly compacted files when it is kForceOptimized
|
|
1327
|
+
// rewriting newly compacted files when it is kForceOptimized
|
|
1328
|
+
// or kIfHaveCompactionFilter with compaction filter set.
|
|
1225
1329
|
s = RunManualCompaction(
|
|
1226
1330
|
cfd, final_output_level, final_output_level, options, begin,
|
|
1227
|
-
end, exclusive,
|
|
1331
|
+
end, exclusive, true /* disallow_trivial_move */,
|
|
1228
1332
|
next_file_number /* max_file_num_to_ignore */, trim_ts);
|
|
1229
1333
|
}
|
|
1230
1334
|
}
|
|
@@ -1375,6 +1479,14 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1375
1479
|
}
|
|
1376
1480
|
}
|
|
1377
1481
|
|
|
1482
|
+
if (cfd->ioptions()->allow_ingest_behind &&
|
|
1483
|
+
output_level >= cfd->ioptions()->num_levels - 1) {
|
|
1484
|
+
return Status::InvalidArgument(
|
|
1485
|
+
"Exceed the maximum output level defined by "
|
|
1486
|
+
"the current compaction algorithm with ingest_behind --- " +
|
|
1487
|
+
std::to_string(cfd->ioptions()->num_levels - 1));
|
|
1488
|
+
}
|
|
1489
|
+
|
|
1378
1490
|
Status s = cfd->compaction_picker()->SanitizeCompactionInputFiles(
|
|
1379
1491
|
&input_set, cf_meta, output_level);
|
|
1380
1492
|
TEST_SYNC_POINT("DBImpl::CompactFilesImpl::PostSanitizeCompactionInputFiles");
|
|
@@ -1492,7 +1604,7 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1492
1604
|
|
|
1493
1605
|
if (compaction_job_info != nullptr) {
|
|
1494
1606
|
BuildCompactionJobInfo(cfd, c.get(), s, compaction_job_stats,
|
|
1495
|
-
job_context->job_id,
|
|
1607
|
+
job_context->job_id, compaction_job_info);
|
|
1496
1608
|
}
|
|
1497
1609
|
|
|
1498
1610
|
if (status.ok()) {
|
|
@@ -1589,21 +1701,18 @@ void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c,
|
|
|
1589
1701
|
}
|
|
1590
1702
|
|
|
1591
1703
|
c->SetNotifyOnCompactionCompleted();
|
|
1592
|
-
Version* current = cfd->current();
|
|
1593
|
-
current->Ref();
|
|
1594
1704
|
// release lock while notifying events
|
|
1595
1705
|
mutex_.Unlock();
|
|
1596
1706
|
TEST_SYNC_POINT("DBImpl::NotifyOnCompactionBegin::UnlockMutex");
|
|
1597
1707
|
{
|
|
1598
1708
|
CompactionJobInfo info{};
|
|
1599
|
-
BuildCompactionJobInfo(cfd, c, st, job_stats, job_id,
|
|
1709
|
+
BuildCompactionJobInfo(cfd, c, st, job_stats, job_id, &info);
|
|
1600
1710
|
for (auto listener : immutable_db_options_.listeners) {
|
|
1601
1711
|
listener->OnCompactionBegin(this, info);
|
|
1602
1712
|
}
|
|
1603
1713
|
info.status.PermitUncheckedError();
|
|
1604
1714
|
}
|
|
1605
1715
|
mutex_.Lock();
|
|
1606
|
-
current->Unref();
|
|
1607
1716
|
}
|
|
1608
1717
|
|
|
1609
1718
|
void DBImpl::NotifyOnCompactionCompleted(
|
|
@@ -1621,21 +1730,17 @@ void DBImpl::NotifyOnCompactionCompleted(
|
|
|
1621
1730
|
return;
|
|
1622
1731
|
}
|
|
1623
1732
|
|
|
1624
|
-
Version* current = cfd->current();
|
|
1625
|
-
current->Ref();
|
|
1626
1733
|
// release lock while notifying events
|
|
1627
1734
|
mutex_.Unlock();
|
|
1628
1735
|
TEST_SYNC_POINT("DBImpl::NotifyOnCompactionCompleted::UnlockMutex");
|
|
1629
1736
|
{
|
|
1630
1737
|
CompactionJobInfo info{};
|
|
1631
|
-
BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id,
|
|
1632
|
-
&info);
|
|
1738
|
+
BuildCompactionJobInfo(cfd, c, st, compaction_job_stats, job_id, &info);
|
|
1633
1739
|
for (auto listener : immutable_db_options_.listeners) {
|
|
1634
1740
|
listener->OnCompactionCompleted(this, info);
|
|
1635
1741
|
}
|
|
1636
1742
|
}
|
|
1637
1743
|
mutex_.Lock();
|
|
1638
|
-
current->Unref();
|
|
1639
1744
|
// no need to signal bg_cv_ as it will be signaled at the end of the
|
|
1640
1745
|
// flush process.
|
|
1641
1746
|
}
|
|
@@ -1758,7 +1863,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|
|
1758
1863
|
f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
|
|
1759
1864
|
f->oldest_ancester_time, f->file_creation_time, f->epoch_number,
|
|
1760
1865
|
f->file_checksum, f->file_checksum_func_name, f->unique_id,
|
|
1761
|
-
f->compensated_range_deletion_size, f->tail_size
|
|
1866
|
+
f->compensated_range_deletion_size, f->tail_size,
|
|
1867
|
+
f->user_defined_timestamps_persisted);
|
|
1762
1868
|
}
|
|
1763
1869
|
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
|
|
1764
1870
|
"[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
|
|
@@ -1808,6 +1914,37 @@ int DBImpl::Level0StopWriteTrigger(ColumnFamilyHandle* column_family) {
|
|
|
1808
1914
|
->mutable_cf_options.level0_stop_writes_trigger;
|
|
1809
1915
|
}
|
|
1810
1916
|
|
|
1917
|
+
Status DBImpl::FlushAllColumnFamilies(const FlushOptions& flush_options,
|
|
1918
|
+
FlushReason flush_reason) {
|
|
1919
|
+
mutex_.AssertHeld();
|
|
1920
|
+
Status status;
|
|
1921
|
+
if (immutable_db_options_.atomic_flush) {
|
|
1922
|
+
mutex_.Unlock();
|
|
1923
|
+
status = AtomicFlushMemTables(flush_options, flush_reason);
|
|
1924
|
+
if (status.IsColumnFamilyDropped()) {
|
|
1925
|
+
status = Status::OK();
|
|
1926
|
+
}
|
|
1927
|
+
mutex_.Lock();
|
|
1928
|
+
} else {
|
|
1929
|
+
for (auto cfd : versions_->GetRefedColumnFamilySet()) {
|
|
1930
|
+
if (cfd->IsDropped()) {
|
|
1931
|
+
continue;
|
|
1932
|
+
}
|
|
1933
|
+
mutex_.Unlock();
|
|
1934
|
+
status = FlushMemTable(cfd, flush_options, flush_reason);
|
|
1935
|
+
TEST_SYNC_POINT("DBImpl::FlushAllColumnFamilies:1");
|
|
1936
|
+
TEST_SYNC_POINT("DBImpl::FlushAllColumnFamilies:2");
|
|
1937
|
+
mutex_.Lock();
|
|
1938
|
+
if (!status.ok() && !status.IsColumnFamilyDropped()) {
|
|
1939
|
+
break;
|
|
1940
|
+
} else if (status.IsColumnFamilyDropped()) {
|
|
1941
|
+
status = Status::OK();
|
|
1942
|
+
}
|
|
1943
|
+
}
|
|
1944
|
+
}
|
|
1945
|
+
return status;
|
|
1946
|
+
}
|
|
1947
|
+
|
|
1811
1948
|
Status DBImpl::Flush(const FlushOptions& flush_options,
|
|
1812
1949
|
ColumnFamilyHandle* column_family) {
|
|
1813
1950
|
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
|
|
@@ -2144,9 +2281,13 @@ Status DBImpl::FlushMemTable(ColumnFamilyData* cfd,
|
|
|
2144
2281
|
WaitForPendingWrites();
|
|
2145
2282
|
|
|
2146
2283
|
if (flush_reason != FlushReason::kErrorRecoveryRetryFlush &&
|
|
2284
|
+
flush_reason != FlushReason::kCatchUpAfterErrorRecovery &&
|
|
2147
2285
|
(!cfd->mem()->IsEmpty() || !cached_recoverable_state_empty_.load())) {
|
|
2148
2286
|
// Note that, when flush reason is kErrorRecoveryRetryFlush, during the
|
|
2149
2287
|
// auto retry resume, we want to avoid creating new small memtables.
|
|
2288
|
+
// If flush reason is kCatchUpAfterErrorRecovery, we try to flush any new
|
|
2289
|
+
// memtable that filled up during recovery, and we also want to avoid
|
|
2290
|
+
// switching memtable to create small memtables.
|
|
2150
2291
|
// Therefore, SwitchMemtable will not be called. Also, since ResumeImpl
|
|
2151
2292
|
// will iterate through all the CFs and call FlushMemtable during auto
|
|
2152
2293
|
// retry resume, it is possible that in some CFs,
|
|
@@ -2337,7 +2478,8 @@ Status DBImpl::AtomicFlushMemTables(
|
|
|
2337
2478
|
|
|
2338
2479
|
for (auto cfd : cfds) {
|
|
2339
2480
|
if ((cfd->mem()->IsEmpty() && cached_recoverable_state_empty_.load()) ||
|
|
2340
|
-
flush_reason == FlushReason::kErrorRecoveryRetryFlush
|
|
2481
|
+
flush_reason == FlushReason::kErrorRecoveryRetryFlush ||
|
|
2482
|
+
flush_reason == FlushReason::kCatchUpAfterErrorRecovery) {
|
|
2341
2483
|
continue;
|
|
2342
2484
|
}
|
|
2343
2485
|
cfd->Ref();
|
|
@@ -2455,8 +2597,11 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|
|
2455
2597
|
// check whether one extra immutable memtable or an extra L0 file would
|
|
2456
2598
|
// cause write stalling mode to be entered. It could still enter stall
|
|
2457
2599
|
// mode due to pending compaction bytes, but that's less common
|
|
2600
|
+
// No extra immutable Memtable will be created if the current Memtable is
|
|
2601
|
+
// empty.
|
|
2602
|
+
int mem_to_flush = cfd->mem()->IsEmpty() ? 0 : 1;
|
|
2458
2603
|
write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause(
|
|
2459
|
-
cfd->imm()->NumNotFlushed() +
|
|
2604
|
+
cfd->imm()->NumNotFlushed() + mem_to_flush,
|
|
2460
2605
|
vstorage->l0_delay_trigger_count() + 1,
|
|
2461
2606
|
vstorage->estimated_compaction_needed_bytes(),
|
|
2462
2607
|
mutable_cf_options, *cfd->ioptions())
|
|
@@ -2602,6 +2747,11 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
|
|
2602
2747
|
// There has been a hard error and this call is not part of the recovery
|
|
2603
2748
|
// sequence. Bail out here so we don't get into an endless loop of
|
|
2604
2749
|
// scheduling BG work which will again call this function
|
|
2750
|
+
//
|
|
2751
|
+
// Note that a non-recovery flush can still be scheduled if
|
|
2752
|
+
// error_handler_.IsRecoveryInProgress() returns true. We rely on
|
|
2753
|
+
// BackgroundCallFlush() to check flush reason and drop non-recovery
|
|
2754
|
+
// flushes.
|
|
2605
2755
|
return;
|
|
2606
2756
|
} else if (shutting_down_.load(std::memory_order_acquire)) {
|
|
2607
2757
|
// DB is being deleted; no more background compactions
|
|
@@ -2612,6 +2762,9 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
|
|
2612
2762
|
env_->GetBackgroundThreads(Env::Priority::HIGH) == 0;
|
|
2613
2763
|
while (!is_flush_pool_empty && unscheduled_flushes_ > 0 &&
|
|
2614
2764
|
bg_flush_scheduled_ < bg_job_limits.max_flushes) {
|
|
2765
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
2766
|
+
"DBImpl::MaybeScheduleFlushOrCompaction:BeforeSchedule",
|
|
2767
|
+
&unscheduled_flushes_);
|
|
2615
2768
|
bg_flush_scheduled_++;
|
|
2616
2769
|
FlushThreadArg* fta = new FlushThreadArg;
|
|
2617
2770
|
fta->db_ = this;
|
|
@@ -2721,7 +2874,7 @@ ColumnFamilyData* DBImpl::PopFirstFromCompactionQueue() {
|
|
|
2721
2874
|
|
|
2722
2875
|
DBImpl::FlushRequest DBImpl::PopFirstFromFlushQueue() {
|
|
2723
2876
|
assert(!flush_queue_.empty());
|
|
2724
|
-
FlushRequest flush_req = flush_queue_.front();
|
|
2877
|
+
FlushRequest flush_req = std::move(flush_queue_.front());
|
|
2725
2878
|
flush_queue_.pop_front();
|
|
2726
2879
|
if (!immutable_db_options_.atomic_flush) {
|
|
2727
2880
|
assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
@@ -2765,6 +2918,9 @@ ColumnFamilyData* DBImpl::PickCompactionFromQueue(
|
|
|
2765
2918
|
|
|
2766
2919
|
void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
2767
2920
|
mutex_.AssertHeld();
|
|
2921
|
+
if (reject_new_background_jobs_) {
|
|
2922
|
+
return;
|
|
2923
|
+
}
|
|
2768
2924
|
if (flush_req.cfd_to_max_mem_id_to_persist.empty()) {
|
|
2769
2925
|
return;
|
|
2770
2926
|
}
|
|
@@ -2794,6 +2950,9 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req) {
|
|
|
2794
2950
|
|
|
2795
2951
|
void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
|
|
2796
2952
|
mutex_.AssertHeld();
|
|
2953
|
+
if (reject_new_background_jobs_) {
|
|
2954
|
+
return;
|
|
2955
|
+
}
|
|
2797
2956
|
if (!cfd->queued_for_compaction() && cfd->NeedsCompaction()) {
|
|
2798
2957
|
AddToCompactionQueue(cfd);
|
|
2799
2958
|
++unscheduled_compactions_;
|
|
@@ -2803,6 +2962,9 @@ void DBImpl::SchedulePendingCompaction(ColumnFamilyData* cfd) {
|
|
|
2803
2962
|
void DBImpl::SchedulePendingPurge(std::string fname, std::string dir_to_sync,
|
|
2804
2963
|
FileType type, uint64_t number, int job_id) {
|
|
2805
2964
|
mutex_.AssertHeld();
|
|
2965
|
+
if (reject_new_background_jobs_) {
|
|
2966
|
+
return;
|
|
2967
|
+
}
|
|
2806
2968
|
PurgeFileInfo file_info(fname, dir_to_sync, type, number, job_id);
|
|
2807
2969
|
purge_files_.insert({{number, std::move(file_info)}});
|
|
2808
2970
|
}
|
|
@@ -2891,6 +3053,7 @@ void DBImpl::UnscheduleFlushCallback(void* arg) {
|
|
|
2891
3053
|
|
|
2892
3054
|
Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
2893
3055
|
LogBuffer* log_buffer, FlushReason* reason,
|
|
3056
|
+
bool* flush_rescheduled_to_retain_udt,
|
|
2894
3057
|
Env::Priority thread_pri) {
|
|
2895
3058
|
mutex_.AssertHeld();
|
|
2896
3059
|
|
|
@@ -2916,14 +3079,61 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
|
2916
3079
|
autovector<ColumnFamilyData*> column_families_not_to_flush;
|
|
2917
3080
|
while (!flush_queue_.empty()) {
|
|
2918
3081
|
// This cfd is already referenced
|
|
2919
|
-
|
|
3082
|
+
FlushRequest flush_req = PopFirstFromFlushQueue();
|
|
2920
3083
|
FlushReason flush_reason = flush_req.flush_reason;
|
|
3084
|
+
if (!error_handler_.GetBGError().ok() && error_handler_.IsBGWorkStopped() &&
|
|
3085
|
+
flush_reason != FlushReason::kErrorRecovery &&
|
|
3086
|
+
flush_reason != FlushReason::kErrorRecoveryRetryFlush) {
|
|
3087
|
+
// Stop non-recovery flush when bg work is stopped
|
|
3088
|
+
// Note that we drop the flush request here.
|
|
3089
|
+
// Recovery thread should schedule further flushes after bg error
|
|
3090
|
+
// is cleared.
|
|
3091
|
+
status = error_handler_.GetBGError();
|
|
3092
|
+
assert(!status.ok());
|
|
3093
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
3094
|
+
"[JOB %d] Abort flush due to background error %s",
|
|
3095
|
+
job_context->job_id, status.ToString().c_str());
|
|
3096
|
+
*reason = flush_reason;
|
|
3097
|
+
for (auto item : flush_req.cfd_to_max_mem_id_to_persist) {
|
|
3098
|
+
item.first->UnrefAndTryDelete();
|
|
3099
|
+
}
|
|
3100
|
+
return status;
|
|
3101
|
+
}
|
|
3102
|
+
if (!immutable_db_options_.atomic_flush &&
|
|
3103
|
+
ShouldRescheduleFlushRequestToRetainUDT(flush_req)) {
|
|
3104
|
+
assert(flush_req.cfd_to_max_mem_id_to_persist.size() == 1);
|
|
3105
|
+
ColumnFamilyData* cfd =
|
|
3106
|
+
flush_req.cfd_to_max_mem_id_to_persist.begin()->first;
|
|
3107
|
+
if (cfd->UnrefAndTryDelete()) {
|
|
3108
|
+
return Status::OK();
|
|
3109
|
+
}
|
|
3110
|
+
ROCKS_LOG_BUFFER(log_buffer,
|
|
3111
|
+
"FlushRequest for column family %s is re-scheduled to "
|
|
3112
|
+
"retain user-defined timestamps.",
|
|
3113
|
+
cfd->GetName().c_str());
|
|
3114
|
+
// Reschedule the `FlushRequest` as is without checking dropped column
|
|
3115
|
+
// family etc. The follow-up job will do the check anyways, so save the
|
|
3116
|
+
// duplication. Column family is deduplicated by `SchdulePendingFlush` and
|
|
3117
|
+
// `PopFirstFromFlushQueue` contains at flush request enqueueing and
|
|
3118
|
+
// dequeueing time.
|
|
3119
|
+
// This flush request is rescheduled right after it's popped from the
|
|
3120
|
+
// queue while the db mutex is held, so there should be no other
|
|
3121
|
+
// FlushRequest for the same column family with higher `max_memtable_id`
|
|
3122
|
+
// in the queue to block the reschedule from succeeding.
|
|
3123
|
+
#ifndef NDEBUG
|
|
3124
|
+
flush_req.reschedule_count += 1;
|
|
3125
|
+
#endif /* !NDEBUG */
|
|
3126
|
+
SchedulePendingFlush(flush_req);
|
|
3127
|
+
*reason = flush_reason;
|
|
3128
|
+
*flush_rescheduled_to_retain_udt = true;
|
|
3129
|
+
return Status::TryAgain();
|
|
3130
|
+
}
|
|
2921
3131
|
superversion_contexts.clear();
|
|
2922
3132
|
superversion_contexts.reserve(
|
|
2923
3133
|
flush_req.cfd_to_max_mem_id_to_persist.size());
|
|
2924
3134
|
|
|
2925
|
-
for (const auto&
|
|
2926
|
-
|
|
3135
|
+
for (const auto& [cfd, max_memtable_id] :
|
|
3136
|
+
flush_req.cfd_to_max_mem_id_to_persist) {
|
|
2927
3137
|
if (cfd->GetMempurgeUsed()) {
|
|
2928
3138
|
// If imm() contains silent memtables (e.g.: because
|
|
2929
3139
|
// MemPurge was activated), requesting a flush will
|
|
@@ -2937,10 +3147,16 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
|
2937
3147
|
continue;
|
|
2938
3148
|
}
|
|
2939
3149
|
superversion_contexts.emplace_back(SuperVersionContext(true));
|
|
2940
|
-
bg_flush_args.emplace_back(cfd,
|
|
3150
|
+
bg_flush_args.emplace_back(cfd, max_memtable_id,
|
|
2941
3151
|
&(superversion_contexts.back()), flush_reason);
|
|
2942
3152
|
}
|
|
2943
|
-
|
|
3153
|
+
// `MaybeScheduleFlushOrCompaction` schedules as many `BackgroundCallFlush`
|
|
3154
|
+
// jobs as the number of `FlushRequest` in the `flush_queue_`, a.k.a
|
|
3155
|
+
// `unscheduled_flushes_`. So it's sufficient to make each `BackgroundFlush`
|
|
3156
|
+
// handle one `FlushRequest` and each have a Status returned.
|
|
3157
|
+
if (!bg_flush_args.empty() || !column_families_not_to_flush.empty()) {
|
|
3158
|
+
TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundFlush:CheckFlushRequest:cb",
|
|
3159
|
+
const_cast<int*>(&flush_req.reschedule_count));
|
|
2944
3160
|
break;
|
|
2945
3161
|
}
|
|
2946
3162
|
}
|
|
@@ -3002,11 +3218,20 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
3002
3218
|
pending_outputs_inserted_elem(new std::list<uint64_t>::iterator(
|
|
3003
3219
|
CaptureCurrentFileNumberInPendingOutputs()));
|
|
3004
3220
|
FlushReason reason;
|
|
3005
|
-
|
|
3006
|
-
Status s =
|
|
3007
|
-
|
|
3008
|
-
|
|
3009
|
-
|
|
3221
|
+
bool flush_rescheduled_to_retain_udt = false;
|
|
3222
|
+
Status s =
|
|
3223
|
+
BackgroundFlush(&made_progress, &job_context, &log_buffer, &reason,
|
|
3224
|
+
&flush_rescheduled_to_retain_udt, thread_pri);
|
|
3225
|
+
if (s.IsTryAgain() && flush_rescheduled_to_retain_udt) {
|
|
3226
|
+
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
|
|
3227
|
+
mutex_.Unlock();
|
|
3228
|
+
TEST_SYNC_POINT_CALLBACK("DBImpl::AfterRetainUDTReschedule:cb", nullptr);
|
|
3229
|
+
immutable_db_options_.clock->SleepForMicroseconds(
|
|
3230
|
+
100000); // prevent hot loop
|
|
3231
|
+
mutex_.Lock();
|
|
3232
|
+
} else if (!s.ok() && !s.IsShutdownInProgress() &&
|
|
3233
|
+
!s.IsColumnFamilyDropped() &&
|
|
3234
|
+
reason != FlushReason::kErrorRecovery) {
|
|
3010
3235
|
// Wait a little bit before retrying background flush in
|
|
3011
3236
|
// case this is an environmental problem and we do not want to
|
|
3012
3237
|
// chew up resources for failed flushes for the duration of
|
|
@@ -3016,9 +3241,9 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
3016
3241
|
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
|
|
3017
3242
|
mutex_.Unlock();
|
|
3018
3243
|
ROCKS_LOG_ERROR(immutable_db_options_.info_log,
|
|
3019
|
-
"Waiting after background flush error: %s"
|
|
3244
|
+
"[JOB %d] Waiting after background flush error: %s"
|
|
3020
3245
|
"Accumulated background error counts: %" PRIu64,
|
|
3021
|
-
s.ToString().c_str(), error_cnt);
|
|
3246
|
+
job_context.job_id, s.ToString().c_str(), error_cnt);
|
|
3022
3247
|
log_buffer.FlushBufferToLog();
|
|
3023
3248
|
LogFlush(immutable_db_options_.info_log);
|
|
3024
3249
|
immutable_db_options_.clock->SleepForMicroseconds(1000000);
|
|
@@ -3027,29 +3252,33 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
3027
3252
|
|
|
3028
3253
|
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FlushFinish:0");
|
|
3029
3254
|
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
|
|
3030
|
-
|
|
3031
|
-
//
|
|
3032
|
-
//
|
|
3033
|
-
|
|
3034
|
-
|
|
3035
|
-
|
|
3036
|
-
|
|
3037
|
-
|
|
3038
|
-
|
|
3039
|
-
|
|
3040
|
-
|
|
3041
|
-
|
|
3042
|
-
|
|
3043
|
-
|
|
3044
|
-
|
|
3045
|
-
|
|
3046
|
-
|
|
3047
|
-
|
|
3255
|
+
// There is no need to do these clean up if the flush job is rescheduled
|
|
3256
|
+
// to retain user-defined timestamps because the job doesn't get to the
|
|
3257
|
+
// stage of actually flushing the MemTables.
|
|
3258
|
+
if (!flush_rescheduled_to_retain_udt) {
|
|
3259
|
+
// If flush failed, we want to delete all temporary files that we might
|
|
3260
|
+
// have created. Thus, we force full scan in FindObsoleteFiles()
|
|
3261
|
+
FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
|
|
3262
|
+
!s.IsColumnFamilyDropped());
|
|
3263
|
+
// delete unnecessary files if any, this is done outside the mutex
|
|
3264
|
+
if (job_context.HaveSomethingToClean() ||
|
|
3265
|
+
job_context.HaveSomethingToDelete() || !log_buffer.IsEmpty()) {
|
|
3266
|
+
mutex_.Unlock();
|
|
3267
|
+
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:FilesFound");
|
|
3268
|
+
// Have to flush the info logs before bg_flush_scheduled_--
|
|
3269
|
+
// because if bg_flush_scheduled_ becomes 0 and the lock is
|
|
3270
|
+
// released, the deconstructor of DB can kick in and destroy all the
|
|
3271
|
+
// states of DB so info_log might not be available after that point.
|
|
3272
|
+
// It also applies to access other states that DB owns.
|
|
3273
|
+
log_buffer.FlushBufferToLog();
|
|
3274
|
+
if (job_context.HaveSomethingToDelete()) {
|
|
3275
|
+
PurgeObsoleteFiles(job_context);
|
|
3276
|
+
}
|
|
3277
|
+
job_context.Clean();
|
|
3278
|
+
mutex_.Lock();
|
|
3048
3279
|
}
|
|
3049
|
-
|
|
3050
|
-
mutex_.Lock();
|
|
3280
|
+
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp");
|
|
3051
3281
|
}
|
|
3052
|
-
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:ContextCleanedUp");
|
|
3053
3282
|
|
|
3054
3283
|
assert(num_running_flushes_ > 0);
|
|
3055
3284
|
num_running_flushes_--;
|
|
@@ -3457,7 +3686,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3457
3686
|
f->oldest_blob_file_number, f->oldest_ancester_time,
|
|
3458
3687
|
f->file_creation_time, f->epoch_number, f->file_checksum,
|
|
3459
3688
|
f->file_checksum_func_name, f->unique_id,
|
|
3460
|
-
f->compensated_range_deletion_size, f->tail_size
|
|
3689
|
+
f->compensated_range_deletion_size, f->tail_size,
|
|
3690
|
+
f->user_defined_timestamps_persisted);
|
|
3461
3691
|
|
|
3462
3692
|
ROCKS_LOG_BUFFER(
|
|
3463
3693
|
log_buffer,
|
|
@@ -3781,7 +4011,7 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
|
|
|
3781
4011
|
void DBImpl::BuildCompactionJobInfo(
|
|
3782
4012
|
const ColumnFamilyData* cfd, Compaction* c, const Status& st,
|
|
3783
4013
|
const CompactionJobStats& compaction_job_stats, const int job_id,
|
|
3784
|
-
|
|
4014
|
+
CompactionJobInfo* compaction_job_info) const {
|
|
3785
4015
|
assert(compaction_job_info != nullptr);
|
|
3786
4016
|
compaction_job_info->cf_id = cfd->GetID();
|
|
3787
4017
|
compaction_job_info->cf_name = cfd->GetName();
|
|
@@ -3791,7 +4021,7 @@ void DBImpl::BuildCompactionJobInfo(
|
|
|
3791
4021
|
compaction_job_info->base_input_level = c->start_level();
|
|
3792
4022
|
compaction_job_info->output_level = c->output_level();
|
|
3793
4023
|
compaction_job_info->stats = compaction_job_stats;
|
|
3794
|
-
compaction_job_info->table_properties = c->
|
|
4024
|
+
compaction_job_info->table_properties = c->GetTableProperties();
|
|
3795
4025
|
compaction_job_info->compaction_reason = c->compaction_reason();
|
|
3796
4026
|
compaction_job_info->compression = c->output_compression();
|
|
3797
4027
|
|
|
@@ -3805,15 +4035,9 @@ void DBImpl::BuildCompactionJobInfo(
|
|
|
3805
4035
|
compaction_job_info->input_files.push_back(fn);
|
|
3806
4036
|
compaction_job_info->input_file_infos.push_back(CompactionFileInfo{
|
|
3807
4037
|
static_cast<int>(i), file_number, fmd->oldest_blob_file_number});
|
|
3808
|
-
if (compaction_job_info->table_properties.count(fn) == 0) {
|
|
3809
|
-
std::shared_ptr<const TableProperties> tp;
|
|
3810
|
-
auto s = current->GetTableProperties(read_options, &tp, fmd, &fn);
|
|
3811
|
-
if (s.ok()) {
|
|
3812
|
-
compaction_job_info->table_properties[fn] = tp;
|
|
3813
|
-
}
|
|
3814
|
-
}
|
|
3815
4038
|
}
|
|
3816
4039
|
}
|
|
4040
|
+
|
|
3817
4041
|
for (const auto& newf : c->edit()->GetNewFiles()) {
|
|
3818
4042
|
const FileMetaData& meta = newf.second;
|
|
3819
4043
|
const FileDescriptor& desc = meta.fd;
|
|
@@ -3957,20 +4181,54 @@ void DBImpl::GetSnapshotContext(
|
|
|
3957
4181
|
*snapshot_seqs = snapshots_.GetAll(earliest_write_conflict_snapshot);
|
|
3958
4182
|
}
|
|
3959
4183
|
|
|
3960
|
-
Status DBImpl::WaitForCompact(
|
|
4184
|
+
Status DBImpl::WaitForCompact(
|
|
4185
|
+
const WaitForCompactOptions& wait_for_compact_options) {
|
|
3961
4186
|
InstrumentedMutexLock l(&mutex_);
|
|
4187
|
+
if (wait_for_compact_options.flush) {
|
|
4188
|
+
Status s = DBImpl::FlushAllColumnFamilies(FlushOptions(),
|
|
4189
|
+
FlushReason::kManualFlush);
|
|
4190
|
+
if (!s.ok()) {
|
|
4191
|
+
return s;
|
|
4192
|
+
}
|
|
4193
|
+
} else if (wait_for_compact_options.close_db &&
|
|
4194
|
+
has_unpersisted_data_.load(std::memory_order_relaxed) &&
|
|
4195
|
+
!mutable_db_options_.avoid_flush_during_shutdown) {
|
|
4196
|
+
Status s =
|
|
4197
|
+
DBImpl::FlushAllColumnFamilies(FlushOptions(), FlushReason::kShutDown);
|
|
4198
|
+
if (!s.ok()) {
|
|
4199
|
+
return s;
|
|
4200
|
+
}
|
|
4201
|
+
}
|
|
4202
|
+
TEST_SYNC_POINT("DBImpl::WaitForCompact:StartWaiting");
|
|
4203
|
+
const auto deadline = immutable_db_options_.clock->NowMicros() +
|
|
4204
|
+
wait_for_compact_options.timeout.count();
|
|
3962
4205
|
for (;;) {
|
|
3963
4206
|
if (shutting_down_.load(std::memory_order_acquire)) {
|
|
3964
4207
|
return Status::ShutdownInProgress();
|
|
3965
4208
|
}
|
|
3966
|
-
if (bg_work_paused_ && abort_on_pause) {
|
|
4209
|
+
if (bg_work_paused_ && wait_for_compact_options.abort_on_pause) {
|
|
3967
4210
|
return Status::Aborted();
|
|
3968
4211
|
}
|
|
3969
4212
|
if ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
|
|
3970
4213
|
bg_flush_scheduled_ || unscheduled_compactions_ ||
|
|
3971
|
-
unscheduled_flushes_) &&
|
|
4214
|
+
unscheduled_flushes_ || error_handler_.IsRecoveryInProgress()) &&
|
|
3972
4215
|
(error_handler_.GetBGError().ok())) {
|
|
3973
|
-
|
|
4216
|
+
if (wait_for_compact_options.timeout.count()) {
|
|
4217
|
+
if (bg_cv_.TimedWait(deadline)) {
|
|
4218
|
+
return Status::TimedOut();
|
|
4219
|
+
}
|
|
4220
|
+
} else {
|
|
4221
|
+
bg_cv_.Wait();
|
|
4222
|
+
}
|
|
4223
|
+
} else if (wait_for_compact_options.close_db) {
|
|
4224
|
+
reject_new_background_jobs_ = true;
|
|
4225
|
+
mutex_.Unlock();
|
|
4226
|
+
Status s = Close();
|
|
4227
|
+
mutex_.Lock();
|
|
4228
|
+
if (!s.ok()) {
|
|
4229
|
+
reject_new_background_jobs_ = false;
|
|
4230
|
+
}
|
|
4231
|
+
return s;
|
|
3974
4232
|
} else {
|
|
3975
4233
|
return error_handler_.GetBGError();
|
|
3976
4234
|
}
|