@nxtedition/rocksdb 13.5.7 → 13.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +248 -70
- package/binding.gyp +2 -2
- package/deps/rocksdb/rocksdb/BUCK +12 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -0
- package/deps/rocksdb/rocksdb/Makefile +28 -23
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -1
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +1 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +43 -39
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +0 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +1 -3
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +11 -1
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +13 -5
- package/deps/rocksdb/rocksdb/crash_test.mk +61 -15
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +136 -45
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +34 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +10 -7
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +12 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/builder.cc +22 -8
- package/deps/rocksdb/rocksdb/db/builder.h +5 -4
- package/deps/rocksdb/rocksdb/db/c.cc +556 -15
- package/deps/rocksdb/rocksdb/db/c_test.c +133 -12
- package/deps/rocksdb/rocksdb/db/column_family.cc +114 -50
- package/deps/rocksdb/rocksdb/db/column_family.h +53 -36
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +95 -70
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +71 -51
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -86
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +26 -68
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +0 -122
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +453 -258
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +117 -92
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +38 -38
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +24 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +34 -45
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +32 -31
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +10 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +82 -34
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +267 -179
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +273 -89
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +300 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_state.h +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +28 -23
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +69 -51
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +522 -245
- package/deps/rocksdb/rocksdb/db/convenience.cc +15 -4
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -3
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +196 -17
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +74 -62
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +48 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +682 -250
- package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +11 -16
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +57 -0
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +540 -490
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +347 -188
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +584 -217
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +13 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +5 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +40 -36
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +751 -372
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +35 -32
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +24 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +125 -63
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +2 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +311 -196
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +15 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +42 -29
- package/deps/rocksdb/rocksdb/db/db_iter.h +96 -31
- package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +168 -228
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +454 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +8 -8
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +90 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +60 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +85 -27
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +114 -2
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +51 -3
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_test.cc +325 -18
- package/deps/rocksdb/rocksdb/db/db_test2.cc +644 -20
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
- package/deps/rocksdb/rocksdb/db/db_test_util.h +9 -0
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +64 -45
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +203 -14
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +259 -30
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +75 -1
- package/deps/rocksdb/rocksdb/db/dbformat.h +70 -6
- package/deps/rocksdb/rocksdb/db/deletefile_test.cc +0 -190
- package/deps/rocksdb/rocksdb/db/error_handler.cc +22 -7
- package/deps/rocksdb/rocksdb/db/error_handler.h +16 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +41 -26
- package/deps/rocksdb/rocksdb/db/experimental.cc +4 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +464 -78
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +166 -69
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +54 -25
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +1 -3
- package/deps/rocksdb/rocksdb/db/flush_job.cc +98 -81
- package/deps/rocksdb/rocksdb/db/flush_job.h +4 -9
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +80 -84
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +1 -1
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +2 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +12 -19
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +41 -15
- package/deps/rocksdb/rocksdb/db/internal_stats.h +63 -52
- package/deps/rocksdb/rocksdb/db/job_context.h +59 -24
- package/deps/rocksdb/rocksdb/db/listener_test.cc +69 -10
- package/deps/rocksdb/rocksdb/db/log_format.h +11 -2
- package/deps/rocksdb/rocksdb/db/log_reader.cc +147 -34
- package/deps/rocksdb/rocksdb/db/log_reader.h +40 -11
- package/deps/rocksdb/rocksdb/db/log_test.cc +16 -3
- package/deps/rocksdb/rocksdb/db/log_writer.cc +102 -55
- package/deps/rocksdb/rocksdb/db/log_writer.h +21 -2
- package/deps/rocksdb/rocksdb/db/malloc_stats.h +0 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +16 -47
- package/deps/rocksdb/rocksdb/db/memtable.h +76 -12
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +23 -20
- package/deps/rocksdb/rocksdb/db/memtable_list.h +9 -11
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +18 -37
- package/deps/rocksdb/rocksdb/db/merge_context.h +2 -1
- package/deps/rocksdb/rocksdb/db/merge_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +3 -5
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +15 -7
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +6 -3
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +22 -4
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +41 -1
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/repair.cc +29 -34
- package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +14 -15
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +1 -3
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +47 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +3 -3
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +1 -3
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder.cc +2 -2
- package/deps/rocksdb/rocksdb/db/version_edit.cc +8 -37
- package/deps/rocksdb/rocksdb/db/version_edit.h +32 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +26 -18
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +282 -197
- package/deps/rocksdb/rocksdb/db/version_set.h +54 -57
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +28 -35
- package/deps/rocksdb/rocksdb/db/version_util.h +2 -3
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +3 -2
- package/deps/rocksdb/rocksdb/db/wal_manager.h +0 -1
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_columns.cc +1 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +22 -8
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +5 -4
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +7 -6
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/write_thread.h +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +13 -5
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +9 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +39 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +65 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +45 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +7 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +22 -5
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +28 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -38
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +80 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +51 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +23 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +305 -15
- package/deps/rocksdb/rocksdb/env/env.cc +32 -2
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +0 -2
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +2 -4
- package/deps/rocksdb/rocksdb/env/env_posix.cc +4 -2
- package/deps/rocksdb/rocksdb/env/env_test.cc +0 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -11
- package/deps/rocksdb/rocksdb/env/fs_readonly.h +0 -2
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +0 -2
- package/deps/rocksdb/rocksdb/env/fs_remap.h +0 -2
- package/deps/rocksdb/rocksdb/env/io_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/io_posix.h +3 -2
- package/deps/rocksdb/rocksdb/env/mock_env.cc +0 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +2 -2
- package/deps/rocksdb/rocksdb/file/delete_scheduler.h +0 -2
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +0 -2
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +30 -21
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +16 -0
- package/deps/rocksdb/rocksdb/file/file_util.cc +32 -14
- package/deps/rocksdb/rocksdb/file/file_util.h +22 -5
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +229 -76
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +21 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +10 -7
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +12 -8
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +1 -2
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +0 -2
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +598 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_iterator.h +36 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +70 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +232 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +149 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +132 -34
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +158 -79
- package/deps/rocksdb/rocksdb/include/rocksdb/db_bench_tool.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +4 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +1 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +275 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +50 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +10 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +5 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +237 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +230 -39
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_level.h +31 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +41 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +20 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +19 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +124 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +26 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +55 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/debug.h +3 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +1 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +1 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +96 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index_faiss.h +117 -0
- package/deps/rocksdb/rocksdb/{utilities/secondary_index/faiss_ivf_index.h → include/rocksdb/utilities/secondary_index_simple.h} +11 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +26 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +16 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +63 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +28 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +3 -3
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +0 -2
- package/deps/rocksdb/rocksdb/logging/event_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +1 -1
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +0 -1
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +0 -1
- package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +3 -1
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +2 -2
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +2 -4
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +69 -8
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +32 -9
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +58 -45
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +5 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +1 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +3 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +44 -13
- package/deps/rocksdb/rocksdb/options/cf_options.h +21 -7
- package/deps/rocksdb/rocksdb/options/configurable.cc +5 -5
- package/deps/rocksdb/rocksdb/options/configurable_test.h +1 -2
- package/deps/rocksdb/rocksdb/options/customizable.cc +0 -1
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -11
- package/deps/rocksdb/rocksdb/options/db_options.cc +18 -15
- package/deps/rocksdb/rocksdb/options/db_options.h +2 -2
- package/deps/rocksdb/rocksdb/options/options.cc +296 -305
- package/deps/rocksdb/rocksdb/options/options_helper.cc +188 -62
- package/deps/rocksdb/rocksdb/options/options_helper.h +3 -3
- package/deps/rocksdb/rocksdb/options/options_parser.cc +2 -4
- package/deps/rocksdb/rocksdb/options/options_parser.h +0 -1
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +17 -4
- package/deps/rocksdb/rocksdb/options/options_test.cc +101 -76
- package/deps/rocksdb/rocksdb/port/lang.h +2 -1
- package/deps/rocksdb/rocksdb/port/port_posix.cc +2 -1
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +5 -4
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +3 -2
- package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +99 -1
- package/deps/rocksdb/rocksdb/port/win/xpress_win.h +6 -0
- package/deps/rocksdb/rocksdb/src.mk +17 -11
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1094 -929
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +6 -19
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +76 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +221 -131
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +12 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +23 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -38
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +5 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +10 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +35 -43
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -4
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +4 -5
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +4 -4
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +37 -35
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +11 -7
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +4 -3
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +31 -5
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +0 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +0 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +0 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +483 -0
- package/deps/rocksdb/rocksdb/table/format.cc +62 -44
- package/deps/rocksdb/rocksdb/table/format.h +35 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +3 -13
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +6 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +150 -141
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +5 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +8 -0
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +0 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +0 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +0 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +6 -6
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +86 -7
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +88 -2
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +0 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +10 -1
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +3 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +899 -22
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +3 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.h +132 -1
- package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +0 -1
- package/deps/rocksdb/rocksdb/test_util/transaction_test_util.h +0 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +163 -77
- package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +0 -2
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +0 -1
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +0 -1
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +120 -52
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +0 -2
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +0 -2
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +94 -0
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +0 -1
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +0 -1
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +1 -1
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +3 -5
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/util/async_file_reader.h +15 -8
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +131 -0
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +90 -0
- package/deps/rocksdb/rocksdb/util/autovector.h +1 -1
- package/deps/rocksdb/rocksdb/util/autovector_test.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +0 -2
- package/deps/rocksdb/rocksdb/util/compression.cc +936 -4
- package/deps/rocksdb/rocksdb/util/compression.h +348 -232
- package/deps/rocksdb/rocksdb/util/compression_test.cc +229 -0
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +10 -10
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.c +1 -0
- package/deps/rocksdb/rocksdb/util/data_structure.cc +2 -0
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +1 -3
- package/deps/rocksdb/rocksdb/util/ppc-opcode.h +5 -5
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +108 -0
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +67 -0
- package/deps/rocksdb/rocksdb/util/slice_test.cc +83 -0
- package/deps/rocksdb/rocksdb/util/string_util.cc +0 -2
- package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +18 -5
- package/deps/rocksdb/rocksdb/util/udt_util.h +10 -7
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +650 -154
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +438 -144
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +16 -17
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +2 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +7 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +0 -48
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/debug.cc +7 -14
- package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/env_mirror_test.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/env_timed.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/env_timed_test.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +5 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +10 -9
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/lrulist.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +183 -32
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +258 -12
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_helper.h +33 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_iterator.cc +99 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +280 -120
- package/deps/rocksdb/rocksdb/utilities/secondary_index/simple_secondary_index.cc +79 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +52 -16
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h +10 -6
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +55 -0
- package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +37 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_tracker.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/db.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +1 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +36 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +1 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1118 -37
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +4 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +0 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +0 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +125 -127
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +45 -23
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +54 -22
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +477 -58
- package/deps/rocksdb/rocksdb.gyp +9 -4
- package/index.js +50 -9
- package/package.json +8 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -162,6 +162,23 @@ class Directories {
|
|
|
162
162
|
std::unique_ptr<FSDirectory> wal_dir_;
|
|
163
163
|
};
|
|
164
164
|
|
|
165
|
+
struct DBOpenLogRecordReadReporter : public log::Reader::Reporter {
|
|
166
|
+
Env* env;
|
|
167
|
+
Logger* info_log;
|
|
168
|
+
const char* fname;
|
|
169
|
+
Status* status; // nullptr if immutable_db_options_.paranoid_checks==false
|
|
170
|
+
bool* old_log_record;
|
|
171
|
+
void Corruption(size_t bytes, const Status& s,
|
|
172
|
+
uint64_t log_number = kMaxSequenceNumber) override;
|
|
173
|
+
|
|
174
|
+
void OldLogRecord(size_t bytes) override;
|
|
175
|
+
|
|
176
|
+
uint64_t GetCorruptedLogNumber() const { return corrupted_wal_number_; }
|
|
177
|
+
|
|
178
|
+
private:
|
|
179
|
+
uint64_t corrupted_wal_number_ = kMaxSequenceNumber;
|
|
180
|
+
};
|
|
181
|
+
|
|
165
182
|
// While DB is the public interface of RocksDB, and DBImpl is the actual
|
|
166
183
|
// class implementing it. It's the entrance of the core RocksDB engine.
|
|
167
184
|
// All other DB implementations, e.g. TransactionDB, BlobDB, etc, wrap a
|
|
@@ -239,6 +256,10 @@ class DBImpl : public DB {
|
|
|
239
256
|
Status WriteWithCallback(const WriteOptions& options, WriteBatch* updates,
|
|
240
257
|
UserWriteCallback* user_write_cb) override;
|
|
241
258
|
|
|
259
|
+
Status IngestWriteBatchWithIndex(
|
|
260
|
+
const WriteOptions& options,
|
|
261
|
+
std::shared_ptr<WriteBatchWithIndex> wbwi) override;
|
|
262
|
+
|
|
242
263
|
using DB::Get;
|
|
243
264
|
Status Get(const ReadOptions& _read_options,
|
|
244
265
|
ColumnFamilyHandle* column_family, const Slice& key,
|
|
@@ -362,6 +383,11 @@ class DBImpl : public DB {
|
|
|
362
383
|
const std::vector<ColumnFamilyHandle*>& column_families,
|
|
363
384
|
std::vector<Iterator*>* iterators) override;
|
|
364
385
|
|
|
386
|
+
using DB::NewMultiScan;
|
|
387
|
+
std::unique_ptr<MultiScan> NewMultiScan(
|
|
388
|
+
const ReadOptions& _read_options, ColumnFamilyHandle* column_family,
|
|
389
|
+
const std::vector<ScanOptions>& scan_opts) override;
|
|
390
|
+
|
|
365
391
|
const Snapshot* GetSnapshot() override;
|
|
366
392
|
void ReleaseSnapshot(const Snapshot* snapshot) override;
|
|
367
393
|
|
|
@@ -480,6 +506,9 @@ class DBImpl : public DB {
|
|
|
480
506
|
Status GetFullHistoryTsLow(ColumnFamilyHandle* column_family,
|
|
481
507
|
std::string* ts_low) override;
|
|
482
508
|
|
|
509
|
+
Status GetNewestUserDefinedTimestamp(ColumnFamilyHandle* column_family,
|
|
510
|
+
std::string* newest_timestamp) override;
|
|
511
|
+
|
|
483
512
|
Status GetDbIdentity(std::string& identity) const override;
|
|
484
513
|
|
|
485
514
|
virtual Status GetDbIdentityFromIdentityFile(const IOOptions& opts,
|
|
@@ -513,20 +542,19 @@ class DBImpl : public DB {
|
|
|
513
542
|
|
|
514
543
|
// Get the known flushed sizes of WALs that might still be written to
|
|
515
544
|
// or have pending sync.
|
|
516
|
-
// NOTE: unlike
|
|
545
|
+
// NOTE: unlike alive_wal_files_, this function includes WALs that might
|
|
517
546
|
// be obsolete (but not obsolete to a pending Checkpoint) and not yet fully
|
|
518
547
|
// synced.
|
|
519
548
|
Status GetOpenWalSizes(std::map<uint64_t, uint64_t>& number_to_size);
|
|
520
|
-
Status GetCurrentWalFile(std::unique_ptr<WalFile>*
|
|
549
|
+
Status GetCurrentWalFile(std::unique_ptr<WalFile>* current_wal_file) override;
|
|
521
550
|
Status GetCreationTimeOfOldestFile(uint64_t* creation_time) override;
|
|
522
551
|
|
|
523
552
|
Status GetUpdatesSince(
|
|
524
553
|
SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
|
|
525
554
|
const TransactionLogIterator::ReadOptions& read_options =
|
|
526
555
|
TransactionLogIterator::ReadOptions()) override;
|
|
527
|
-
Status DeleteFile(std::string name) override;
|
|
528
556
|
Status DeleteFilesInRanges(ColumnFamilyHandle* column_family,
|
|
529
|
-
const
|
|
557
|
+
const RangeOpt* ranges, size_t n,
|
|
530
558
|
bool include_end = true);
|
|
531
559
|
|
|
532
560
|
void GetLiveFilesMetaData(std::vector<LiveFileMetaData>* metadata) override;
|
|
@@ -635,6 +663,11 @@ class DBImpl : public DB {
|
|
|
635
663
|
ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
|
|
636
664
|
TablePropertiesCollection* props) override;
|
|
637
665
|
|
|
666
|
+
Status GetPropertiesOfTablesByLevel(
|
|
667
|
+
ColumnFamilyHandle* column_family,
|
|
668
|
+
std::vector<std::unique_ptr<TablePropertiesCollection>>* props_by_level)
|
|
669
|
+
override;
|
|
670
|
+
|
|
638
671
|
// ---- End of implementations of the DB interface ----
|
|
639
672
|
SystemClock* GetSystemClock() const;
|
|
640
673
|
|
|
@@ -939,13 +972,6 @@ class DBImpl : public DB {
|
|
|
939
972
|
return num_running_flushes_;
|
|
940
973
|
}
|
|
941
974
|
|
|
942
|
-
// Returns the number of currently running compactions.
|
|
943
|
-
// REQUIREMENT: mutex_ must be held when calling this function.
|
|
944
|
-
int num_running_compactions() {
|
|
945
|
-
mutex_.AssertHeld();
|
|
946
|
-
return num_running_compactions_;
|
|
947
|
-
}
|
|
948
|
-
|
|
949
975
|
const WriteController& write_controller() { return write_controller_; }
|
|
950
976
|
|
|
951
977
|
// hollow transactions shell used for recovery.
|
|
@@ -1054,7 +1080,7 @@ class DBImpl : public DB {
|
|
|
1054
1080
|
|
|
1055
1081
|
void AddToLogsToFreeQueue(log::Writer* log_writer) {
|
|
1056
1082
|
mutex_.AssertHeld();
|
|
1057
|
-
|
|
1083
|
+
wals_to_free_queue_.push_back(log_writer);
|
|
1058
1084
|
}
|
|
1059
1085
|
|
|
1060
1086
|
void AddSuperVersionsToFreeQueue(SuperVersion* sv) {
|
|
@@ -1064,10 +1090,7 @@ class DBImpl : public DB {
|
|
|
1064
1090
|
void SetSnapshotChecker(SnapshotChecker* snapshot_checker);
|
|
1065
1091
|
|
|
1066
1092
|
// Fill JobContext with snapshot information needed by flush and compaction.
|
|
1067
|
-
void
|
|
1068
|
-
std::vector<SequenceNumber>* snapshot_seqs,
|
|
1069
|
-
SequenceNumber* earliest_write_conflict_snapshot,
|
|
1070
|
-
SnapshotChecker** snapshot_checker);
|
|
1093
|
+
void InitSnapshotContext(JobContext* job_context);
|
|
1071
1094
|
|
|
1072
1095
|
// Not thread-safe.
|
|
1073
1096
|
void SetRecoverableStatePreReleaseCallback(PreReleaseCallback* callback);
|
|
@@ -1081,9 +1104,10 @@ class DBImpl : public DB {
|
|
|
1081
1104
|
// This is to be used only by internal rocksdb classes.
|
|
1082
1105
|
static Status Open(const DBOptions& db_options, const std::string& name,
|
|
1083
1106
|
const std::vector<ColumnFamilyDescriptor>& column_families,
|
|
1084
|
-
std::vector<ColumnFamilyHandle*>* handles,
|
|
1085
|
-
|
|
1086
|
-
const bool
|
|
1107
|
+
std::vector<ColumnFamilyHandle*>* handles,
|
|
1108
|
+
std::unique_ptr<DB>* dbptr, const bool seq_per_batch,
|
|
1109
|
+
const bool batch_per_txn, const bool is_retry,
|
|
1110
|
+
bool* can_retry);
|
|
1087
1111
|
|
|
1088
1112
|
static IOStatus CreateAndNewDirectory(
|
|
1089
1113
|
FileSystem* fs, const std::string& dirname,
|
|
@@ -1118,7 +1142,7 @@ class DBImpl : public DB {
|
|
|
1118
1142
|
bool TEST_UnableToReleaseOldestLog() { return unable_to_release_oldest_log_; }
|
|
1119
1143
|
|
|
1120
1144
|
bool TEST_IsLogGettingFlushed() {
|
|
1121
|
-
return
|
|
1145
|
+
return alive_wal_files_.begin()->getting_flushed;
|
|
1122
1146
|
}
|
|
1123
1147
|
|
|
1124
1148
|
Status TEST_SwitchMemtable(ColumnFamilyData* cfd = nullptr);
|
|
@@ -1198,7 +1222,9 @@ class DBImpl : public DB {
|
|
|
1198
1222
|
|
|
1199
1223
|
uint64_t TEST_LogfileNumber();
|
|
1200
1224
|
|
|
1201
|
-
uint64_t
|
|
1225
|
+
uint64_t TEST_wals_total_size() const {
|
|
1226
|
+
return wals_total_size_.LoadRelaxed();
|
|
1227
|
+
}
|
|
1202
1228
|
|
|
1203
1229
|
void TEST_GetAllBlockCaches(std::unordered_set<const Cache*>* cache_set);
|
|
1204
1230
|
|
|
@@ -1257,27 +1283,24 @@ class DBImpl : public DB {
|
|
|
1257
1283
|
// flush LOG out of application buffer
|
|
1258
1284
|
void FlushInfoLog();
|
|
1259
1285
|
|
|
1260
|
-
//
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
//
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
//
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
//
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
//
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
//
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
// `SuperVersionContext` for clean up after release mutex.
|
|
1279
|
-
void InstallSeqnoToTimeMappingInSV(
|
|
1280
|
-
std::vector<SuperVersionContext>* sv_contexts);
|
|
1286
|
+
// For the background timer job
|
|
1287
|
+
void RecordSeqnoToTimeMapping();
|
|
1288
|
+
|
|
1289
|
+
// Compactions rely on event triggers like flush/compaction/SetOptions.
|
|
1290
|
+
// We need to trigger periodic compactions even when there is no such trigger.
|
|
1291
|
+
// This function checks and schedules available compactions and will run
|
|
1292
|
+
// periodically.
|
|
1293
|
+
void TriggerPeriodicCompaction();
|
|
1294
|
+
|
|
1295
|
+
// REQUIRES: DB mutex held
|
|
1296
|
+
std::pair<SequenceNumber, uint64_t> GetSeqnoToTimeSample() const;
|
|
1297
|
+
|
|
1298
|
+
// REQUIRES: DB mutex held or during open
|
|
1299
|
+
void EnsureSeqnoToTimeMapping(const MinAndMaxPreserveSeconds& preserve_secs);
|
|
1300
|
+
|
|
1301
|
+
// Only called during open
|
|
1302
|
+
void PrepopulateSeqnoToTimeMapping(
|
|
1303
|
+
const MinAndMaxPreserveSeconds& preserve_secs);
|
|
1281
1304
|
|
|
1282
1305
|
// Interface to block and signal the DB in case of stalling writes by
|
|
1283
1306
|
// WriteBufferManager. Each DBImpl object contains ptr to WBMStallInterface.
|
|
@@ -1365,16 +1388,16 @@ class DBImpl : public DB {
|
|
|
1365
1388
|
|
|
1366
1389
|
// State below is protected by mutex_
|
|
1367
1390
|
// With two_write_queues enabled, some of the variables that accessed during
|
|
1368
|
-
// WriteToWAL need different synchronization:
|
|
1369
|
-
// logs_,
|
|
1391
|
+
// WriteToWAL need different synchronization: wal_empty_, alive_wal_files_,
|
|
1392
|
+
// logs_, cur_wal_number_. Refer to the definition of each variable below for
|
|
1370
1393
|
// more description.
|
|
1371
1394
|
//
|
|
1372
1395
|
// `mutex_` can be a hot lock in some workloads, so it deserves dedicated
|
|
1373
1396
|
// cachelines.
|
|
1374
1397
|
mutable CacheAlignedInstrumentedMutex mutex_;
|
|
1375
1398
|
|
|
1376
|
-
ColumnFamilyHandleImpl* default_cf_handle_;
|
|
1377
|
-
InternalStats* default_cf_internal_stats_;
|
|
1399
|
+
ColumnFamilyHandleImpl* default_cf_handle_ = nullptr;
|
|
1400
|
+
InternalStats* default_cf_internal_stats_ = nullptr;
|
|
1378
1401
|
|
|
1379
1402
|
// table_cache_ provides its own synchronization
|
|
1380
1403
|
std::shared_ptr<Cache> table_cache_;
|
|
@@ -1386,7 +1409,7 @@ class DBImpl : public DB {
|
|
|
1386
1409
|
|
|
1387
1410
|
// only used for dynamically adjusting max_total_wal_size. it is a sum of
|
|
1388
1411
|
// [write_buffer_size * max_write_buffer_number] over all column families
|
|
1389
|
-
std::atomic<uint64_t> max_total_in_memory_state_;
|
|
1412
|
+
std::atomic<uint64_t> max_total_in_memory_state_ = 0;
|
|
1390
1413
|
|
|
1391
1414
|
// The options to access storage files
|
|
1392
1415
|
const FileOptions file_options_;
|
|
@@ -1413,14 +1436,14 @@ class DBImpl : public DB {
|
|
|
1413
1436
|
|
|
1414
1437
|
// Each flush or compaction gets its own job id. this counter makes sure
|
|
1415
1438
|
// they're unique
|
|
1416
|
-
std::atomic<int> next_job_id_;
|
|
1439
|
+
std::atomic<int> next_job_id_ = 1;
|
|
1417
1440
|
|
|
1418
|
-
std::atomic<bool> shutting_down_;
|
|
1441
|
+
std::atomic<bool> shutting_down_ = false;
|
|
1419
1442
|
|
|
1420
1443
|
// No new background jobs can be queued if true. This is used to prevent new
|
|
1421
1444
|
// background jobs from being queued after WaitForCompact() completes waiting
|
|
1422
1445
|
// all background jobs then attempts to close when close_db_ option is true.
|
|
1423
|
-
bool reject_new_background_jobs_;
|
|
1446
|
+
bool reject_new_background_jobs_ = false;
|
|
1424
1447
|
|
|
1425
1448
|
// RecoveryContext struct stores the context about version edits along
|
|
1426
1449
|
// with corresponding column_family_data and column_family_options.
|
|
@@ -1440,7 +1463,6 @@ class DBImpl : public DB {
|
|
|
1440
1463
|
uint32_t size = static_cast<uint32_t>(map_.size());
|
|
1441
1464
|
map_.emplace(cfd->GetID(), size);
|
|
1442
1465
|
cfds_.emplace_back(cfd);
|
|
1443
|
-
mutable_cf_opts_.emplace_back(cfd->GetLatestMutableCFOptions());
|
|
1444
1466
|
edit_lists_.emplace_back(autovector<VersionEdit*>());
|
|
1445
1467
|
}
|
|
1446
1468
|
uint32_t i = map_[cfd->GetID()];
|
|
@@ -1449,7 +1471,6 @@ class DBImpl : public DB {
|
|
|
1449
1471
|
|
|
1450
1472
|
std::unordered_map<uint32_t, uint32_t> map_; // cf_id to index;
|
|
1451
1473
|
autovector<ColumnFamilyData*> cfds_;
|
|
1452
|
-
autovector<const MutableCFOptions*> mutable_cf_opts_;
|
|
1453
1474
|
autovector<autovector<VersionEdit*>> edit_lists_;
|
|
1454
1475
|
// All existing data files (SST files and Blob files) found during DB::Open.
|
|
1455
1476
|
std::vector<std::string> existing_data_files_;
|
|
@@ -1520,11 +1541,11 @@ class DBImpl : public DB {
|
|
|
1520
1541
|
// ingests `wbwi` is done.
|
|
1521
1542
|
// @param memtable_updated Whether the same write that ingests wbwi has
|
|
1522
1543
|
// updated memtable. This is useful for determining whether to set bg
|
|
1523
|
-
// error when
|
|
1524
|
-
Status
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1544
|
+
// error when IngestWBWIAsMemtable fails.
|
|
1545
|
+
Status IngestWBWIAsMemtable(std::shared_ptr<WriteBatchWithIndex> wbwi,
|
|
1546
|
+
const WBWIMemTable::SeqnoRange& assigned_seqno,
|
|
1547
|
+
uint64_t min_prep_log, SequenceNumber last_seqno,
|
|
1548
|
+
bool memtable_updated, bool ignore_missing_cf);
|
|
1528
1549
|
|
|
1529
1550
|
// If disable_memtable is set the application logic must guarantee that the
|
|
1530
1551
|
// batch will still be skipped from memtable during the recovery. An exception
|
|
@@ -1554,18 +1575,17 @@ class DBImpl : public DB {
|
|
|
1554
1575
|
Status WriteImpl(const WriteOptions& options, WriteBatch* updates,
|
|
1555
1576
|
WriteCallback* callback = nullptr,
|
|
1556
1577
|
UserWriteCallback* user_write_cb = nullptr,
|
|
1557
|
-
uint64_t*
|
|
1578
|
+
uint64_t* wal_used = nullptr, uint64_t log_ref = 0,
|
|
1558
1579
|
bool disable_memtable = false, uint64_t* seq_used = nullptr,
|
|
1559
1580
|
size_t batch_cnt = 0,
|
|
1560
1581
|
PreReleaseCallback* pre_release_callback = nullptr,
|
|
1561
1582
|
PostMemTableCallback* post_memtable_callback = nullptr,
|
|
1562
|
-
std::shared_ptr<WriteBatchWithIndex> wbwi = nullptr
|
|
1563
|
-
uint64_t min_prep_log = 0);
|
|
1583
|
+
std::shared_ptr<WriteBatchWithIndex> wbwi = nullptr);
|
|
1564
1584
|
|
|
1565
1585
|
Status PipelinedWriteImpl(const WriteOptions& options, WriteBatch* updates,
|
|
1566
1586
|
WriteCallback* callback = nullptr,
|
|
1567
1587
|
UserWriteCallback* user_write_cb = nullptr,
|
|
1568
|
-
uint64_t*
|
|
1588
|
+
uint64_t* wal_used = nullptr, uint64_t log_ref = 0,
|
|
1569
1589
|
bool disable_memtable = false,
|
|
1570
1590
|
uint64_t* seq_used = nullptr);
|
|
1571
1591
|
|
|
@@ -1592,7 +1612,7 @@ class DBImpl : public DB {
|
|
|
1592
1612
|
Status WriteImplWALOnly(
|
|
1593
1613
|
WriteThread* write_thread, const WriteOptions& options,
|
|
1594
1614
|
WriteBatch* updates, WriteCallback* callback,
|
|
1595
|
-
UserWriteCallback* user_write_cb, uint64_t*
|
|
1615
|
+
UserWriteCallback* user_write_cb, uint64_t* wal_used,
|
|
1596
1616
|
const uint64_t log_ref, uint64_t* seq_used, const size_t sub_batch_cnt,
|
|
1597
1617
|
PreReleaseCallback* pre_release_callback, const AssignOrder assign_order,
|
|
1598
1618
|
const PublishLastSeq publish_last_seq, const bool disable_memtable);
|
|
@@ -1753,9 +1773,9 @@ class DBImpl : public DB {
|
|
|
1753
1773
|
}
|
|
1754
1774
|
};
|
|
1755
1775
|
|
|
1756
|
-
struct
|
|
1757
|
-
explicit
|
|
1758
|
-
|
|
1776
|
+
struct WalFileNumberSize {
|
|
1777
|
+
explicit WalFileNumberSize(uint64_t _number) : number(_number) {}
|
|
1778
|
+
WalFileNumberSize() {}
|
|
1759
1779
|
void AddSize(uint64_t new_size) { size += new_size; }
|
|
1760
1780
|
uint64_t number;
|
|
1761
1781
|
uint64_t size = 0;
|
|
@@ -1777,6 +1797,13 @@ class DBImpl : public DB {
|
|
|
1777
1797
|
if (writer->file()) {
|
|
1778
1798
|
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
1779
1799
|
s = writer->WriteBuffer(WriteOptions());
|
|
1800
|
+
if (attempt_truncate_size < SIZE_MAX &&
|
|
1801
|
+
attempt_truncate_size < writer->file()->GetFileSize()) {
|
|
1802
|
+
Status s2 = writer->file()->writable_file()->Truncate(
|
|
1803
|
+
attempt_truncate_size, IOOptions{}, nullptr);
|
|
1804
|
+
// This is just a best effort attempt
|
|
1805
|
+
s2.PermitUncheckedError();
|
|
1806
|
+
}
|
|
1780
1807
|
}
|
|
1781
1808
|
delete writer;
|
|
1782
1809
|
writer = nullptr;
|
|
@@ -1809,6 +1836,11 @@ class DBImpl : public DB {
|
|
|
1809
1836
|
getting_synced = false;
|
|
1810
1837
|
}
|
|
1811
1838
|
|
|
1839
|
+
void SetAttemptTruncateSize(uint64_t size) {
|
|
1840
|
+
assert(attempt_truncate_size == SIZE_MAX);
|
|
1841
|
+
attempt_truncate_size = size;
|
|
1842
|
+
}
|
|
1843
|
+
|
|
1812
1844
|
uint64_t number;
|
|
1813
1845
|
// Visual Studio doesn't support deque's member to be noncopyable because
|
|
1814
1846
|
// of a std::unique_ptr as a member.
|
|
@@ -1821,15 +1853,20 @@ class DBImpl : public DB {
|
|
|
1821
1853
|
// to be persisted even if appends happen during sync so it can be used for
|
|
1822
1854
|
// tracking the synced size in MANIFEST.
|
|
1823
1855
|
uint64_t pre_sync_size = 0;
|
|
1856
|
+
// When < SIZE_MAX, attempt to truncate the WAL to this size on close,
|
|
1857
|
+
// because a bad entry was written to it beyond that point and it likely
|
|
1858
|
+
// won't be recoverable with the bad entry.
|
|
1859
|
+
uint64_t attempt_truncate_size = SIZE_MAX;
|
|
1824
1860
|
};
|
|
1825
1861
|
|
|
1826
|
-
struct
|
|
1827
|
-
explicit
|
|
1828
|
-
:
|
|
1829
|
-
bool
|
|
1830
|
-
bool
|
|
1862
|
+
struct WalContext {
|
|
1863
|
+
explicit WalContext(bool need_sync = false)
|
|
1864
|
+
: need_wal_sync(need_sync), need_wal_dir_sync(need_sync) {}
|
|
1865
|
+
bool need_wal_sync = false;
|
|
1866
|
+
bool need_wal_dir_sync = false;
|
|
1831
1867
|
log::Writer* writer = nullptr;
|
|
1832
|
-
|
|
1868
|
+
WalFileNumberSize* wal_file_number_size = nullptr;
|
|
1869
|
+
uint64_t prev_size = SIZE_MAX;
|
|
1833
1870
|
};
|
|
1834
1871
|
|
|
1835
1872
|
// PurgeFileInfo is a structure to hold information of files to be deleted in
|
|
@@ -1910,8 +1947,8 @@ class DBImpl : public DB {
|
|
|
1910
1947
|
const InternalKey* begin = nullptr; // nullptr means beginning of key range
|
|
1911
1948
|
const InternalKey* end = nullptr; // nullptr means end of key range
|
|
1912
1949
|
InternalKey* manual_end = nullptr; // how far we are compacting
|
|
1913
|
-
InternalKey tmp_storage;
|
|
1914
|
-
InternalKey tmp_storage1;
|
|
1950
|
+
InternalKey tmp_storage; // Used to keep track of compaction progress
|
|
1951
|
+
InternalKey tmp_storage1; // Used to keep track of compaction progress
|
|
1915
1952
|
|
|
1916
1953
|
// When the user provides a canceled pointer in CompactRangeOptions, the
|
|
1917
1954
|
// above variable is the reference of the user-provided
|
|
@@ -1921,12 +1958,19 @@ class DBImpl : public DB {
|
|
|
1921
1958
|
};
|
|
1922
1959
|
struct PrepickedCompaction {
|
|
1923
1960
|
// background compaction takes ownership of `compaction`.
|
|
1961
|
+
// TODO(hx235): consider using std::shared_ptr for easier ownership
|
|
1962
|
+
// management
|
|
1924
1963
|
Compaction* compaction;
|
|
1925
1964
|
// caller retains ownership of `manual_compaction_state` as it is reused
|
|
1926
1965
|
// across background compactions.
|
|
1927
1966
|
ManualCompactionState* manual_compaction_state; // nullptr if non-manual
|
|
1928
1967
|
// task limiter token is requested during compaction picking.
|
|
1929
1968
|
std::unique_ptr<TaskLimiterToken> task_token;
|
|
1969
|
+
// If true, `compaction` is picked temporarily to express compaction intent
|
|
1970
|
+
// and will be released before re-picking a real compaction based on the
|
|
1971
|
+
// updated LSM shape when thread associated with `compaction` is ready to
|
|
1972
|
+
// run
|
|
1973
|
+
bool need_repick;
|
|
1930
1974
|
};
|
|
1931
1975
|
|
|
1932
1976
|
struct CompactionArg {
|
|
@@ -1971,7 +2015,7 @@ class DBImpl : public DB {
|
|
|
1971
2015
|
|
|
1972
2016
|
// Follow-up work to user creating a column family or (families)
|
|
1973
2017
|
Status WrapUpCreateColumnFamilies(
|
|
1974
|
-
const
|
|
2018
|
+
const WriteOptions& write_options,
|
|
1975
2019
|
const std::vector<const ColumnFamilyOptions*>& cf_options);
|
|
1976
2020
|
|
|
1977
2021
|
Status DropColumnFamilyImpl(ColumnFamilyHandle* column_family);
|
|
@@ -2017,14 +2061,13 @@ class DBImpl : public DB {
|
|
|
2017
2061
|
// Flush the in-memory write buffer to storage. Switches to a new
|
|
2018
2062
|
// log-file/memtable and writes a new descriptor iff successful. Then
|
|
2019
2063
|
// installs a new super version for the column family.
|
|
2020
|
-
Status FlushMemTableToOutputFile(
|
|
2021
|
-
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
|
|
2027
|
-
Env::Priority thread_pri);
|
|
2064
|
+
Status FlushMemTableToOutputFile(ColumnFamilyData* cfd,
|
|
2065
|
+
const MutableCFOptions& mutable_cf_options,
|
|
2066
|
+
bool* madeProgress, JobContext* job_context,
|
|
2067
|
+
FlushReason flush_reason,
|
|
2068
|
+
SuperVersionContext* superversion_context,
|
|
2069
|
+
LogBuffer* log_buffer,
|
|
2070
|
+
Env::Priority thread_pri);
|
|
2028
2071
|
|
|
2029
2072
|
// Flush the memtables of (multiple) column families to multiple files on
|
|
2030
2073
|
// persistent storage.
|
|
@@ -2037,12 +2080,105 @@ class DBImpl : public DB {
|
|
|
2037
2080
|
JobContext* job_context, LogBuffer* log_buffer, Env::Priority thread_pri);
|
|
2038
2081
|
|
|
2039
2082
|
// REQUIRES: log_numbers are sorted in ascending order
|
|
2040
|
-
//
|
|
2083
|
+
// corrupted_wal_found is set to true if we recover from a corrupted log file.
|
|
2041
2084
|
Status RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
|
|
2042
2085
|
SequenceNumber* next_sequence, bool read_only,
|
|
2043
|
-
bool is_retry, bool*
|
|
2086
|
+
bool is_retry, bool* corrupted_wal_found,
|
|
2044
2087
|
RecoveryContext* recovery_ctx);
|
|
2045
2088
|
|
|
2089
|
+
void SetupLogFilesRecovery(
|
|
2090
|
+
const std::vector<uint64_t>& wal_numbers,
|
|
2091
|
+
std::unordered_map<int, VersionEdit>* version_edits, int* job_id,
|
|
2092
|
+
uint64_t* min_wal_number);
|
|
2093
|
+
|
|
2094
|
+
Status ProcessLogFiles(const std::vector<uint64_t>& wal_numbers,
|
|
2095
|
+
bool read_only, bool is_retry, uint64_t min_wal_number,
|
|
2096
|
+
int job_id, SequenceNumber* next_sequence,
|
|
2097
|
+
std::unordered_map<int, VersionEdit>* version_edits,
|
|
2098
|
+
bool* corrupted_wal_found,
|
|
2099
|
+
RecoveryContext* recovery_ctx);
|
|
2100
|
+
|
|
2101
|
+
Status ProcessLogFile(
|
|
2102
|
+
uint64_t wal_number, uint64_t min_wal_number, bool is_retry,
|
|
2103
|
+
bool read_only, int job_id, SequenceNumber* next_sequence,
|
|
2104
|
+
bool* stop_replay_for_corruption, bool* stop_replay_by_wal_filter,
|
|
2105
|
+
uint64_t* corrupted_wal_number, bool* corrupted_wal_found,
|
|
2106
|
+
std::unordered_map<int, VersionEdit>* version_edits, bool* flushed,
|
|
2107
|
+
PredecessorWALInfo& predecessor_wal_info);
|
|
2108
|
+
|
|
2109
|
+
void SetupLogFileProcessing(uint64_t wal_number);
|
|
2110
|
+
|
|
2111
|
+
Status InitializeLogReader(uint64_t wal_number, bool is_retry,
|
|
2112
|
+
std::string& fname,
|
|
2113
|
+
|
|
2114
|
+
bool stop_replay_for_corruption,
|
|
2115
|
+
uint64_t min_wal_number,
|
|
2116
|
+
const PredecessorWALInfo& predecessor_wal_info,
|
|
2117
|
+
bool* const old_log_record,
|
|
2118
|
+
Status* const reporter_status,
|
|
2119
|
+
DBOpenLogRecordReadReporter* reporter,
|
|
2120
|
+
std::unique_ptr<log::Reader>& reader);
|
|
2121
|
+
Status ProcessLogRecord(
|
|
2122
|
+
Slice record, const std::unique_ptr<log::Reader>& reader,
|
|
2123
|
+
const UnorderedMap<uint32_t, size_t>& running_ts_sz, uint64_t wal_number,
|
|
2124
|
+
const std::string& fname, bool read_only, int job_id,
|
|
2125
|
+
const std::function<void()>& logFileDropped,
|
|
2126
|
+
DBOpenLogRecordReadReporter* reporter, uint64_t* record_checksum,
|
|
2127
|
+
SequenceNumber* last_seqno_observed, SequenceNumber* next_sequence,
|
|
2128
|
+
bool* stop_replay_for_corruption, Status* status,
|
|
2129
|
+
bool* stop_replay_by_wal_filter,
|
|
2130
|
+
std::unordered_map<int, VersionEdit>* version_edits, bool* flushed);
|
|
2131
|
+
|
|
2132
|
+
Status InitializeWriteBatchForLogRecord(
|
|
2133
|
+
Slice record, const std::unique_ptr<log::Reader>& reader,
|
|
2134
|
+
const UnorderedMap<uint32_t, size_t>& running_ts_sz, WriteBatch* batch,
|
|
2135
|
+
std::unique_ptr<WriteBatch>& new_batch, WriteBatch*& batch_to_use,
|
|
2136
|
+
uint64_t* record_checksum);
|
|
2137
|
+
|
|
2138
|
+
void MaybeReviseStopReplayForCorruption(
|
|
2139
|
+
SequenceNumber sequence, SequenceNumber const* const next_sequence,
|
|
2140
|
+
bool* stop_replay_for_corruption);
|
|
2141
|
+
|
|
2142
|
+
Status InsertLogRecordToMemtable(WriteBatch* batch_to_use,
|
|
2143
|
+
uint64_t wal_number,
|
|
2144
|
+
SequenceNumber* next_sequence,
|
|
2145
|
+
bool* has_valid_writes);
|
|
2146
|
+
|
|
2147
|
+
Status MaybeWriteLevel0TableForRecovery(
|
|
2148
|
+
bool has_valid_writes, bool read_only, uint64_t wal_number, int job_id,
|
|
2149
|
+
SequenceNumber const* const next_sequence,
|
|
2150
|
+
std::unordered_map<int, VersionEdit>* version_edits, bool* flushed);
|
|
2151
|
+
|
|
2152
|
+
Status HandleNonOkStatusOrOldLogRecord(
|
|
2153
|
+
uint64_t wal_number, SequenceNumber const* const next_sequence,
|
|
2154
|
+
Status status, const DBOpenLogRecordReadReporter& reporter,
|
|
2155
|
+
bool* old_log_record, bool* stop_replay_for_corruption,
|
|
2156
|
+
uint64_t* corrupted_wal_number, bool* corrupted_wal_found);
|
|
2157
|
+
|
|
2158
|
+
Status UpdatePredecessorWALInfo(uint64_t wal_number,
|
|
2159
|
+
const SequenceNumber last_seqno_observed,
|
|
2160
|
+
const std::string& fname,
|
|
2161
|
+
PredecessorWALInfo& predecessor_wal_info);
|
|
2162
|
+
|
|
2163
|
+
void FinishLogFileProcessing(const Status& status,
|
|
2164
|
+
const SequenceNumber* next_sequence);
|
|
2165
|
+
|
|
2166
|
+
// Return `Status::Corruption()` when `stop_replay_for_corruption == true` and
|
|
2167
|
+
// there exists an inconsistency between SST and WAL data
|
|
2168
|
+
Status MaybeHandleStopReplayForCorruptionForInconsistency(
|
|
2169
|
+
bool stop_replay_for_corruption, uint64_t corrupted_wal_number);
|
|
2170
|
+
|
|
2171
|
+
Status MaybeFlushFinalMemtableOrRestoreActiveLogFiles(
|
|
2172
|
+
const std::vector<uint64_t>& wal_numbers, bool read_only, int job_id,
|
|
2173
|
+
bool flushed, std::unordered_map<int, VersionEdit>* version_edits,
|
|
2174
|
+
RecoveryContext* recovery_ctx);
|
|
2175
|
+
|
|
2176
|
+
// Check that DB sequence number is not set back during recovery between
|
|
2177
|
+
// replaying of WAL files and between replaying of WriteBatches.
|
|
2178
|
+
Status CheckSeqnoNotSetBackDuringRecovery(SequenceNumber prev_next_seqno,
|
|
2179
|
+
SequenceNumber current_next_seqno);
|
|
2180
|
+
|
|
2181
|
+
void FinishLogFilesRecovery(int job_id, const Status& status);
|
|
2046
2182
|
// The following two methods are used to flush a memtable to
|
|
2047
2183
|
// storage. The first one is used at database RecoveryTime (when the
|
|
2048
2184
|
// database is opened) and is heavyweight because it holds the mutex
|
|
@@ -2055,12 +2191,12 @@ class DBImpl : public DB {
|
|
|
2055
2191
|
// log file to its actual size, thereby freeing preallocated space.
|
|
2056
2192
|
// Return success even if truncate fails
|
|
2057
2193
|
Status GetLogSizeAndMaybeTruncate(uint64_t wal_number, bool truncate,
|
|
2058
|
-
|
|
2194
|
+
WalFileNumberSize* log);
|
|
2059
2195
|
|
|
2060
|
-
// Restore
|
|
2196
|
+
// Restore alive_wal_files_ and wals_total_size_ after recovery.
|
|
2061
2197
|
// It needs to run only when there's no flush during recovery
|
|
2062
2198
|
// (e.g. avoid_flush_during_recovery=true). May also trigger flush
|
|
2063
|
-
// in case
|
|
2199
|
+
// in case wals_total_size > max_total_wal_size.
|
|
2064
2200
|
Status RestoreAliveLogFiles(const std::vector<uint64_t>& log_numbers);
|
|
2065
2201
|
|
|
2066
2202
|
// num_bytes: for slowdown case, delay time is calculated based on
|
|
@@ -2209,7 +2345,7 @@ class DBImpl : public DB {
|
|
|
2209
2345
|
|
|
2210
2346
|
// REQUIRES: mutex locked
|
|
2211
2347
|
Status PreprocessWrite(const WriteOptions& write_options,
|
|
2212
|
-
|
|
2348
|
+
WalContext* log_context, WriteContext* write_context);
|
|
2213
2349
|
|
|
2214
2350
|
// Merge write batches in the write group into merged_batch.
|
|
2215
2351
|
// Returns OK if merge is successful.
|
|
@@ -2220,19 +2356,21 @@ class DBImpl : public DB {
|
|
|
2220
2356
|
|
|
2221
2357
|
IOStatus WriteToWAL(const WriteBatch& merged_batch,
|
|
2222
2358
|
const WriteOptions& write_options,
|
|
2223
|
-
log::Writer* log_writer, uint64_t*
|
|
2359
|
+
log::Writer* log_writer, uint64_t* wal_used,
|
|
2224
2360
|
uint64_t* log_size,
|
|
2225
|
-
|
|
2361
|
+
WalFileNumberSize& wal_file_number_size,
|
|
2362
|
+
SequenceNumber sequence);
|
|
2226
2363
|
|
|
2227
|
-
IOStatus
|
|
2228
|
-
|
|
2229
|
-
|
|
2230
|
-
|
|
2231
|
-
|
|
2364
|
+
IOStatus WriteGroupToWAL(const WriteThread::WriteGroup& write_group,
|
|
2365
|
+
log::Writer* log_writer, uint64_t* wal_used,
|
|
2366
|
+
bool need_wal_sync, bool need_wal_dir_sync,
|
|
2367
|
+
SequenceNumber sequence,
|
|
2368
|
+
WalFileNumberSize& wal_file_number_size);
|
|
2232
2369
|
|
|
2233
|
-
IOStatus
|
|
2234
|
-
|
|
2235
|
-
|
|
2370
|
+
IOStatus ConcurrentWriteGroupToWAL(const WriteThread::WriteGroup& write_group,
|
|
2371
|
+
uint64_t* wal_used,
|
|
2372
|
+
SequenceNumber* last_sequence,
|
|
2373
|
+
size_t seq_inc);
|
|
2236
2374
|
|
|
2237
2375
|
// Used by WriteImpl to update bg_error_ if paranoid check is enabled.
|
|
2238
2376
|
// Caller must hold mutex_.
|
|
@@ -2246,7 +2384,7 @@ class DBImpl : public DB {
|
|
|
2246
2384
|
void WALIOStatusCheck(const IOStatus& status);
|
|
2247
2385
|
|
|
2248
2386
|
// Used by WriteImpl to update bg_error_ in case of memtable insert error.
|
|
2249
|
-
void
|
|
2387
|
+
void HandleMemTableInsertFailure(const Status& nonok_memtable_insert_status);
|
|
2250
2388
|
|
|
2251
2389
|
Status CompactFilesImpl(const CompactionOptions& compact_options,
|
|
2252
2390
|
ColumnFamilyData* cfd, Version* version,
|
|
@@ -2331,6 +2469,8 @@ class DBImpl : public DB {
|
|
|
2331
2469
|
bool* flush_rescheduled_to_retain_udt,
|
|
2332
2470
|
Env::Priority thread_pri);
|
|
2333
2471
|
|
|
2472
|
+
Compaction* CreateIntendedCompactionForwardedToBottomPriorityPool(
|
|
2473
|
+
Compaction* c);
|
|
2334
2474
|
bool EnoughRoomForCompaction(ColumnFamilyData* cfd,
|
|
2335
2475
|
const std::vector<CompactionInputFiles>& inputs,
|
|
2336
2476
|
bool* sfm_bookkeeping, LogBuffer* log_buffer);
|
|
@@ -2353,9 +2493,7 @@ class DBImpl : public DB {
|
|
|
2353
2493
|
// Cancel scheduled periodic tasks
|
|
2354
2494
|
Status CancelPeriodicTaskScheduler();
|
|
2355
2495
|
|
|
2356
|
-
Status RegisterRecordSeqnoTimeWorker(
|
|
2357
|
-
const WriteOptions& write_options,
|
|
2358
|
-
bool is_new_db);
|
|
2496
|
+
Status RegisterRecordSeqnoTimeWorker();
|
|
2359
2497
|
|
|
2360
2498
|
void PrintStatistics();
|
|
2361
2499
|
|
|
@@ -2421,14 +2559,22 @@ class DBImpl : public DB {
|
|
|
2421
2559
|
|
|
2422
2560
|
// Background threads call this function, which is just a wrapper around
|
|
2423
2561
|
// the InstallSuperVersion() function. Background threads carry
|
|
2424
|
-
// sv_context
|
|
2425
|
-
//
|
|
2562
|
+
// sv_context to allow allocation of SuperVersion object outside of holding
|
|
2563
|
+
// the DB mutex.
|
|
2426
2564
|
// All ColumnFamily state changes go through this function. Here we analyze
|
|
2427
2565
|
// the new state and we schedule background work if we detect that the new
|
|
2428
2566
|
// state needs flush or compaction.
|
|
2567
|
+
// See also InstallSuperVersionForConfigChange().
|
|
2429
2568
|
void InstallSuperVersionAndScheduleWork(
|
|
2430
2569
|
ColumnFamilyData* cfd, SuperVersionContext* sv_context,
|
|
2431
|
-
|
|
2570
|
+
std::optional<std::shared_ptr<SeqnoToTimeMapping>>
|
|
2571
|
+
new_seqno_to_time_mapping = {});
|
|
2572
|
+
|
|
2573
|
+
// A variant of InstallSuperVersionAndScheduleWork() that must be used for
|
|
2574
|
+
// new CFs or for changes to mutable_cf_options. This is so that it can
|
|
2575
|
+
// update seqno_to_time_mapping cached for the new SuperVersion as relevant.
|
|
2576
|
+
void InstallSuperVersionForConfigChange(ColumnFamilyData* cfd,
|
|
2577
|
+
SuperVersionContext* sv_context);
|
|
2432
2578
|
|
|
2433
2579
|
bool GetIntPropertyInternal(ColumnFamilyData* cfd,
|
|
2434
2580
|
const DBPropertyInfo& property_info,
|
|
@@ -2442,7 +2588,7 @@ class DBImpl : public DB {
|
|
|
2442
2588
|
bool ShouldntRunManualCompaction(ManualCompactionState* m);
|
|
2443
2589
|
bool HaveManualCompaction(ColumnFamilyData* cfd);
|
|
2444
2590
|
bool MCOverlap(ManualCompactionState* m, ManualCompactionState* m1);
|
|
2445
|
-
void
|
|
2591
|
+
void UpdateFIFOCompactionStatus(const std::unique_ptr<Compaction>& c);
|
|
2446
2592
|
|
|
2447
2593
|
// May open and read table files for table property.
|
|
2448
2594
|
// Should not be called while holding mutex_.
|
|
@@ -2467,6 +2613,7 @@ class DBImpl : public DB {
|
|
|
2467
2613
|
|
|
2468
2614
|
IOStatus CreateWAL(const WriteOptions& write_options, uint64_t log_file_num,
|
|
2469
2615
|
uint64_t recycle_log_number, size_t preallocate_block_size,
|
|
2616
|
+
const PredecessorWALInfo& predecessor_wal_info,
|
|
2470
2617
|
log::Writer** new_log);
|
|
2471
2618
|
|
|
2472
2619
|
// Validate self-consistency of DB options
|
|
@@ -2591,8 +2738,13 @@ class DBImpl : public DB {
|
|
|
2591
2738
|
const std::vector<ColumnFamilyHandle*>& column_families,
|
|
2592
2739
|
ErrorIteratorFuncType error_iterator_func);
|
|
2593
2740
|
|
|
2741
|
+
bool ShouldPickCompaction(bool is_prepicked,
|
|
2742
|
+
const PrepickedCompaction* prepicked_compaction);
|
|
2743
|
+
|
|
2744
|
+
void ResetBottomPriCompactionIntent(ColumnFamilyData* cfd,
|
|
2745
|
+
std::unique_ptr<Compaction>& c);
|
|
2594
2746
|
// Lock over the persistent DB state. Non-nullptr iff successfully acquired.
|
|
2595
|
-
FileLock* db_lock_;
|
|
2747
|
+
FileLock* db_lock_ = nullptr;
|
|
2596
2748
|
|
|
2597
2749
|
// Guards changes to DB and CF options to ensure consistency between
|
|
2598
2750
|
// * In-memory options objects
|
|
@@ -2606,20 +2758,20 @@ class DBImpl : public DB {
|
|
|
2606
2758
|
// Guards reads and writes to in-memory stats_history_.
|
|
2607
2759
|
InstrumentedMutex stats_history_mutex_;
|
|
2608
2760
|
|
|
2609
|
-
// In addition to mutex_,
|
|
2610
|
-
//
|
|
2611
|
-
// and
|
|
2761
|
+
// In addition to mutex_, wal_write_mutex_ protects writes to logs_ and
|
|
2762
|
+
// cur_wal_number_. With two_write_queues it also protects alive_wal_files_,
|
|
2763
|
+
// and wal_empty_. Refer to the definition of each variable below for more
|
|
2612
2764
|
// details.
|
|
2613
|
-
// Note: to avoid deadlock, if needed to acquire both
|
|
2614
|
-
// mutex_, the order should be first mutex_ and then
|
|
2615
|
-
InstrumentedMutex
|
|
2765
|
+
// Note: to avoid deadlock, if needed to acquire both wal_write_mutex_ and
|
|
2766
|
+
// mutex_, the order should be first mutex_ and then wal_write_mutex_.
|
|
2767
|
+
InstrumentedMutex wal_write_mutex_;
|
|
2616
2768
|
|
|
2617
2769
|
// If zero, manual compactions are allowed to proceed. If non-zero, manual
|
|
2618
2770
|
// compactions may still be running, but will quickly fail with
|
|
2619
2771
|
// `Status::Incomplete`. The value indicates how many threads have paused
|
|
2620
2772
|
// manual compactions. It is accessed in read mode outside the DB mutex in
|
|
2621
2773
|
// compaction code paths.
|
|
2622
|
-
std::atomic<int> manual_compaction_paused_;
|
|
2774
|
+
std::atomic<int> manual_compaction_paused_ = false;
|
|
2623
2775
|
|
|
2624
2776
|
// This condition variable is signaled on these conditions:
|
|
2625
2777
|
// * whenever bg_compaction_scheduled_ goes down to 0
|
|
@@ -2635,106 +2787,114 @@ class DBImpl : public DB {
|
|
|
2635
2787
|
// * whenever SetOptions successfully updates options.
|
|
2636
2788
|
// * whenever a column family is dropped.
|
|
2637
2789
|
InstrumentedCondVar bg_cv_;
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2790
|
+
|
|
2791
|
+
ColumnFamilyHandleImpl* persist_stats_cf_handle_ = nullptr;
|
|
2792
|
+
|
|
2793
|
+
bool persistent_stats_cfd_exists_ = true;
|
|
2794
|
+
|
|
2795
|
+
// Writes are protected by locking both mutex_ and wal_write_mutex_, and reads
|
|
2796
|
+
// must be under either mutex_ or wal_write_mutex_. Since after ::Open,
|
|
2797
|
+
// cur_wal_number_ is currently updated only in write_thread_, it can be read
|
|
2641
2798
|
// from the same write_thread_ without any locks.
|
|
2642
|
-
uint64_t
|
|
2799
|
+
uint64_t cur_wal_number_ = 0;
|
|
2800
|
+
|
|
2643
2801
|
// Log files that we can recycle. Must be protected by db mutex_.
|
|
2644
|
-
std::deque<uint64_t>
|
|
2802
|
+
std::deque<uint64_t> wal_recycle_files_;
|
|
2803
|
+
|
|
2645
2804
|
// The minimum log file number that can be recycled, if log recycling is
|
|
2646
2805
|
// enabled. This is used to ensure that log files created by previous
|
|
2647
2806
|
// instances of the database are not recycled, as we cannot be sure they
|
|
2648
2807
|
// were created in the recyclable format.
|
|
2649
|
-
uint64_t
|
|
2650
|
-
|
|
2651
|
-
|
|
2652
|
-
|
|
2808
|
+
uint64_t min_wal_number_to_recycle_ = 0;
|
|
2809
|
+
|
|
2810
|
+
// Protected by wal_write_mutex_.
|
|
2811
|
+
bool wal_dir_synced_ = false;
|
|
2812
|
+
|
|
2813
|
+
// Without two_write_queues, reads and writes to wal_empty_ are protected by
|
|
2653
2814
|
// mutex_. Since it is currently updated/read only in write_thread_, it can be
|
|
2654
2815
|
// accessed from the same write_thread_ without any locks. With
|
|
2655
2816
|
// two_write_queues writes, where it can be updated in different threads,
|
|
2656
|
-
// read and writes are protected by
|
|
2657
|
-
// expensive mutex_ lock during WAL write, which update
|
|
2658
|
-
bool
|
|
2659
|
-
|
|
2660
|
-
ColumnFamilyHandleImpl* persist_stats_cf_handle_;
|
|
2661
|
-
|
|
2662
|
-
bool persistent_stats_cfd_exists_ = true;
|
|
2817
|
+
// reads and writes are protected by wal_write_mutex_ instead. This is to avoid
|
|
2818
|
+
// expensive mutex_ lock during WAL write, which updates wal_empty_.
|
|
2819
|
+
bool wal_empty_ = true;
|
|
2663
2820
|
|
|
2664
2821
|
// The current WAL file and those that have not been found obsolete from
|
|
2665
2822
|
// memtable flushes. A WAL not on this list might still be pending writer
|
|
2666
|
-
// flush and/or sync and close and might still be in logs_.
|
|
2667
|
-
// is protected by mutex_ and
|
|
2823
|
+
// flush and/or sync and close and might still be in logs_. alive_wal_files_
|
|
2824
|
+
// is protected by mutex_ and wal_write_mutex_ with details as follows:
|
|
2668
2825
|
// 1. read by FindObsoleteFiles() which can be called in either application
|
|
2669
|
-
// thread or RocksDB bg threads, both mutex_ and
|
|
2826
|
+
// thread or RocksDB bg threads, both mutex_ and wal_write_mutex_ are
|
|
2670
2827
|
// held.
|
|
2671
|
-
// 2. pop_front() by FindObsoleteFiles(), both mutex_ and
|
|
2828
|
+
// 2. pop_front() by FindObsoleteFiles(), both mutex_ and wal_write_mutex_
|
|
2672
2829
|
// are held.
|
|
2673
2830
|
// 3. push_back() by DBImpl::Open() and DBImpl::RestoreAliveLogFiles()
|
|
2674
2831
|
// (actually called by Open()), only mutex_ is held because at this point,
|
|
2675
2832
|
// the DB::Open() call has not returned success to application, and the
|
|
2676
2833
|
// only other thread(s) that can conflict are bg threads calling
|
|
2677
|
-
// FindObsoleteFiles() which ensure that both mutex_ and
|
|
2678
|
-
// are held when accessing
|
|
2834
|
+
// FindObsoleteFiles() which ensure that both mutex_ and wal_write_mutex_
|
|
2835
|
+
// are held when accessing alive_wal_files_.
|
|
2679
2836
|
// 4. read by DBImpl::Open() is protected by mutex_.
|
|
2680
|
-
// 5. push_back() by SwitchMemtable(). Both mutex_ and
|
|
2837
|
+
// 5. push_back() by SwitchMemtable(). Both mutex_ and wal_write_mutex_ are
|
|
2681
2838
|
// held. This is done by the write group leader. Note that in the case of
|
|
2682
2839
|
// two-write-queues, another WAL-only write thread can be writing to the
|
|
2683
2840
|
// WAL concurrently. See 9.
|
|
2684
|
-
// 6. read by SwitchWAL() with both mutex_ and
|
|
2841
|
+
// 6. read by SwitchWAL() with both mutex_ and wal_write_mutex_ held. This is
|
|
2685
2842
|
// done by write group leader.
|
|
2686
2843
|
// 7. read by ConcurrentWriteToWAL() by the write group leader in the case of
|
|
2687
|
-
// two-write-queues. Only
|
|
2844
|
+
// two-write-queues. Only wal_write_mutex_ is held to protect concurrent
|
|
2688
2845
|
// pop_front() by FindObsoleteFiles().
|
|
2689
|
-
// 8. read by PreprocessWrite() by the write group leader.
|
|
2846
|
+
// 8. read by PreprocessWrite() by the write group leader. wal_write_mutex_
|
|
2690
2847
|
// is held to protect the data structure from concurrent pop_front() by
|
|
2691
2848
|
// FindObsoleteFiles().
|
|
2692
2849
|
// 9. read by ConcurrentWriteToWAL() by a WAL-only write thread in the case
|
|
2693
|
-
// of two-write-queues. Only
|
|
2850
|
+
// of two-write-queues. Only wal_write_mutex_ is held. This suffices to
|
|
2694
2851
|
// protect the data structure from concurrent push_back() by current
|
|
2695
2852
|
// write group leader as well as pop_front() by FindObsoleteFiles().
|
|
2696
|
-
std::deque<
|
|
2853
|
+
std::deque<WalFileNumberSize> alive_wal_files_;
|
|
2854
|
+
|
|
2855
|
+
// Total size of all "alive" WALs (for easy access without synchronization)
|
|
2856
|
+
RelaxedAtomic<uint64_t> wals_total_size_{0};
|
|
2697
2857
|
|
|
2698
2858
|
// Log files that aren't fully synced, and the current log file.
|
|
2699
2859
|
// Synchronization:
|
|
2700
2860
|
// 1. read by FindObsoleteFiles() which can be called either in application
|
|
2701
|
-
// thread or RocksDB bg threads.
|
|
2861
|
+
// thread or RocksDB bg threads. wal_write_mutex_ is always held, while
|
|
2702
2862
|
// some reads are performed without mutex_.
|
|
2703
|
-
// 2. pop_front() by FindObsoleteFiles() with only
|
|
2704
|
-
// 3. read by DBImpl::Open() with both mutex_ and
|
|
2705
|
-
// 4. emplace_back() by DBImpl::Open() with both mutex_ and
|
|
2863
|
+
// 2. pop_front() by FindObsoleteFiles() with only wal_write_mutex_ held.
|
|
2864
|
+
// 3. read by DBImpl::Open() with both mutex_ and wal_write_mutex_.
|
|
2865
|
+
// 4. emplace_back() by DBImpl::Open() with both mutex_ and wal_write_mutex_.
|
|
2706
2866
|
// Note that at this point, DB::Open() has not returned success to
|
|
2707
2867
|
// application, thus the only other thread(s) that can conflict are bg
|
|
2708
2868
|
// threads calling FindObsoleteFiles(). See 1.
|
|
2709
|
-
// 5. iteration and clear() from CloseHelper() always hold
|
|
2869
|
+
// 5. iteration and clear() from CloseHelper() always hold wal_write_mutex_
|
|
2710
2870
|
// and mutex_.
|
|
2711
2871
|
// 6. back() called by APIs FlushWAL() and LockWAL() are protected by only
|
|
2712
|
-
//
|
|
2872
|
+
// wal_write_mutex_. These two can be called by application threads after
|
|
2713
2873
|
// DB::Open() returns success to applications.
|
|
2714
|
-
// 7. read by SyncWAL(), another API, protected by only
|
|
2874
|
+
// 7. read by SyncWAL(), another API, protected by only wal_write_mutex_.
|
|
2715
2875
|
// 8. read by MarkLogsNotSynced() and MarkLogsSynced() are protected by
|
|
2716
|
-
//
|
|
2717
|
-
// 9. erase() by MarkLogsSynced() protected by
|
|
2718
|
-
// 10. read by SyncClosedWals() protected by only
|
|
2876
|
+
// wal_write_mutex_.
|
|
2877
|
+
// 9. erase() by MarkLogsSynced() protected by wal_write_mutex_.
|
|
2878
|
+
// 10. read by SyncClosedWals() protected by only wal_write_mutex_. This can
|
|
2719
2879
|
// happen in bg flush threads after DB::Open() returns success to
|
|
2720
2880
|
// applications.
|
|
2721
2881
|
// 11. reads, e.g. front(), iteration, and back() called by PreprocessWrite()
|
|
2722
|
-
// holds only the
|
|
2882
|
+
// holds only the wal_write_mutex_. This is done by the write group
|
|
2723
2883
|
// leader. A bg thread calling FindObsoleteFiles() or MarkLogsSynced()
|
|
2724
|
-
// can happen concurrently. This is fine because
|
|
2884
|
+
// can happen concurrently. This is fine because wal_write_mutex_ is used
|
|
2725
2885
|
// by all parties. See 2, 5, 9.
|
|
2726
2886
|
// 12. reads, empty(), back() called by SwitchMemtable() hold both mutex_ and
|
|
2727
|
-
//
|
|
2887
|
+
// wal_write_mutex_. This happens in the write group leader.
|
|
2728
2888
|
// 13. emplace_back() by SwitchMemtable() hold both mutex_ and
|
|
2729
|
-
//
|
|
2889
|
+
// wal_write_mutex_. This happens in the write group leader. Can conflict
|
|
2730
2890
|
// with bg threads calling FindObsoleteFiles(), MarkLogsSynced(),
|
|
2731
2891
|
// SyncClosedWals(), etc. as well as application threads calling
|
|
2732
2892
|
// FlushWAL(), SyncWAL(), LockWAL(). This is fine because all parties
|
|
2733
|
-
// require at least
|
|
2893
|
+
// require at least wal_write_mutex_.
|
|
2734
2894
|
// 14. iteration called in WriteToWAL(write_group) protected by
|
|
2735
|
-
//
|
|
2895
|
+
// wal_write_mutex_. This is done by write group leader when
|
|
2736
2896
|
// two-write-queues is disabled and write needs to sync logs.
|
|
2737
|
-
// 15. back() called in ConcurrentWriteToWAL() protected by
|
|
2897
|
+
// 15. back() called in ConcurrentWriteToWAL() protected by wal_write_mutex_.
|
|
2738
2898
|
// This can be done by the write group leader if two-write-queues is
|
|
2739
2899
|
// enabled. It can also be done by another WAL-only write thread.
|
|
2740
2900
|
//
|
|
@@ -2751,23 +2911,22 @@ class DBImpl : public DB {
|
|
|
2751
2911
|
std::deque<LogWriterNumber> logs_;
|
|
2752
2912
|
|
|
2753
2913
|
// Signaled when getting_synced becomes false for some of the logs_.
|
|
2754
|
-
InstrumentedCondVar
|
|
2914
|
+
InstrumentedCondVar wal_sync_cv_;
|
|
2755
2915
|
// This is the app-level state that is written to the WAL but will be used
|
|
2756
2916
|
// only during recovery. Using this feature enables not writing the state to
|
|
2757
2917
|
// memtable on normal writes and hence improving the throughput. Each new
|
|
2758
2918
|
// write of the state will replace the previous state entirely even if the
|
|
2759
2919
|
// keys in the two consecutive states do not overlap.
|
|
2760
|
-
// It is protected by
|
|
2920
|
+
// It is protected by wal_write_mutex_ when two_write_queues_ is enabled.
|
|
2761
2921
|
// Otherwise only the head of write_thread_ can access it.
|
|
2762
2922
|
WriteBatch cached_recoverable_state_;
|
|
2763
2923
|
std::atomic<bool> cached_recoverable_state_empty_ = {true};
|
|
2764
|
-
std::atomic<uint64_t> total_log_size_;
|
|
2765
2924
|
|
|
2766
2925
|
// If this is non-empty, we need to delete these log files in background
|
|
2767
|
-
// threads. Protected by
|
|
2768
|
-
autovector<log::Writer*>
|
|
2926
|
+
// threads. Protected by wal_write_mutex_.
|
|
2927
|
+
autovector<log::Writer*> wals_to_free_;
|
|
2769
2928
|
|
|
2770
|
-
bool is_snapshot_supported_;
|
|
2929
|
+
bool is_snapshot_supported_ = true;
|
|
2771
2930
|
|
|
2772
2931
|
std::map<uint64_t, std::map<std::string, uint64_t>> stats_history_;
|
|
2773
2932
|
|
|
@@ -2791,7 +2950,7 @@ class DBImpl : public DB {
|
|
|
2791
2950
|
// sleep if it uses up the quota.
|
|
2792
2951
|
// Note: This is to protect memtable and compaction. If the batch only writes
|
|
2793
2952
|
// to the WAL its size need not be included in this.
|
|
2794
|
-
uint64_t last_batch_group_size_;
|
|
2953
|
+
uint64_t last_batch_group_size_ = 0;
|
|
2795
2954
|
|
|
2796
2955
|
FlushScheduler flush_scheduler_;
|
|
2797
2956
|
|
|
@@ -2850,32 +3009,32 @@ class DBImpl : public DB {
|
|
|
2850
3009
|
std::unordered_set<uint64_t> files_grabbed_for_purge_;
|
|
2851
3010
|
|
|
2852
3011
|
// A queue to store log writers to close. Protected by db mutex_.
|
|
2853
|
-
std::deque<log::Writer*>
|
|
3012
|
+
std::deque<log::Writer*> wals_to_free_queue_;
|
|
2854
3013
|
|
|
2855
3014
|
std::deque<SuperVersion*> superversions_to_free_queue_;
|
|
2856
3015
|
|
|
2857
|
-
int unscheduled_flushes_;
|
|
3016
|
+
int unscheduled_flushes_ = 0;
|
|
2858
3017
|
|
|
2859
|
-
int unscheduled_compactions_;
|
|
3018
|
+
int unscheduled_compactions_ = 0;
|
|
2860
3019
|
|
|
2861
3020
|
// count how many background compactions are running or have been scheduled in
|
|
2862
3021
|
// the BOTTOM pool
|
|
2863
|
-
int bg_bottom_compaction_scheduled_;
|
|
3022
|
+
int bg_bottom_compaction_scheduled_ = 0;
|
|
2864
3023
|
|
|
2865
3024
|
// count how many background compactions are running or have been scheduled
|
|
2866
|
-
int bg_compaction_scheduled_;
|
|
3025
|
+
int bg_compaction_scheduled_ = 0;
|
|
2867
3026
|
|
|
2868
3027
|
// stores the number of compactions that are currently running
|
|
2869
|
-
int num_running_compactions_;
|
|
3028
|
+
int num_running_compactions_ = 0;
|
|
2870
3029
|
|
|
2871
3030
|
// number of background memtable flush jobs, submitted to the HIGH pool
|
|
2872
|
-
int bg_flush_scheduled_;
|
|
3031
|
+
int bg_flush_scheduled_ = 0;
|
|
2873
3032
|
|
|
2874
3033
|
// stores the number of flushes that are currently running
|
|
2875
|
-
int num_running_flushes_;
|
|
3034
|
+
int num_running_flushes_ = 0;
|
|
2876
3035
|
|
|
2877
3036
|
// number of background obsolete file purge jobs, submitted to the HIGH pool
|
|
2878
|
-
int bg_purge_scheduled_;
|
|
3037
|
+
int bg_purge_scheduled_ = 0;
|
|
2879
3038
|
|
|
2880
3039
|
std::deque<ManualCompactionState*> manual_compaction_dequeue_;
|
|
2881
3040
|
|
|
@@ -2885,11 +3044,11 @@ class DBImpl : public DB {
|
|
|
2885
3044
|
// This enables two different threads to call
|
|
2886
3045
|
// EnableFileDeletions() and DisableFileDeletions()
|
|
2887
3046
|
// without any synchronization
|
|
2888
|
-
int disable_delete_obsolete_files_;
|
|
3047
|
+
int disable_delete_obsolete_files_ = 0;
|
|
2889
3048
|
|
|
2890
3049
|
// Number of times FindObsoleteFiles has found deletable files and the
|
|
2891
3050
|
// corresponding call to PurgeObsoleteFiles has not yet finished.
|
|
2892
|
-
int pending_purge_obsolete_files_;
|
|
3051
|
+
int pending_purge_obsolete_files_ = 0;
|
|
2893
3052
|
|
|
2894
3053
|
// last time when DeleteObsoleteFiles with full scan was executed. Originally
|
|
2895
3054
|
// initialized with startup time.
|
|
@@ -2901,12 +3060,12 @@ class DBImpl : public DB {
|
|
|
2901
3060
|
// The mutex used by switch_cv_. mutex_ should be acquired beforehand.
|
|
2902
3061
|
std::mutex switch_mutex_;
|
|
2903
3062
|
// Number of threads intending to write to memtable
|
|
2904
|
-
std::atomic<size_t> pending_memtable_writes_
|
|
3063
|
+
std::atomic<size_t> pending_memtable_writes_{0};
|
|
2905
3064
|
|
|
2906
3065
|
// A flag indicating whether the current rocksdb database has any
|
|
2907
3066
|
// data that is not yet persisted into either WAL or SST file.
|
|
2908
3067
|
// Used when disableWAL is true.
|
|
2909
|
-
std::atomic<bool> has_unpersisted_data_;
|
|
3068
|
+
std::atomic<bool> has_unpersisted_data_{false};
|
|
2910
3069
|
|
|
2911
3070
|
// if an attempt was made to flush all column families that
|
|
2912
3071
|
// the oldest log depends on but uncommitted data in the oldest
|
|
@@ -2914,26 +3073,26 @@ class DBImpl : public DB {
|
|
|
2914
3073
|
// We must attempt to free the dependent memtables again
|
|
2915
3074
|
// at a later time after the transaction in the oldest
|
|
2916
3075
|
// log is fully committed.
|
|
2917
|
-
bool unable_to_release_oldest_log_;
|
|
3076
|
+
bool unable_to_release_oldest_log_{false};
|
|
2918
3077
|
|
|
2919
3078
|
// Number of running IngestExternalFile() or CreateColumnFamilyWithImport()
|
|
2920
3079
|
// calls.
|
|
2921
3080
|
// REQUIRES: mutex held
|
|
2922
|
-
int num_running_ingest_file_;
|
|
3081
|
+
int num_running_ingest_file_ = 0;
|
|
2923
3082
|
|
|
2924
3083
|
WalManager wal_manager_;
|
|
2925
3084
|
|
|
2926
3085
|
// A value of > 0 temporarily disables scheduling of background work
|
|
2927
|
-
int bg_work_paused_;
|
|
3086
|
+
int bg_work_paused_ = 0;
|
|
2928
3087
|
|
|
2929
3088
|
// A value of > 0 temporarily disables scheduling of background compaction
|
|
2930
|
-
int bg_compaction_paused_;
|
|
3089
|
+
int bg_compaction_paused_ = 0;
|
|
2931
3090
|
|
|
2932
3091
|
// Guard against multiple concurrent refitting
|
|
2933
|
-
bool refitting_level_;
|
|
3092
|
+
bool refitting_level_ = false;
|
|
2934
3093
|
|
|
2935
3094
|
// Indicate DB was opened successfully
|
|
2936
|
-
bool opened_successfully_;
|
|
3095
|
+
bool opened_successfully_ = false;
|
|
2937
3096
|
|
|
2938
3097
|
// The min threshold to trigger bottommost compaction for removing
|
|
2939
3098
|
// garbages, among all column families.
|
|
@@ -2979,13 +3138,13 @@ class DBImpl : public DB {
|
|
|
2979
3138
|
// error recovery from going on in parallel. The latter, shutting_down_,
|
|
2980
3139
|
// is set a little later during the shutdown after scheduling memtable
|
|
2981
3140
|
// flushes
|
|
2982
|
-
std::atomic<bool> shutdown_initiated_;
|
|
3141
|
+
std::atomic<bool> shutdown_initiated_{false};
|
|
2983
3142
|
// Flag to indicate whether sst_file_manager object was allocated in
|
|
2984
3143
|
// DB::Open() or passed to us
|
|
2985
3144
|
bool own_sfm_;
|
|
2986
3145
|
|
|
2987
3146
|
// Flag to check whether Close() has been called on this DB
|
|
2988
|
-
bool closed_;
|
|
3147
|
+
bool closed_ = false;
|
|
2989
3148
|
// save the closing status, for re-calling the close()
|
|
2990
3149
|
Status closing_status_;
|
|
2991
3150
|
// mutex for DB::Close()
|
|
@@ -3021,7 +3180,7 @@ class DBImpl : public DB {
|
|
|
3021
3180
|
|
|
3022
3181
|
// The number of LockWAL called without matching UnlockWAL call.
|
|
3023
3182
|
// See also lock_wal_write_token_
|
|
3024
|
-
uint32_t lock_wal_count_;
|
|
3183
|
+
uint32_t lock_wal_count_ = 0;
|
|
3025
3184
|
};
|
|
3026
3185
|
|
|
3027
3186
|
class GetWithTimestampReadCallback : public ReadCallback {
|