@nxtedition/rocksdb 15.4.0 → 15.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +24 -19
- package/cache.js +1 -1
- package/chained-batch.js +12 -3
- package/deps/rocksdb/rocksdb/.clang-tidy +86 -0
- package/deps/rocksdb/rocksdb/BUCK +42 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +11 -0
- package/deps/rocksdb/rocksdb/Makefile +59 -32
- package/deps/rocksdb/rocksdb/cache/cache.cc +0 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +9 -9
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/cache_key.h +5 -5
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +16 -16
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +258 -294
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +98 -49
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +1 -5
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +18 -18
- package/deps/rocksdb/rocksdb/crash_test.mk +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +23 -22
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +6 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +14 -16
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +38 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +101 -18
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +12 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +6 -9
- package/deps/rocksdb/rocksdb/db/builder.cc +23 -0
- package/deps/rocksdb/rocksdb/db/builder.h +7 -0
- package/deps/rocksdb/rocksdb/db/c.cc +373 -57
- package/deps/rocksdb/rocksdb/db/c_test.c +101 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +31 -3
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +10 -13
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +35 -48
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +13 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +201 -39
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -455
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +19 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +72 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +12 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +405 -83
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +25 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +23 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +1410 -106
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +19 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +505 -45
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +9 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +7 -9
- package/deps/rocksdb/rocksdb/db/convenience.cc +4 -4
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +60 -88
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +10 -12
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +471 -40
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +116 -2
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +5 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_abort_test.cc +993 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +329 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +155 -13
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +54 -31
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +232 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +57 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +224 -31
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +164 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +47 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +22 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +9 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +371 -6
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +7 -5
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +22 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +32 -13
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +68 -15
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +6 -21
- package/deps/rocksdb/rocksdb/db/db_test.cc +644 -128
- package/deps/rocksdb/rocksdb/db/db_test2.cc +198 -81
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +35 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -2
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +36 -32
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +11 -7
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +499 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +284 -20
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db/dbformat.h +0 -5
- package/deps/rocksdb/rocksdb/db/error_handler.cc +24 -0
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +12 -14
- package/deps/rocksdb/rocksdb/db/experimental.cc +13 -10
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +22 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +21 -15
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +4 -6
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -3
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +5 -6
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +17 -17
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +154 -27
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/memtable.cc +197 -51
- package/deps/rocksdb/rocksdb/db/memtable.h +6 -0
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +3 -4
- package/deps/rocksdb/rocksdb/db/merge_test.cc +37 -35
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/options_file_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +9 -11
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.cc +10 -1
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler_test.cc +292 -15
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +10 -17
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +6 -8
- package/deps/rocksdb/rocksdb/db/repair.cc +10 -10
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/table_cache.cc +142 -135
- package/deps/rocksdb/rocksdb/db/table_cache.h +30 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +11 -50
- package/deps/rocksdb/rocksdb/db/version_builder.h +2 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/version_edit.cc +51 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +91 -29
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set.cc +211 -50
- package/deps/rocksdb/rocksdb/db/version_set.h +40 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +294 -21
- package/deps/rocksdb/rocksdb/db/version_util.cc +96 -0
- package/deps/rocksdb/rocksdb/db/version_util.h +24 -0
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +647 -31
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +219 -1
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +549 -12
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +3 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +21 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +32 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +74 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +9 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +143 -61
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +76 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +92 -72
- package/deps/rocksdb/rocksdb/env/env.cc +1 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +365 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +31 -30
- package/deps/rocksdb/rocksdb/env/io_posix.cc +8 -11
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -1
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +108 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +32 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +4 -4
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/file_util.h +2 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +331 -12
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +52 -35
- package/deps/rocksdb/rocksdb/folly.mk +22 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +100 -54
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +67 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +149 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +1 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +78 -97
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/external_table.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +17 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/io_dispatcher.h +358 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +43 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +20 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +63 -21
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +10 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +2 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +49 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +77 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +16 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +5 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +2 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +106 -46
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +14 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +1 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +226 -8
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +490 -0
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +11 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +4 -12
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/monitoring/file_read_sample.h +21 -4
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +9 -3
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +21 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +21 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +0 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +26 -5
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -1
- package/deps/rocksdb/rocksdb/options/options.cc +5 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +7 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +109 -103
- package/deps/rocksdb/rocksdb/options/options_test.cc +14 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +15 -17
- package/deps/rocksdb/rocksdb/port/lang.h +4 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +0 -23
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +36 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.h +9 -0
- package/deps/rocksdb/rocksdb/src.mk +12 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +571 -292
- package/deps/rocksdb/rocksdb/table/block_based/block.h +143 -53
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +154 -90
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +51 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +0 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +147 -734
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +30 -233
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +178 -108
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +13 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +17 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +70 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +168 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +9 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +548 -169
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +30 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_util.h +156 -0
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.cc +73 -30
- package/deps/rocksdb/rocksdb/table/block_based/data_block_footer.h +74 -7
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +20 -14
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +22 -12
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.cc +332 -0
- package/deps/rocksdb/rocksdb/table/block_based/multi_scan_index_iterator.h +133 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +0 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +126 -46
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +31 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cleanable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +25 -4
- package/deps/rocksdb/rocksdb/table/format.cc +27 -15
- package/deps/rocksdb/rocksdb/table/format.h +41 -15
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +22 -12
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +7 -21
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +0 -1
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +88 -13
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +53 -42
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +3 -12
- package/deps/rocksdb/rocksdb/table/table_builder.h +0 -4
- package/deps/rocksdb/rocksdb/table/table_properties.cc +18 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +2 -3
- package/deps/rocksdb/rocksdb/table/table_test.cc +848 -172
- package/deps/rocksdb/rocksdb/table/unique_id.cc +24 -20
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +8 -8
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +5 -4
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +2 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +238 -120
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +2 -4
- package/deps/rocksdb/rocksdb/tools/dump/db_dump_tool.cc +4 -8
- package/deps/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +2 -3
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +82 -20
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +41 -47
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +5 -6
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/tool_hooks.cc +6 -5
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +4 -4
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +1 -3
- package/deps/rocksdb/rocksdb/util/atomic.h +30 -23
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +6 -7
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +3 -3
- package/deps/rocksdb/rocksdb/util/bit_fields.h +68 -46
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +16 -16
- package/deps/rocksdb/rocksdb/util/coding.h +14 -27
- package/deps/rocksdb/rocksdb/util/compression.cc +365 -207
- package/deps/rocksdb/rocksdb/util/compression.h +16 -1298
- package/deps/rocksdb/rocksdb/util/compression_test.cc +347 -61
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +8 -9
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.h +1 -1
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +1 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +3 -3
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +18 -18
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +3 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +19 -7
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.cc +1099 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_imp.h +36 -0
- package/deps/rocksdb/rocksdb/util/io_dispatcher_test.cc +1919 -0
- package/deps/rocksdb/rocksdb/util/math.h +3 -1
- package/deps/rocksdb/rocksdb/util/mutexlock.h +19 -19
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +25 -25
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +5 -7
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -5
- package/deps/rocksdb/rocksdb/util/slice.cc +0 -10
- package/deps/rocksdb/rocksdb/util/slice_test.cc +35 -1
- package/deps/rocksdb/rocksdb/util/slice_transform_test.cc +5 -7
- package/deps/rocksdb/rocksdb/util/status.cc +3 -1
- package/deps/rocksdb/rocksdb/util/stop_watch.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +123 -78
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +12 -93
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +1 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +0 -21
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +6 -48
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +94 -307
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +12 -58
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +2 -8
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +205 -811
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +2 -7
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +1 -9
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +17 -11
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/test_utils.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +68 -61
- package/deps/rocksdb/rocksdb/utilities/debug.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +105 -59
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs_test.cc +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +13 -17
- package/deps/rocksdb/rocksdb/utilities/memory/memory_util.cc +16 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +25 -25
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +40 -40
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +2 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +17 -19
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +4 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_seqno_test.cc +431 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +91 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.cc +562 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/bitvector.h +615 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.cc +2575 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/louds_trie.h +685 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_db_test.cc +2843 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.cc +567 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_factory.h +275 -0
- package/deps/rocksdb/rocksdb/utilities/trie_index/trie_index_test.cc +5183 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +4 -3
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +3 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +93 -88
- package/deps/rocksdb/rocksdb.gyp +7 -0
- package/index.js +11 -2
- package/iterator.js +15 -7
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h +0 -43
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h +0 -55
|
@@ -73,7 +73,7 @@ Status OptimisticTransactionDB::Open(
|
|
|
73
73
|
std::vector<ColumnFamilyHandle*>* handles,
|
|
74
74
|
OptimisticTransactionDB** dbptr) {
|
|
75
75
|
Status s;
|
|
76
|
-
DB* db;
|
|
76
|
+
std::unique_ptr<DB> db;
|
|
77
77
|
|
|
78
78
|
std::vector<ColumnFamilyDescriptor> column_families_copy = column_families;
|
|
79
79
|
|
|
@@ -91,7 +91,7 @@ Status OptimisticTransactionDB::Open(
|
|
|
91
91
|
s = DB::Open(db_options, dbname, column_families_copy, handles, &db);
|
|
92
92
|
|
|
93
93
|
if (s.ok()) {
|
|
94
|
-
*dbptr = new OptimisticTransactionDBImpl(db, occ_options);
|
|
94
|
+
*dbptr = new OptimisticTransactionDBImpl(std::move(db), occ_options);
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
return s;
|
|
@@ -44,10 +44,9 @@ class OccLockBucketsImpl : public OccLockBucketsImplBase {
|
|
|
44
44
|
class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
|
|
45
45
|
public:
|
|
46
46
|
explicit OptimisticTransactionDBImpl(
|
|
47
|
-
DB* db, const OptimisticTransactionDBOptions& occ_options,
|
|
48
|
-
bool take_ownership = true)
|
49
|
-
: OptimisticTransactionDB(db),
|
|
50
|
-
db_owner_(take_ownership),
|
|
47
|
+
std::unique_ptr<DB>&& db,
|
|
48
|
+
const OptimisticTransactionDBOptions& occ_options)
|
|
49
|
+
: OptimisticTransactionDB(std::move(db)),
|
|
51
50
|
validate_policy_(occ_options.validate_policy) {
|
|
52
51
|
if (validate_policy_ == OccValidationPolicy::kValidateParallel) {
|
|
53
52
|
auto bucketed_locks = occ_options.shared_lock_buckets;
|
|
@@ -60,13 +59,7 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
|
|
|
60
59
|
}
|
|
61
60
|
}
|
|
62
61
|
|
|
63
|
-
~OptimisticTransactionDBImpl() {
|
|
64
|
-
// Prevent this stackable from destroying
|
|
65
|
-
// base db
|
|
66
|
-
if (!db_owner_) {
|
|
67
|
-
db_ = nullptr;
|
|
68
|
-
}
|
|
69
|
-
}
|
|
62
|
+
~OptimisticTransactionDBImpl() override = default;
|
|
70
63
|
|
|
71
64
|
Transaction* BeginTransaction(const WriteOptions& write_options,
|
|
72
65
|
const OptimisticTransactionOptions& txn_options,
|
|
@@ -97,8 +90,6 @@ class OptimisticTransactionDBImpl : public OptimisticTransactionDB {
|
|
|
97
90
|
private:
|
|
98
91
|
std::shared_ptr<OccLockBucketsImplBase> bucketed_locks_;
|
|
99
92
|
|
|
100
|
-
bool db_owner_;
|
|
101
|
-
|
|
102
93
|
const OccValidationPolicy validate_policy_;
|
|
103
94
|
|
|
104
95
|
void ReinitializeTransaction(Transaction* txn,
|
|
@@ -9005,7 +9005,7 @@ class CommitBypassMemtableTest
|
|
|
9005
9005
|
txn_db_opts.use_per_key_point_lock_mgr = std::get<1>(GetParam());
|
|
9006
9006
|
ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db));
|
|
9007
9007
|
ASSERT_NE(txn_db, nullptr);
|
|
9008
|
-
db_ = txn_db;
|
|
9008
|
+
db_.reset(txn_db);
|
|
9009
9009
|
}
|
|
9010
9010
|
};
|
|
9011
9011
|
|
|
@@ -9453,9 +9453,9 @@ TEST_P(CommitBypassMemtableTest, Recovery) {
|
|
|
9453
9453
|
VerifyDBFromMap(expected);
|
|
9454
9454
|
|
|
9455
9455
|
ASSERT_OK(txn_db->Close());
|
|
9456
|
-
|
|
9456
|
+
db_.reset(); // destroys txn_db (owned by db_)
|
|
9457
9457
|
ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, &txn_db));
|
|
9458
|
-
db_ = txn_db;
|
|
9458
|
+
db_.reset(txn_db);
|
|
9459
9459
|
|
|
9460
9460
|
VerifyDBFromMap(expected);
|
|
9461
9461
|
}
|
|
@@ -82,6 +82,12 @@ class TransactionTestBase : public ::testing::Test {
|
|
|
82
82
|
txn_db_options.write_policy = write_policy;
|
|
83
83
|
txn_db_options.rollback_merge_operands = true;
|
|
84
84
|
txn_db_options.use_per_key_point_lock_mgr = use_per_key_point_lock_mgr;
|
|
85
|
+
// Reduce commit cache size from the default 2^23 (64MB) to 2^13 (64KB).
|
|
86
|
+
// The default is sized for production workloads but makes TSAN builds
|
|
87
|
+
// very slow because value-initializing 8M atomics triggers __tsan_memset,
|
|
88
|
+
// which updates shadow memory for every 8-byte cell. Tests that need
|
|
89
|
+
// specific cache sizes (e.g., for wrapping/eviction) override this.
|
|
90
|
+
txn_db_options.wp_commit_cache_bits = 13;
|
|
85
91
|
// This will stress write unprepared, by forcing write batch flush on every
|
|
86
92
|
// write.
|
|
87
93
|
txn_db_options.default_write_batch_flush_threshold = 1;
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
|
|
2
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
3
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
4
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
5
|
+
|
|
6
|
+
// Test to verify that sequence numbers remain consistent during error recovery
|
|
7
|
+
// with WritePrepared TransactionDB and two_write_queues=true.
|
|
8
|
+
//
|
|
9
|
+
// The fix: SyncLastSequenceWithAllocated() is called during ResumeImpl to
|
|
10
|
+
// ensure that allocated-but-not-published sequence numbers are accounted for
|
|
11
|
+
// before creating new memtables/WALs, preventing "sequence number going
|
|
12
|
+
// backwards" corruption on subsequent recovery.
|
|
13
|
+
|
|
14
|
+
#include <atomic>
|
|
15
|
+
#include <memory>
|
|
16
|
+
#include <string>
|
|
17
|
+
|
|
18
|
+
#include "db/db_impl/db_impl.h"
|
|
19
|
+
#include "db/db_test_util.h"
|
|
20
|
+
#include "db/version_set.h"
|
|
21
|
+
#include "port/stack_trace.h"
|
|
22
|
+
#include "rocksdb/utilities/transaction_db.h"
|
|
23
|
+
#include "test_util/sync_point.h"
|
|
24
|
+
#include "test_util/testharness.h"
|
|
25
|
+
#include "test_util/testutil.h"
|
|
26
|
+
#include "utilities/fault_injection_fs.h"
|
|
27
|
+
|
|
28
|
+
namespace ROCKSDB_NAMESPACE {
|
|
29
|
+
|
|
30
|
+
class WritePreparedTransactionSeqnoTest : public ::testing::Test {
|
|
31
|
+
public:
|
|
32
|
+
WritePreparedTransactionSeqnoTest()
|
|
33
|
+
: db_(nullptr),
|
|
34
|
+
special_env_(Env::Default()),
|
|
35
|
+
fault_fs_(new FaultInjectionTestFS(FileSystem::Default())),
|
|
36
|
+
env_(new CompositeEnvWrapper(&special_env_, fault_fs_)) {
|
|
37
|
+
options_.create_if_missing = true;
|
|
38
|
+
options_.max_write_buffer_number = 2;
|
|
39
|
+
options_.write_buffer_size = 4 * 1024;
|
|
40
|
+
options_.level0_file_num_compaction_trigger = 2;
|
|
41
|
+
options_.env = env_.get();
|
|
42
|
+
// Use two_write_queues which is typical for WritePrepared
|
|
43
|
+
options_.two_write_queues = true;
|
|
44
|
+
// Enable auto recovery from retryable errors
|
|
45
|
+
options_.max_bgerror_resume_count = 2;
|
|
46
|
+
options_.bgerror_resume_retry_interval = 100000; // 100ms
|
|
47
|
+
|
|
48
|
+
dbname_ = test::PerThreadDBPath("write_prepared_seqno_test");
|
|
49
|
+
EXPECT_OK(DestroyDB(dbname_, options_));
|
|
50
|
+
|
|
51
|
+
txn_db_options_.transaction_lock_timeout = 0;
|
|
52
|
+
txn_db_options_.default_lock_timeout = 0;
|
|
53
|
+
txn_db_options_.write_policy = TxnDBWritePolicy::WRITE_PREPARED;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
~WritePreparedTransactionSeqnoTest() {
|
|
57
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
58
|
+
SyncPoint::GetInstance()->ClearAllCallBacks();
|
|
59
|
+
if (db_) {
|
|
60
|
+
for (auto h : handles_) {
|
|
61
|
+
if (h) {
|
|
62
|
+
EXPECT_OK(db_->DestroyColumnFamilyHandle(h));
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
handles_.clear();
|
|
66
|
+
delete db_;
|
|
67
|
+
db_ = nullptr;
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
Status Open() {
|
|
72
|
+
return TransactionDB::Open(options_, txn_db_options_, dbname_, &db_);
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
void Close() {
|
|
76
|
+
for (auto h : handles_) {
|
|
77
|
+
if (h) {
|
|
78
|
+
EXPECT_OK(db_->DestroyColumnFamilyHandle(h));
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
handles_.clear();
|
|
82
|
+
delete db_;
|
|
83
|
+
db_ = nullptr;
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
DBImpl* dbimpl() { return static_cast_with_check<DBImpl>(db_->GetRootDB()); }
|
|
87
|
+
|
|
88
|
+
protected:
|
|
89
|
+
TransactionDB* db_;
|
|
90
|
+
SpecialEnv special_env_;
|
|
91
|
+
std::shared_ptr<FaultInjectionTestFS> fault_fs_;
|
|
92
|
+
std::unique_ptr<Env> env_;
|
|
93
|
+
std::string dbname_;
|
|
94
|
+
Options options_;
|
|
95
|
+
TransactionDBOptions txn_db_options_;
|
|
96
|
+
std::vector<ColumnFamilyHandle*> handles_;
|
|
97
|
+
};
|
|
98
|
+
|
|
99
|
+
// Regression test: verify that after error recovery with two_write_queues,
|
|
100
|
+
// the DB can be closed and reopened without sequence number corruption.
|
|
101
|
+
TEST_F(WritePreparedTransactionSeqnoTest,
|
|
102
|
+
SeqnoGoesBackwardsDuringErrorRecovery) {
|
|
103
|
+
ASSERT_OK(Open());
|
|
104
|
+
|
|
105
|
+
// Write some initial data and flush to establish baseline
|
|
106
|
+
WriteOptions write_opts;
|
|
107
|
+
TransactionOptions txn_opts;
|
|
108
|
+
for (int i = 0; i < 10; i++) {
|
|
109
|
+
Transaction* txn = db_->BeginTransaction(write_opts, txn_opts);
|
|
110
|
+
ASSERT_NE(txn, nullptr);
|
|
111
|
+
ASSERT_OK(txn->SetName("txn" + std::to_string(i)));
|
|
112
|
+
ASSERT_OK(txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
|
|
113
|
+
ASSERT_OK(txn->Prepare());
|
|
114
|
+
ASSERT_OK(txn->Commit());
|
|
115
|
+
delete txn;
|
|
116
|
+
}
|
|
117
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
118
|
+
|
|
119
|
+
// Write more data - these will allocate sequence numbers
|
|
120
|
+
for (int i = 10; i < 20; i++) {
|
|
121
|
+
Transaction* txn = db_->BeginTransaction(write_opts, txn_opts);
|
|
122
|
+
ASSERT_NE(txn, nullptr);
|
|
123
|
+
ASSERT_OK(txn->SetName("txn" + std::to_string(i)));
|
|
124
|
+
ASSERT_OK(txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
|
|
125
|
+
ASSERT_OK(txn->Prepare());
|
|
126
|
+
ASSERT_OK(txn->Commit());
|
|
127
|
+
delete txn;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// Set up sync point dependency chain for deterministic recovery
|
|
131
|
+
// synchronization, following the pattern from
|
|
132
|
+
// ManifestWriteRetryableErrorAutoRecover in error_handler_fs_test.cc.
|
|
133
|
+
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
|
134
|
+
{{"RecoverFromRetryableBGIOError:BeforeStart",
|
|
135
|
+
"SeqnoGoesBackwardsDuringErrorRecovery:0"},
|
|
136
|
+
{"SeqnoGoesBackwardsDuringErrorRecovery:1",
|
|
137
|
+
"RecoverFromRetryableBGIOError:BeforeWait1"},
|
|
138
|
+
{"RecoverFromRetryableBGIOError:RecoverSuccess",
|
|
139
|
+
"SeqnoGoesBackwardsDuringErrorRecovery:2"}});
|
|
140
|
+
|
|
141
|
+
// Inject a retryable MANIFEST write error on the next flush
|
|
142
|
+
IOStatus error_to_inject = IOStatus::IOError("Injected MANIFEST error");
|
|
143
|
+
error_to_inject.SetRetryable(true);
|
|
144
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
145
|
+
"VersionSet::LogAndApply:WriteManifest",
|
|
146
|
+
[&](void*) { fault_fs_->SetFilesystemActive(false, error_to_inject); });
|
|
147
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
148
|
+
|
|
149
|
+
// Trigger a flush that will fail due to MANIFEST write error
|
|
150
|
+
Status s = db_->Flush(FlushOptions());
|
|
151
|
+
ASSERT_NOK(s);
|
|
152
|
+
|
|
153
|
+
// Wait for recovery to start, then re-enable filesystem and let it proceed.
|
|
154
|
+
// Clear the callback first to prevent it from re-disabling the filesystem
|
|
155
|
+
// if recovery's ResumeImpl triggers WriteManifest before we re-enable.
|
|
156
|
+
TEST_SYNC_POINT("SeqnoGoesBackwardsDuringErrorRecovery:0");
|
|
157
|
+
SyncPoint::GetInstance()->ClearCallBack(
|
|
158
|
+
"VersionSet::LogAndApply:WriteManifest");
|
|
159
|
+
fault_fs_->SetFilesystemActive(true);
|
|
160
|
+
TEST_SYNC_POINT("SeqnoGoesBackwardsDuringErrorRecovery:1");
|
|
161
|
+
|
|
162
|
+
// Wait for recovery to complete
|
|
163
|
+
TEST_SYNC_POINT("SeqnoGoesBackwardsDuringErrorRecovery:2");
|
|
164
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
165
|
+
|
|
166
|
+
// Write some more data after recovery
|
|
167
|
+
for (int i = 20; i < 30; i++) {
|
|
168
|
+
Transaction* txn = db_->BeginTransaction(write_opts, txn_opts);
|
|
169
|
+
ASSERT_NE(txn, nullptr);
|
|
170
|
+
ASSERT_OK(txn->SetName("txn_after_" + std::to_string(i)));
|
|
171
|
+
ASSERT_OK(txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
|
|
172
|
+
ASSERT_OK(txn->Prepare());
|
|
173
|
+
ASSERT_OK(txn->Commit());
|
|
174
|
+
delete txn;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// Close and reopen - this would fail with "sequence number going backwards"
|
|
178
|
+
// before the fix.
|
|
179
|
+
Close();
|
|
180
|
+
|
|
181
|
+
Status reopen_s = Open();
|
|
182
|
+
ASSERT_OK(reopen_s);
|
|
183
|
+
|
|
184
|
+
// Verify data integrity
|
|
185
|
+
ReadOptions read_opts;
|
|
186
|
+
for (int i = 0; i < 20; i++) {
|
|
187
|
+
std::string value;
|
|
188
|
+
ASSERT_OK(db_->Get(read_opts, "key" + std::to_string(i), &value));
|
|
189
|
+
ASSERT_EQ(value, "value" + std::to_string(i));
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
Close();
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Test that verifies the sequence number discrepancy is resolved by checking
|
|
196
|
+
// that LastSequence >= LastAllocatedSequence after recovery completes.
|
|
197
|
+
TEST_F(WritePreparedTransactionSeqnoTest, SeqnoDiscrepancyDuringErrorRecovery) {
|
|
198
|
+
ASSERT_OK(Open());
|
|
199
|
+
|
|
200
|
+
WriteOptions write_opts;
|
|
201
|
+
TransactionOptions txn_opts;
|
|
202
|
+
|
|
203
|
+
// Write initial data and flush
|
|
204
|
+
for (int i = 0; i < 5; i++) {
|
|
205
|
+
Transaction* txn = db_->BeginTransaction(write_opts, txn_opts);
|
|
206
|
+
ASSERT_NE(txn, nullptr);
|
|
207
|
+
ASSERT_OK(txn->SetName("init_txn" + std::to_string(i)));
|
|
208
|
+
ASSERT_OK(txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
|
|
209
|
+
ASSERT_OK(txn->Prepare());
|
|
210
|
+
ASSERT_OK(txn->Commit());
|
|
211
|
+
delete txn;
|
|
212
|
+
}
|
|
213
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
214
|
+
|
|
215
|
+
// Write more transactions with two_write_queues to potentially create a gap
|
|
216
|
+
// between allocated and published sequence numbers. These must be written
|
|
217
|
+
// before installing the error injection callback, since the small write
|
|
218
|
+
// buffer (4KB) could trigger an automatic flush during these writes.
|
|
219
|
+
for (int i = 5; i < 10; i++) {
|
|
220
|
+
Transaction* txn = db_->BeginTransaction(write_opts, txn_opts);
|
|
221
|
+
ASSERT_NE(txn, nullptr);
|
|
222
|
+
ASSERT_OK(txn->SetName("txn" + std::to_string(i)));
|
|
223
|
+
ASSERT_OK(txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
|
|
224
|
+
ASSERT_OK(txn->Prepare());
|
|
225
|
+
ASSERT_OK(txn->Commit());
|
|
226
|
+
delete txn;
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
// Track sequence numbers at key points
|
|
230
|
+
std::atomic<uint64_t> last_seq_after_recovery{0};
|
|
231
|
+
std::atomic<uint64_t> last_allocated_seq_after_recovery{0};
|
|
232
|
+
std::atomic<bool> captured_seqs_after{false};
|
|
233
|
+
|
|
234
|
+
IOStatus error_to_inject = IOStatus::IOError("Injected error");
|
|
235
|
+
error_to_inject.SetRetryable(true);
|
|
236
|
+
|
|
237
|
+
// Set up sync point dependency chain for deterministic synchronization
|
|
238
|
+
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
|
239
|
+
{{"RecoverFromRetryableBGIOError:BeforeStart",
|
|
240
|
+
"SeqnoDiscrepancyDuringErrorRecovery:0"},
|
|
241
|
+
{"SeqnoDiscrepancyDuringErrorRecovery:1",
|
|
242
|
+
"RecoverFromRetryableBGIOError:BeforeWait1"},
|
|
243
|
+
{"RecoverFromRetryableBGIOError:RecoverSuccess",
|
|
244
|
+
"SeqnoDiscrepancyDuringErrorRecovery:2"}});
|
|
245
|
+
|
|
246
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
247
|
+
"VersionSet::LogAndApply:WriteManifest",
|
|
248
|
+
[&](void*) { fault_fs_->SetFilesystemActive(false, error_to_inject); });
|
|
249
|
+
|
|
250
|
+
// Capture sequence numbers after recovery completes to verify the fix
|
|
251
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
252
|
+
"RecoverFromRetryableBGIOError:RecoverSuccess", [&](void*) {
|
|
253
|
+
DBImpl* db_impl = dbimpl();
|
|
254
|
+
if (db_impl) {
|
|
255
|
+
VersionSet* vs = db_impl->GetVersionSet();
|
|
256
|
+
if (vs) {
|
|
257
|
+
last_seq_after_recovery.store(vs->LastSequence());
|
|
258
|
+
last_allocated_seq_after_recovery.store(
|
|
259
|
+
vs->LastAllocatedSequence());
|
|
260
|
+
captured_seqs_after.store(true);
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
});
|
|
264
|
+
|
|
265
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
266
|
+
|
|
267
|
+
// Trigger a flush that will fail
|
|
268
|
+
Status flush_s = db_->Flush(FlushOptions());
|
|
269
|
+
ASSERT_NOK(flush_s);
|
|
270
|
+
|
|
271
|
+
// Wait for recovery to start, re-enable filesystem, let it proceed.
|
|
272
|
+
// Clear the callback first to prevent it from re-disabling the filesystem
|
|
273
|
+
// if recovery's ResumeImpl triggers WriteManifest before we re-enable.
|
|
274
|
+
TEST_SYNC_POINT("SeqnoDiscrepancyDuringErrorRecovery:0");
|
|
275
|
+
SyncPoint::GetInstance()->ClearCallBack(
|
|
276
|
+
"VersionSet::LogAndApply:WriteManifest");
|
|
277
|
+
fault_fs_->SetFilesystemActive(true);
|
|
278
|
+
TEST_SYNC_POINT("SeqnoDiscrepancyDuringErrorRecovery:1");
|
|
279
|
+
|
|
280
|
+
// Wait for recovery to complete
|
|
281
|
+
TEST_SYNC_POINT("SeqnoDiscrepancyDuringErrorRecovery:2");
|
|
282
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
283
|
+
|
|
284
|
+
// Verify that sequences were captured and are in sync after recovery
|
|
285
|
+
ASSERT_TRUE(captured_seqs_after.load());
|
|
286
|
+
ASSERT_GE(last_seq_after_recovery.load(),
|
|
287
|
+
last_allocated_seq_after_recovery.load())
|
|
288
|
+
<< "LastSequence should be >= LastAllocatedSequence after recovery";
|
|
289
|
+
|
|
290
|
+
// Close and reopen should succeed without corruption
|
|
291
|
+
Close();
|
|
292
|
+
ASSERT_OK(Open());
|
|
293
|
+
|
|
294
|
+
// Verify data integrity
|
|
295
|
+
ReadOptions read_opts;
|
|
296
|
+
for (int i = 0; i < 10; i++) {
|
|
297
|
+
std::string value;
|
|
298
|
+
ASSERT_OK(db_->Get(read_opts, "key" + std::to_string(i), &value));
|
|
299
|
+
ASSERT_EQ(value, "value" + std::to_string(i));
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
Close();
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
// Test that verifies SyncLastSequenceWithAllocated is called during ResumeImpl
|
|
306
|
+
// by asserting LastSequence == LastAllocatedSequence at the AfterSyncSeq sync point.
|
|
307
|
+
TEST_F(WritePreparedTransactionSeqnoTest, ConcurrentWritesDuringErrorRecovery) {
|
|
308
|
+
ASSERT_OK(Open());
|
|
309
|
+
|
|
310
|
+
WriteOptions write_opts;
|
|
311
|
+
TransactionOptions txn_opts;
|
|
312
|
+
|
|
313
|
+
// Write initial data and flush
|
|
314
|
+
for (int i = 0; i < 5; i++) {
|
|
315
|
+
Transaction* txn = db_->BeginTransaction(write_opts, txn_opts);
|
|
316
|
+
ASSERT_NE(txn, nullptr);
|
|
317
|
+
ASSERT_OK(txn->SetName("init_txn" + std::to_string(i)));
|
|
318
|
+
ASSERT_OK(txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
|
|
319
|
+
ASSERT_OK(txn->Prepare());
|
|
320
|
+
ASSERT_OK(txn->Commit());
|
|
321
|
+
delete txn;
|
|
322
|
+
}
|
|
323
|
+
ASSERT_OK(db_->Flush(FlushOptions()));
|
|
324
|
+
|
|
325
|
+
// Write more transactions. These must be written before installing the error
|
|
326
|
+
// injection callback, since the small write buffer (4KB) could trigger an
|
|
327
|
+
// automatic flush during these writes.
|
|
328
|
+
for (int i = 5; i < 10; i++) {
|
|
329
|
+
Transaction* txn = db_->BeginTransaction(write_opts, txn_opts);
|
|
330
|
+
ASSERT_NE(txn, nullptr);
|
|
331
|
+
ASSERT_OK(txn->SetName("txn" + std::to_string(i)));
|
|
332
|
+
ASSERT_OK(txn->Put("key" + std::to_string(i), "value" + std::to_string(i)));
|
|
333
|
+
ASSERT_OK(txn->Prepare());
|
|
334
|
+
ASSERT_OK(txn->Commit());
|
|
335
|
+
delete txn;
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
// Track sequence numbers at key points during recovery
|
|
339
|
+
std::atomic<uint64_t> seq_before_resume{0};
|
|
340
|
+
std::atomic<uint64_t> alloc_seq_before_resume{0};
|
|
341
|
+
std::atomic<uint64_t> seq_after_resume{0};
|
|
342
|
+
std::atomic<uint64_t> alloc_seq_after_resume{0};
|
|
343
|
+
|
|
344
|
+
IOStatus error_to_inject = IOStatus::IOError("Injected error");
|
|
345
|
+
error_to_inject.SetRetryable(true);
|
|
346
|
+
|
|
347
|
+
// Set up sync point dependency chain for deterministic synchronization
|
|
348
|
+
ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
|
|
349
|
+
{{"RecoverFromRetryableBGIOError:BeforeStart",
|
|
350
|
+
"ConcurrentWritesDuringErrorRecovery:0"},
|
|
351
|
+
{"ConcurrentWritesDuringErrorRecovery:1",
|
|
352
|
+
"RecoverFromRetryableBGIOError:BeforeWait1"},
|
|
353
|
+
{"RecoverFromRetryableBGIOError:RecoverSuccess",
|
|
354
|
+
"ConcurrentWritesDuringErrorRecovery:2"}});
|
|
355
|
+
|
|
356
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
357
|
+
"VersionSet::LogAndApply:WriteManifest",
|
|
358
|
+
[&](void*) { fault_fs_->SetFilesystemActive(false, error_to_inject); });
|
|
359
|
+
|
|
360
|
+
// Capture sequences right before ResumeImpl runs the sync
|
|
361
|
+
SyncPoint::GetInstance()->SetCallBack("DBImpl::ResumeImpl:Start", [&](void*) {
|
|
362
|
+
DBImpl* db_impl = dbimpl();
|
|
363
|
+
if (db_impl) {
|
|
364
|
+
VersionSet* vs = db_impl->GetVersionSet();
|
|
365
|
+
if (vs) {
|
|
366
|
+
seq_before_resume.store(vs->LastSequence());
|
|
367
|
+
alloc_seq_before_resume.store(vs->LastAllocatedSequence());
|
|
368
|
+
}
|
|
369
|
+
}
|
|
370
|
+
});
|
|
371
|
+
|
|
372
|
+
// Capture sequences right after ResumeImpl syncs them
|
|
373
|
+
SyncPoint::GetInstance()->SetCallBack(
|
|
374
|
+
"DBImpl::ResumeImpl:AfterSyncSeq", [&](void*) {
|
|
375
|
+
DBImpl* db_impl = dbimpl();
|
|
376
|
+
if (db_impl) {
|
|
377
|
+
VersionSet* vs = db_impl->GetVersionSet();
|
|
378
|
+
if (vs) {
|
|
379
|
+
seq_after_resume.store(vs->LastSequence());
|
|
380
|
+
alloc_seq_after_resume.store(vs->LastAllocatedSequence());
|
|
381
|
+
}
|
|
382
|
+
}
|
|
383
|
+
});
|
|
384
|
+
|
|
385
|
+
SyncPoint::GetInstance()->EnableProcessing();
|
|
386
|
+
|
|
387
|
+
// Trigger a flush that will fail
|
|
388
|
+
Status flush_s = db_->Flush(FlushOptions());
|
|
389
|
+
ASSERT_NOK(flush_s);
|
|
390
|
+
|
|
391
|
+
// Wait for recovery to start, re-enable filesystem, let it proceed.
|
|
392
|
+
// Clear the callback first to prevent it from re-disabling the filesystem
|
|
393
|
+
// if recovery's ResumeImpl triggers WriteManifest before we re-enable.
|
|
394
|
+
TEST_SYNC_POINT("ConcurrentWritesDuringErrorRecovery:0");
|
|
395
|
+
SyncPoint::GetInstance()->ClearCallBack(
|
|
396
|
+
"VersionSet::LogAndApply:WriteManifest");
|
|
397
|
+
fault_fs_->SetFilesystemActive(true);
|
|
398
|
+
TEST_SYNC_POINT("ConcurrentWritesDuringErrorRecovery:1");
|
|
399
|
+
|
|
400
|
+
// Wait for recovery to complete
|
|
401
|
+
TEST_SYNC_POINT("ConcurrentWritesDuringErrorRecovery:2");
|
|
402
|
+
SyncPoint::GetInstance()->DisableProcessing();
|
|
403
|
+
|
|
404
|
+
// Verify that the AfterSyncSeq callback fired and sequences are in sync
|
|
405
|
+
ASSERT_GT(seq_after_resume.load(), 0u)
|
|
406
|
+
<< "DBImpl::ResumeImpl:AfterSyncSeq callback should have fired";
|
|
407
|
+
ASSERT_EQ(seq_after_resume.load(), alloc_seq_after_resume.load())
|
|
408
|
+
<< "Fix should have synced sequences";
|
|
409
|
+
|
|
410
|
+
// Close and reopen
|
|
411
|
+
Close();
|
|
412
|
+
ASSERT_OK(Open());
|
|
413
|
+
|
|
414
|
+
// Verify data integrity
|
|
415
|
+
ReadOptions read_opts;
|
|
416
|
+
for (int i = 0; i < 10; i++) {
|
|
417
|
+
std::string value;
|
|
418
|
+
ASSERT_OK(db_->Get(read_opts, "key" + std::to_string(i), &value));
|
|
419
|
+
ASSERT_EQ(value, "value" + std::to_string(i));
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
Close();
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
426
|
+
|
|
427
|
+
int main(int argc, char** argv) {
|
|
428
|
+
ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
|
|
429
|
+
::testing::InitGoogleTest(&argc, argv);
|
|
430
|
+
return RUN_ALL_TESTS();
|
|
431
|
+
}
|
|
@@ -196,7 +196,7 @@ TEST(PreparedHeap, Concurrent) {
|
|
|
196
196
|
TEST(WriteBatchWithIndex, SubBatchCnt) {
|
|
197
197
|
ColumnFamilyOptions cf_options;
|
|
198
198
|
std::string cf_name = "two";
|
|
199
|
-
DB
|
|
199
|
+
std::unique_ptr<DB> db;
|
|
200
200
|
Options options;
|
|
201
201
|
options.create_if_missing = true;
|
|
202
202
|
const std::string dbname = test::PerThreadDBPath("transaction_testdb");
|
|
@@ -285,7 +285,6 @@ TEST(WriteBatchWithIndex, SubBatchCnt) {
|
|
|
285
285
|
}
|
|
286
286
|
|
|
287
287
|
delete cf_handle;
|
|
288
|
-
delete db;
|
|
289
288
|
}
|
|
290
289
|
|
|
291
290
|
TEST(CommitEntry64b, BasicTest) {
|
|
@@ -36,6 +36,97 @@ class WritePreparedTxnDB;
|
|
|
36
36
|
// committed data from uncommitted data. Uncommitted data could be after the
|
|
37
37
|
// Prepare phase in 2PC (WritePreparedTxn) or before that
|
|
38
38
|
// (WriteUnpreparedTxnImpl).
|
|
39
|
+
//
|
|
40
|
+
// == Concrete example: WritePrepared 2PC transaction ==
|
|
41
|
+
//
|
|
42
|
+
// User code:
|
|
43
|
+
//
|
|
44
|
+
// Transaction* txn = db->BeginTransaction(write_opts, txn_opts);
|
|
45
|
+
// txn->SetName("txn1");
|
|
46
|
+
// txn->Put("key1", "value1"); // buffered in WriteBatch, nothing written yet
|
|
47
|
+
// txn->Prepare(); txn->Commit(); // Phase 1, then Phase 2
|
|
48
|
+
//
|
|
49
|
+
// -- Phase 1: Prepare (PrepareInternal) --
|
|
50
|
+
//
|
|
51
|
+
// The Prepare call (write_prepared_txn.cc PrepareInternal) calls:
|
|
52
|
+
//
|
|
53
|
+
// db_impl_->WriteImpl(write_options, GetWriteBatch(),
|
|
54
|
+
// ..., !DISABLE_MEMTABLE, ...);
|
|
55
|
+
//
|
|
56
|
+
// !DISABLE_MEMTABLE is false — memtable is enabled. This is the defining
|
|
57
|
+
// characteristic of "WritePrepared": the actual data (Put("key1", "value1"))
|
|
58
|
+
// is written to the memtable at Prepare time.
|
|
59
|
+
//
|
|
60
|
+
// Because disable_memtable == false, the routing check at
|
|
61
|
+
// db_impl_write.cc:502 is not taken. The write goes through the main write
|
|
62
|
+
// queue (write_thread_), which handles both WAL and memtable:
|
|
63
|
+
//
|
|
64
|
+
// Destination | What gets written | Sequence
|
|
65
|
+
// ------------|--------------------------------------------|-----------
|
|
66
|
+
// WAL | Put(key1, value1) + EndPrepare(txn1) | prepare_seq
|
|
67
|
+
// Memtable | Put(key1, value1) | prepare_seq
|
|
68
|
+
//
|
|
69
|
+
// The data is now durable (WAL) and in the memtable, but not yet visible
|
|
70
|
+
// to readers. Readers use GetLastPublishedSequence() which consults a
|
|
71
|
+
// commit map — since prepare_seq is in the PreparedHeap but not yet in the
|
|
72
|
+
// CommitCache, readers know this data is uncommitted and skip it.
|
|
73
|
+
//
|
|
74
|
+
// -- Phase 2: Commit (CommitInternal) --
|
|
75
|
+
//
|
|
76
|
+
// The Commit call (write_prepared_txn.cc CommitInternal) calls:
|
|
77
|
+
//
|
|
78
|
+
// db_impl_->WriteImpl(write_options_, working_batch,
|
|
79
|
+
// ..., disable_memtable, ...);
|
|
80
|
+
//
|
|
81
|
+
// In the typical case (do_one_write == true, i.e., the commit-time batch
|
|
82
|
+
// is empty or has no data), disable_memtable is true. Now the routing
|
|
83
|
+
// check at db_impl_write.cc:502 is taken:
|
|
84
|
+
//
|
|
85
|
+
// if (two_write_queues_ && disable_memtable) {
|
|
86
|
+
// return WriteImplWALOnly(&nonmem_write_thread_, ...);
|
|
87
|
+
// }
|
|
88
|
+
//
|
|
89
|
+
// The commit goes through the second write queue (nonmem_write_thread_),
|
|
90
|
+
// WAL only:
|
|
91
|
+
//
|
|
92
|
+
// Destination | What gets written | Sequence
|
|
93
|
+
// ------------|---------------------|-----------
|
|
94
|
+
// WAL | Commit(txn1) marker | commit_seq
|
|
95
|
+
// Memtable | Nothing | —
|
|
96
|
+
//
|
|
97
|
+
// The PreReleaseCallback (WritePreparedCommitEntryPreReleaseCallback)
|
|
98
|
+
// updates the CommitCache to record that prepare_seq was committed at
|
|
99
|
+
// commit_seq. After this, readers consulting the commit map will see that
|
|
100
|
+
// the data at prepare_seq is committed and therefore visible.
|
|
101
|
+
//
|
|
102
|
+
// -- Why two queues help --
|
|
103
|
+
//
|
|
104
|
+
// The Commit phase doesn't touch the memtable — it only writes a small
|
|
105
|
+
// marker to WAL and updates an in-memory commit map. By routing this
|
|
106
|
+
// through a separate queue, Commit writes don't have to wait behind other
|
|
107
|
+
// transactions' Prepare writes (which do the expensive memtable insertion
|
|
108
|
+
// on the main queue). This is the optimization mentioned in the options
|
|
109
|
+
// comment about MySQL 2PC where commits are serial.
|
|
110
|
+
//
|
|
111
|
+
// -- Sequence number flow --
|
|
112
|
+
//
|
|
113
|
+
// Columns, left to right:
|
|
114
|
+
// last_sequence_ | last_allocated_seq | last_published_seq
|
|
115
|
+
// ---------------|--------------------|-------------------
|
|
116
|
+
// Before Prepare: 9 | 9 | 9
|
|
117
|
+
//
|
|
118
|
+
// Prepare (main queue):
|
|
119
|
+
// FetchAdd alloc seq 9 | 10 | 9
|
|
120
|
+
// Write WAL + memtable
|
|
121
|
+
// SetLastSequence 10 | 10 | 9
|
|
122
|
+
// (published_seq not advanced yet — data is uncommitted)
|
|
123
|
+
//
|
|
124
|
+
// Commit (2nd queue):
|
|
125
|
+
// FetchAdd alloc seq 10 | 11 | 9
|
|
126
|
+
// Write WAL only
|
|
127
|
+
// Update CommitCache
|
|
128
|
+
// SetLastPublishedSeq 10 | 11 | 11
|
|
129
|
+
//
|
|
39
130
|
class WritePreparedTxn : public PessimisticTransaction {
|
|
40
131
|
public:
|
|
41
132
|
WritePreparedTxn(WritePreparedTxnDB* db, const WriteOptions& write_options,
|