@nxtedition/rocksdb 5.2.21 → 5.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +216 -252
- package/binding.gyp +78 -72
- package/deps/rocksdb/build_version.cc +70 -4
- package/deps/rocksdb/rocksdb/CMakeLists.txt +281 -149
- package/deps/rocksdb/rocksdb/Makefile +459 -469
- package/deps/rocksdb/rocksdb/README.md +4 -4
- package/deps/rocksdb/rocksdb/TARGETS +5244 -1500
- package/deps/rocksdb/rocksdb/cache/cache.cc +12 -3
- package/deps/rocksdb/rocksdb/cache/cache_bench.cc +7 -368
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +924 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +128 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +103 -0
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +183 -0
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +11 -0
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +344 -0
- package/deps/rocksdb/rocksdb/cache/cache_key.h +132 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +183 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +288 -0
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +468 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +85 -8
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +121 -51
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +171 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +86 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +607 -0
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +381 -154
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +176 -33
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1659 -3
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +94 -23
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +49 -28
- package/deps/rocksdb/rocksdb/crash_test.mk +93 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +54 -31
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +10 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +146 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator_test.cc +326 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.cc +34 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_fetcher.h +37 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_addition.cc +4 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_addition_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +99 -40
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.h +20 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +95 -83
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +13 -10
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +7 -4
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache_test.cc +37 -37
- package/deps/rocksdb/rocksdb/db/blob/blob_file_completion_callback.h +101 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.cc +8 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_meta.h +6 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +209 -44
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +37 -11
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +382 -179
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.cc +100 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter.h +102 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_garbage_meter_test.cc +196 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +3 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_log_format.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +7 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.h +10 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +12 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.h +5 -5
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +772 -9
- package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +730 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_corruption_test.cc +82 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +155 -17
- package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.cc +21 -0
- package/deps/rocksdb/rocksdb/db/blob/prefetch_buffer_collection.h +38 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +137 -89
- package/deps/rocksdb/rocksdb/db/builder.h +16 -37
- package/deps/rocksdb/rocksdb/db/c.cc +413 -208
- package/deps/rocksdb/rocksdb/db/c_test.c +227 -138
- package/deps/rocksdb/rocksdb/db/column_family.cc +118 -103
- package/deps/rocksdb/rocksdb/db/column_family.h +86 -44
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +38 -24
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +81 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +275 -0
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator_test.cc +258 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +81 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +43 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h +12 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +406 -215
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +147 -50
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +167 -61
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1321 -156
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +197 -28
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +246 -43
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +65 -26
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +7 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +122 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +18 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +536 -44
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +311 -30
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +849 -0
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +92 -0
- package/deps/rocksdb/rocksdb/db/compaction/sst_partitioner.cc +46 -0
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +6 -3
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +383 -28
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +7 -2
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +154 -45
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +1095 -33
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +1249 -203
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +135 -9
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +1348 -166
- package/deps/rocksdb/rocksdb/db/db_dynamic_level_test.cc +3 -5
- package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +312 -45
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1734 -48
- package/deps/rocksdb/rocksdb/db/{compacted_db_impl.cc → db_impl/compacted_db_impl.cc} +24 -7
- package/deps/rocksdb/rocksdb/db/{compacted_db_impl.h → db_impl/compacted_db_impl.h} +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +644 -333
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +365 -92
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +578 -210
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +38 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +17 -10
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +75 -74
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +450 -183
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +42 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +232 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +42 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +297 -100
- package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +16 -15
- package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +31 -1
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +6 -5
- package/deps/rocksdb/rocksdb/db/db_iter.cc +218 -153
- package/deps/rocksdb/rocksdb/db/db_iter.h +14 -12
- package/deps/rocksdb/rocksdb/db/db_iter_stress_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +84 -160
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +47 -6
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +204 -0
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +21 -13
- package/deps/rocksdb/rocksdb/db/db_logical_block_size_cache_test.cc +17 -10
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +38 -24
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +184 -19
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +183 -3
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +409 -9
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +92 -23
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +446 -0
- package/deps/rocksdb/rocksdb/db/{db_impl/db_secondary_test.cc → db_secondary_test.cc} +363 -35
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +520 -15
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +50 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +139 -4
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test.cc +669 -359
- package/deps/rocksdb/rocksdb/db/db_test2.cc +2110 -304
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +76 -43
- package/deps/rocksdb/rocksdb/db/db_test_util.h +231 -103
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +19 -11
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +490 -71
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +980 -349
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +11 -12
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +793 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +4 -12
- package/deps/rocksdb/rocksdb/db/dbformat.h +28 -18
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/deletefile_test.cc +50 -15
- package/deps/rocksdb/rocksdb/db/error_handler.cc +127 -41
- package/deps/rocksdb/rocksdb/db/error_handler.h +12 -5
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +524 -255
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +136 -11
- package/deps/rocksdb/rocksdb/db/event_helpers.h +27 -2
- package/deps/rocksdb/rocksdb/db/experimental.cc +100 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +307 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +137 -60
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +12 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -55
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +86 -5
- package/deps/rocksdb/rocksdb/db/filename_test.cc +63 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +619 -64
- package/deps/rocksdb/rocksdb/db/flush_job.h +30 -7
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +33 -16
- package/deps/rocksdb/rocksdb/db/flush_scheduler.h +2 -1
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +18 -17
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +5 -4
- package/deps/rocksdb/rocksdb/db/forward_iterator_bench.cc +0 -1
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +91 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +25 -14
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -5
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +471 -50
- package/deps/rocksdb/rocksdb/db/internal_stats.h +129 -25
- package/deps/rocksdb/rocksdb/db/job_context.h +22 -9
- package/deps/rocksdb/rocksdb/db/kv_checksum.h +394 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +518 -41
- package/deps/rocksdb/rocksdb/db/log_format.h +4 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +129 -6
- package/deps/rocksdb/rocksdb/db/log_reader.h +17 -1
- package/deps/rocksdb/rocksdb/db/log_test.cc +161 -11
- package/deps/rocksdb/rocksdb/db/log_writer.cc +92 -13
- package/deps/rocksdb/rocksdb/db/log_writer.h +18 -5
- package/deps/rocksdb/rocksdb/db/logs_with_prep_tracker.h +1 -1
- package/deps/rocksdb/rocksdb/db/lookup_key.h +0 -1
- package/deps/rocksdb/rocksdb/db/malloc_stats.cc +2 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +21 -8
- package/deps/rocksdb/rocksdb/db/memtable.cc +144 -54
- package/deps/rocksdb/rocksdb/db/memtable.h +72 -15
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +95 -47
- package/deps/rocksdb/rocksdb/db/memtable_list.h +33 -13
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +61 -31
- package/deps/rocksdb/rocksdb/db/merge_context.h +20 -8
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +54 -11
- package/deps/rocksdb/rocksdb/db/merge_helper.h +17 -6
- package/deps/rocksdb/rocksdb/db/merge_helper_test.cc +13 -7
- package/deps/rocksdb/rocksdb/db/merge_test.cc +40 -19
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +14 -25
- package/deps/rocksdb/rocksdb/db/output_validator.cc +3 -0
- package/deps/rocksdb/rocksdb/db/output_validator.h +5 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +32 -28
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.cc +43 -29
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler.h +9 -7
- package/deps/rocksdb/rocksdb/db/periodic_work_scheduler_test.cc +21 -16
- package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +1 -1
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +29 -36
- package/deps/rocksdb/rocksdb/db/pre_release_callback.h +1 -2
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +2 -2
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_bench.cc +11 -11
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc +14 -8
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +17 -0
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/read_callback.h +1 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +87 -58
- package/deps/rocksdb/rocksdb/db/repair_test.cc +35 -5
- package/deps/rocksdb/rocksdb/db/snapshot_impl.h +2 -1
- package/deps/rocksdb/rocksdb/db/table_cache.cc +95 -69
- package/deps/rocksdb/rocksdb/db/table_cache.h +63 -53
- package/deps/rocksdb/rocksdb/db/table_properties_collector.cc +4 -4
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +78 -10
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +28 -33
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +30 -51
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +12 -8
- package/deps/rocksdb/rocksdb/db/version_builder.cc +564 -341
- package/deps/rocksdb/rocksdb/db/version_builder.h +8 -8
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +327 -155
- package/deps/rocksdb/rocksdb/db/version_edit.cc +89 -27
- package/deps/rocksdb/rocksdb/db/version_edit.h +42 -17
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +324 -43
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +79 -22
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +165 -20
- package/deps/rocksdb/rocksdb/db/version_set.cc +935 -1034
- package/deps/rocksdb/rocksdb/db/version_set.h +183 -122
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +556 -138
- package/deps/rocksdb/rocksdb/db/version_util.h +68 -0
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +23 -21
- package/deps/rocksdb/rocksdb/db/wal_manager.h +5 -2
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +30 -27
- package/deps/rocksdb/rocksdb/db/write_batch.cc +704 -209
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +135 -2
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +209 -5
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/write_controller.cc +47 -54
- package/deps/rocksdb/rocksdb/db/write_controller.h +12 -9
- package/deps/rocksdb/rocksdb/db/write_controller_test.cc +215 -103
- package/deps/rocksdb/rocksdb/db/write_thread.cc +11 -0
- package/deps/rocksdb/rocksdb/db/write_thread.h +14 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +7 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +10 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress.cc +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +19 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +78 -25
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +13 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +29 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +199 -32
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.cc +188 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +59 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +77 -109
- package/deps/rocksdb/rocksdb/{third-party/folly/folly/synchronization/WaitOptions.cpp → db_stress_tool/db_stress_stat.cc} +9 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +7 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_table_properties_collector.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +699 -143
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +20 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +49 -39
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +631 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.h +287 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +1565 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +374 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +149 -18
- package/deps/rocksdb/rocksdb/env/composite_env.cc +464 -0
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +98 -646
- package/deps/rocksdb/rocksdb/env/emulated_clock.h +114 -0
- package/deps/rocksdb/rocksdb/env/env.cc +632 -42
- package/deps/rocksdb/rocksdb/env/env_basic_test.cc +84 -36
- package/deps/rocksdb/rocksdb/env/env_chroot.cc +88 -286
- package/deps/rocksdb/rocksdb/env/env_chroot.h +34 -1
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +469 -277
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +9 -30
- package/deps/rocksdb/rocksdb/env/env_posix.cc +110 -119
- package/deps/rocksdb/rocksdb/env/env_test.cc +1128 -39
- package/deps/rocksdb/rocksdb/env/file_system.cc +147 -8
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +207 -136
- package/deps/rocksdb/rocksdb/env/file_system_tracer.h +86 -54
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +192 -64
- package/deps/rocksdb/rocksdb/env/fs_readonly.h +107 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.cc +339 -0
- package/deps/rocksdb/rocksdb/env/fs_remap.h +139 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +245 -41
- package/deps/rocksdb/rocksdb/env/io_posix.h +66 -1
- package/deps/rocksdb/rocksdb/env/mock_env.cc +147 -149
- package/deps/rocksdb/rocksdb/env/mock_env.h +113 -11
- package/deps/rocksdb/rocksdb/env/mock_env_test.cc +2 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +164 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +71 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +9 -5
- package/deps/rocksdb/rocksdb/file/delete_scheduler.h +6 -4
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +19 -12
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +459 -70
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +205 -28
- package/deps/rocksdb/rocksdb/file/file_util.cc +39 -28
- package/deps/rocksdb/rocksdb/file/file_util.h +18 -27
- package/deps/rocksdb/rocksdb/file/filename.cc +59 -22
- package/deps/rocksdb/rocksdb/file/filename.h +13 -8
- package/deps/rocksdb/rocksdb/file/line_file_reader.cc +68 -0
- package/deps/rocksdb/rocksdb/file/line_file_reader.h +59 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +1130 -6
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +220 -36
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +69 -17
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +13 -12
- package/deps/rocksdb/rocksdb/file/read_write_util.cc +3 -38
- package/deps/rocksdb/rocksdb/file/read_write_util.h +0 -4
- package/deps/rocksdb/rocksdb/file/readahead_file_info.h +33 -0
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.cc +57 -9
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +58 -6
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.cc +29 -54
- package/deps/rocksdb/rocksdb/file/sst_file_manager_impl.h +22 -29
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +424 -50
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +66 -19
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +157 -66
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +224 -121
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +333 -30
- package/deps/rocksdb/rocksdb/include/rocksdb/cache_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cleanable.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +90 -50
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +13 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +20 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h +8 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +53 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +31 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/customizable.h +102 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +51 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +370 -262
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +286 -87
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +124 -64
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +27 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_checksum.h +21 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +384 -41
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +111 -143
- package/deps/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h +20 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/functor_wrapper.h +56 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +15 -33
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +37 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +314 -26
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +11 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +50 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +10 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +186 -96
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +373 -103
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +13 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/persistent_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +37 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +87 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +5 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +59 -30
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +11 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +22 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_partitioner.h +17 -10
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +121 -41
- package/deps/rocksdb/rocksdb/include/rocksdb/stats_history.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +114 -136
- package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +116 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +160 -18
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +57 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h +10 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record.h +247 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/trace_record_result.h +187 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +14 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/unique_id.h +46 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +14 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/agg_merge.h +138 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +631 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +142 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +12 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +368 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h +418 -63
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +143 -73
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/replayer.h +87 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +43 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +18 -23
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +26 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +32 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h +1 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +30 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/wal_filter.h +11 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +89 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h +108 -38
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +40 -23
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.h +12 -5
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +100 -49
- package/deps/rocksdb/rocksdb/logging/env_logger.h +7 -5
- package/deps/rocksdb/rocksdb/logging/env_logger_test.cc +0 -1
- package/deps/rocksdb/rocksdb/logging/posix_logger.h +3 -9
- package/deps/rocksdb/rocksdb/memory/arena.cc +3 -1
- package/deps/rocksdb/rocksdb/memory/arena.h +1 -1
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +171 -106
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +31 -15
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.cc +15 -4
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator.h +24 -8
- package/deps/rocksdb/rocksdb/memory/memory_allocator.cc +91 -0
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +239 -0
- package/deps/rocksdb/rocksdb/memory/memory_usage.h +14 -1
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.cc +72 -9
- package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc +52 -6
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +53 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist_test.cc +5 -5
- package/deps/rocksdb/rocksdb/memtable/memtablerep_bench.cc +17 -5
- package/deps/rocksdb/rocksdb/memtable/skiplist_test.cc +1 -1
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +87 -0
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +20 -10
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager.cc +148 -94
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +160 -62
- package/deps/rocksdb/rocksdb/microbench/CMakeLists.txt +17 -0
- package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +1360 -0
- package/deps/rocksdb/rocksdb/microbench/ribbon_bench.cc +153 -0
- package/deps/rocksdb/rocksdb/monitoring/histogram.cc +8 -15
- package/deps/rocksdb/rocksdb/monitoring/histogram.h +0 -1
- package/deps/rocksdb/rocksdb/monitoring/histogram_test.cc +18 -16
- package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.cc +9 -7
- package/deps/rocksdb/rocksdb/monitoring/histogram_windowing.h +5 -3
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.cc +7 -5
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +37 -12
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +26 -6
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +6 -10
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +14 -13
- package/deps/rocksdb/rocksdb/monitoring/perf_context_imp.h +19 -20
- package/deps/rocksdb/rocksdb/monitoring/perf_step_timer.h +18 -18
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +84 -2
- package/deps/rocksdb/rocksdb/monitoring/statistics.h +6 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics_test.cc +47 -2
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +67 -54
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +4 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +2 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +280 -212
- package/deps/rocksdb/rocksdb/options/cf_options.h +51 -57
- package/deps/rocksdb/rocksdb/options/configurable.cc +242 -138
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +4 -68
- package/deps/rocksdb/rocksdb/options/configurable_test.cc +144 -21
- package/deps/rocksdb/rocksdb/options/configurable_test.h +2 -3
- package/deps/rocksdb/rocksdb/options/customizable.cc +67 -7
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +1773 -151
- package/deps/rocksdb/rocksdb/options/db_options.cc +275 -47
- package/deps/rocksdb/rocksdb/options/db_options.h +36 -7
- package/deps/rocksdb/rocksdb/options/options.cc +49 -17
- package/deps/rocksdb/rocksdb/options/options_helper.cc +369 -352
- package/deps/rocksdb/rocksdb/options/options_helper.h +23 -23
- package/deps/rocksdb/rocksdb/options/options_parser.cc +18 -13
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +67 -54
- package/deps/rocksdb/rocksdb/options/options_test.cc +1162 -187
- package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +1 -1
- package/deps/rocksdb/rocksdb/port/lang.h +52 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.cc +31 -2
- package/deps/rocksdb/rocksdb/port/port_posix.h +20 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +20 -4
- package/deps/rocksdb/rocksdb/port/sys_time.h +2 -2
- package/deps/rocksdb/rocksdb/port/win/env_default.cc +7 -7
- package/deps/rocksdb/rocksdb/port/win/env_win.cc +44 -74
- package/deps/rocksdb/rocksdb/port/win/env_win.h +25 -23
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +32 -34
- package/deps/rocksdb/rocksdb/port/win/io_win.h +12 -6
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +55 -35
- package/deps/rocksdb/rocksdb/port/win/port_win.h +22 -5
- package/deps/rocksdb/rocksdb/port/win/win_logger.cc +3 -3
- package/deps/rocksdb/rocksdb/port/win/win_logger.h +3 -5
- package/deps/rocksdb/rocksdb/port/win/win_thread.cc +7 -1
- package/deps/rocksdb/rocksdb/port/win/win_thread.h +12 -17
- package/deps/rocksdb/rocksdb/python.mk +9 -0
- package/deps/rocksdb/rocksdb/src.mk +82 -34
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc +3 -4
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +158 -80
- package/deps/rocksdb/rocksdb/table/block_based/block.h +64 -36
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc +23 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block.h +13 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc +3 -218
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +603 -328
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +28 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +220 -82
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +28 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +598 -492
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +151 -96
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +31 -58
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +330 -92
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +50 -19
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +23 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +226 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +56 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +42 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +34 -20
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +9 -10
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +26 -3
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +844 -202
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +281 -81
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +62 -2
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.h +2 -3
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -7
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +22 -6
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +28 -26
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/mock_block_based_table.h +11 -4
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +68 -26
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +44 -9
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +12 -10
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.h +23 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +44 -19
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +5 -1
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +16 -28
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +7 -4
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +77 -57
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +23 -12
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +43 -56
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +8 -8
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +52 -70
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc +5 -8
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +17 -11
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h +2 -3
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc +42 -51
- package/deps/rocksdb/rocksdb/table/format.cc +258 -104
- package/deps/rocksdb/rocksdb/table/format.h +120 -109
- package/deps/rocksdb/rocksdb/table/get_context.cc +97 -65
- package/deps/rocksdb/rocksdb/table/get_context.h +19 -12
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +14 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +8 -0
- package/deps/rocksdb/rocksdb/table/merger_test.cc +3 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +11 -21
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +176 -171
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +47 -33
- package/deps/rocksdb/rocksdb/table/mock_table.cc +7 -9
- package/deps/rocksdb/rocksdb/table/mock_table.h +3 -2
- package/deps/rocksdb/rocksdb/table/multiget_context.h +15 -8
- package/deps/rocksdb/rocksdb/table/persistent_cache_helper.cc +22 -29
- package/deps/rocksdb/rocksdb/table/persistent_cache_options.h +6 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_bloom.h +5 -8
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +29 -26
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +12 -16
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.cc +145 -69
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.cc +7 -6
- package/deps/rocksdb/rocksdb/table/plain/plain_table_index.h +3 -4
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +3 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.h +1 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +13 -18
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.h +4 -9
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +55 -37
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +10 -5
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +11 -8
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +222 -16
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +106 -58
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +6 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +68 -44
- package/deps/rocksdb/rocksdb/table/table_factory.cc +37 -10
- package/deps/rocksdb/rocksdb/table/table_properties.cc +109 -54
- package/deps/rocksdb/rocksdb/table/table_properties_internal.h +4 -20
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +33 -32
- package/deps/rocksdb/rocksdb/table/table_reader_caller.h +2 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +989 -326
- package/deps/rocksdb/rocksdb/table/two_level_iterator.cc +4 -0
- package/deps/rocksdb/rocksdb/table/unique_id.cc +166 -0
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +59 -0
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.cc +1 -1
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +13 -10
- package/deps/rocksdb/rocksdb/test_util/sync_point.cc +1 -2
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +35 -16
- package/deps/rocksdb/rocksdb/test_util/sync_point_impl.cc +32 -10
- package/deps/rocksdb/rocksdb/test_util/sync_point_impl.h +31 -4
- package/deps/rocksdb/rocksdb/test_util/testharness.cc +53 -1
- package/deps/rocksdb/rocksdb/test_util/testharness.h +67 -3
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +236 -66
- package/deps/rocksdb/rocksdb/test_util/testutil.h +63 -100
- package/deps/rocksdb/rocksdb/test_util/transaction_test_util.cc +12 -1
- package/deps/rocksdb/rocksdb/tools/blob_dump.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc +6 -3
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h +1 -0
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/db_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +1420 -611
- package/deps/rocksdb/rocksdb/tools/db_bench_tool_test.cc +11 -8
- package/deps/rocksdb/rocksdb/tools/db_repl_stress.cc +11 -1
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_test.cc +4 -2
- package/deps/rocksdb/rocksdb/tools/io_tracer_parser_tool.cc +46 -22
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +655 -179
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +58 -6
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +472 -29
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +23 -2
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +2 -2
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.cc +246 -0
- package/deps/rocksdb/rocksdb/tools/simulated_hybrid_file_system.h +126 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +83 -29
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +38 -17
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_test.cc +191 -55
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.cc +219 -296
- package/deps/rocksdb/rocksdb/tools/trace_analyzer_tool.h +87 -53
- package/deps/rocksdb/rocksdb/tools/write_stress.cc +8 -7
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +6 -5
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.h +5 -4
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc +14 -9
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer.cc +134 -60
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer.h +49 -38
- package/deps/rocksdb/rocksdb/trace_replay/io_tracer_test.cc +152 -15
- package/deps/rocksdb/rocksdb/trace_replay/trace_record.cc +206 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.cc +190 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_handler.h +46 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_record_result.cc +146 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +475 -344
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.h +83 -95
- package/deps/rocksdb/rocksdb/util/autovector.h +38 -18
- package/deps/rocksdb/rocksdb/util/autovector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +4 -0
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +276 -94
- package/deps/rocksdb/rocksdb/util/build_version.cc.in +81 -4
- package/deps/rocksdb/rocksdb/util/cast_util.h +22 -0
- package/deps/rocksdb/rocksdb/util/channel.h +2 -0
- package/deps/rocksdb/rocksdb/util/coding.h +1 -33
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +8 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +163 -3
- package/deps/rocksdb/rocksdb/util/compression.cc +122 -0
- package/deps/rocksdb/rocksdb/util/compression.h +212 -7
- package/deps/rocksdb/rocksdb/util/compression_context_cache.cc +1 -3
- package/deps/rocksdb/rocksdb/util/crc32c.cc +165 -2
- package/deps/rocksdb/rocksdb/util/crc32c.h +6 -0
- package/deps/rocksdb/rocksdb/util/crc32c_arm64.cc +14 -0
- package/deps/rocksdb/rocksdb/util/crc32c_ppc.h +3 -0
- package/deps/rocksdb/rocksdb/util/crc32c_test.cc +47 -0
- package/deps/rocksdb/rocksdb/util/defer.h +30 -1
- package/deps/rocksdb/rocksdb/util/defer_test.cc +11 -0
- package/deps/rocksdb/rocksdb/util/duplicate_detector.h +3 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +3 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom_test.cc +5 -4
- package/deps/rocksdb/rocksdb/util/fastrange.h +2 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +36 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +3 -1
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +512 -52
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +65 -10
- package/deps/rocksdb/rocksdb/util/gflags_compat.h +6 -1
- package/deps/rocksdb/rocksdb/util/hash.cc +121 -3
- package/deps/rocksdb/rocksdb/util/hash.h +31 -1
- package/deps/rocksdb/rocksdb/util/hash128.h +26 -0
- package/deps/rocksdb/rocksdb/util/hash_containers.h +51 -0
- package/deps/rocksdb/rocksdb/util/hash_test.cc +194 -2
- package/deps/rocksdb/rocksdb/util/heap.h +6 -1
- package/deps/rocksdb/rocksdb/util/kv_map.h +1 -1
- package/deps/rocksdb/rocksdb/util/log_write_bench.cc +8 -6
- package/deps/rocksdb/rocksdb/util/math.h +74 -7
- package/deps/rocksdb/rocksdb/util/math128.h +13 -1
- package/deps/rocksdb/rocksdb/util/murmurhash.h +3 -3
- package/deps/rocksdb/rocksdb/util/random.cc +9 -0
- package/deps/rocksdb/rocksdb/util/random.h +6 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +298 -144
- package/deps/rocksdb/rocksdb/util/rate_limiter.h +68 -19
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +335 -23
- package/deps/rocksdb/rocksdb/util/repeatable_thread.h +10 -12
- package/deps/rocksdb/rocksdb/util/repeatable_thread_test.cc +18 -15
- package/deps/rocksdb/rocksdb/util/ribbon_alg.h +98 -74
- package/deps/rocksdb/rocksdb/util/ribbon_config.cc +506 -0
- package/deps/rocksdb/rocksdb/util/ribbon_config.h +182 -0
- package/deps/rocksdb/rocksdb/util/ribbon_impl.h +154 -79
- package/deps/rocksdb/rocksdb/util/ribbon_test.cc +742 -365
- package/deps/rocksdb/rocksdb/util/set_comparator.h +2 -0
- package/deps/rocksdb/rocksdb/util/slice.cc +198 -35
- package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -1
- package/deps/rocksdb/rocksdb/util/status.cc +32 -29
- package/deps/rocksdb/rocksdb/util/stop_watch.h +18 -18
- package/deps/rocksdb/rocksdb/util/string_util.cc +85 -6
- package/deps/rocksdb/rocksdb/util/string_util.h +47 -2
- package/deps/rocksdb/rocksdb/util/thread_guard.h +41 -0
- package/deps/rocksdb/rocksdb/util/thread_local.h +2 -2
- package/deps/rocksdb/rocksdb/util/thread_local_test.cc +22 -24
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +7 -6
- package/deps/rocksdb/rocksdb/util/timer.h +55 -46
- package/deps/rocksdb/rocksdb/util/timer_test.cc +50 -48
- package/deps/rocksdb/rocksdb/util/user_comparator_wrapper.h +4 -0
- package/deps/rocksdb/rocksdb/util/vector_iterator.h +31 -15
- package/deps/rocksdb/rocksdb/util/work_queue.h +2 -0
- package/deps/rocksdb/rocksdb/util/xxhash.cc +35 -1144
- package/deps/rocksdb/rocksdb/util/xxhash.h +5117 -373
- package/deps/rocksdb/rocksdb/util/xxph3.h +1762 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.cc +238 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge.h +49 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/agg_merge_test.cc +134 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.cc +104 -0
- package/deps/rocksdb/rocksdb/utilities/agg_merge/test_agg_merge.h +47 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +3164 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_impl.h +29 -0
- package/deps/rocksdb/rocksdb/utilities/{backupable/backupable_db_test.cc → backup/backup_engine_test.cc} +1679 -485
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc +6 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h +14 -9
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +37 -27
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +8 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h +13 -10
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc +44 -25
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +3 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +27 -19
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.h +4 -2
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +489 -0
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +366 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc +67 -4
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h +21 -6
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +107 -7
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_options.h +43 -0
- package/deps/rocksdb/rocksdb/utilities/cassandra/format.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc +24 -8
- package/deps/rocksdb/rocksdb/utilities/cassandra/merge_operator.h +7 -7
- package/deps/rocksdb/rocksdb/utilities/cassandra/serialize.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +99 -218
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h +8 -24
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +114 -1
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/layered_compaction_filter_base.h +6 -2
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc +0 -4
- package/deps/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/compaction_filters.cc +56 -0
- package/deps/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +355 -0
- package/deps/rocksdb/rocksdb/utilities/counted_fs.h +152 -0
- package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +13 -0
- package/deps/rocksdb/rocksdb/utilities/env_timed.cc +164 -122
- package/deps/rocksdb/rocksdb/utilities/env_timed.h +97 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +75 -17
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +19 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +539 -126
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +162 -17
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +110 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +94 -0
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +104 -0
- package/deps/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h +5 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/max.cc +4 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/put.cc +11 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc +0 -2
- package/deps/rocksdb/rocksdb/utilities/merge_operators/sortlist.h +5 -1
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc +29 -10
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc +29 -14
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +71 -18
- package/deps/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc +15 -9
- package/deps/rocksdb/rocksdb/utilities/merge_operators.cc +120 -0
- package/deps/rocksdb/rocksdb/utilities/merge_operators.h +3 -23
- package/deps/rocksdb/rocksdb/utilities/object_registry.cc +267 -42
- package/deps/rocksdb/rocksdb/utilities/object_registry_test.cc +702 -76
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +26 -5
- package/deps/rocksdb/rocksdb/utilities/options/options_util.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +124 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h +8 -9
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +15 -13
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h +4 -4
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc +8 -9
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +43 -35
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc +20 -18
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +107 -2
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +23 -15
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h +2 -2
- package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.cc +316 -0
- package/deps/rocksdb/rocksdb/utilities/trace/replayer_impl.h +86 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +4 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +119 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.cc +20 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/locktree/locktree.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_external_pthread.h +3 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.cc +38 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/range_tree_lock_manager.h +17 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +423 -34
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +82 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +72 -40
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h +32 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +13 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +7 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +207 -43
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +50 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.cc +28 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_util.h +11 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +516 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +506 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +27 -13
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +14 -14
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +14 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +305 -27
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +55 -159
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +209 -2
- package/deps/rocksdb/rocksdb/utilities/wal_filter.cc +23 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +157 -88
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +501 -114
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +91 -316
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +1212 -672
- package/deps/rocksdb/rocksdb.gyp +425 -446
- package/package.json +8 -8
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/darwin-x86/node.napi.node +0 -0
- package/prebuilds/{darwin-x64+arm64 → linux-x64}/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/env/env_hdfs.cc +0 -648
- package/deps/rocksdb/rocksdb/hdfs/README +0 -23
- package/deps/rocksdb/rocksdb/hdfs/env_hdfs.h +0 -386
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h +0 -535
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h +0 -175
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h +0 -34
- package/deps/rocksdb/rocksdb/memory/memkind_kmem_allocator_test.cc +0 -102
- package/deps/rocksdb/rocksdb/memtable/hash_linklist_rep.h +0 -49
- package/deps/rocksdb/rocksdb/memtable/hash_skiplist_rep.h +0 -44
- package/deps/rocksdb/rocksdb/options/customizable_helper.h +0 -216
- package/deps/rocksdb/rocksdb/third-party/folly/folly/CPortability.h +0 -27
- package/deps/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h +0 -45
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h +0 -166
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Optional.h +0 -570
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Portability.h +0 -92
- package/deps/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h +0 -54
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Traits.h +0 -152
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Unit.h +0 -59
- package/deps/rocksdb/rocksdb/third-party/folly/folly/Utility.h +0 -141
- package/deps/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h +0 -33
- package/deps/rocksdb/rocksdb/third-party/folly/folly/container/Array.h +0 -74
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h +0 -117
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp +0 -263
- package/deps/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h +0 -96
- package/deps/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h +0 -40
- package/deps/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h +0 -29
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h +0 -144
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h +0 -30
- package/deps/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h +0 -51
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h +0 -28
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h +0 -10
- package/deps/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h +0 -26
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h +0 -138
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp +0 -23
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h +0 -260
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h +0 -52
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h +0 -328
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h +0 -1703
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp +0 -16
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h +0 -304
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h +0 -39
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp +0 -26
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h +0 -318
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h +0 -219
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h +0 -207
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h +0 -164
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h +0 -57
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h +0 -77
- package/deps/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp +0 -1145
- package/deps/rocksdb/rocksdb/util/build_version.h +0 -15
- package/deps/rocksdb/rocksdb/util/xxh3p.h +0 -1392
- package/deps/rocksdb/rocksdb/utilities/backupable/backupable_db.cc +0 -2354
- package/deps/rocksdb/rocksdb/utilities/env_librados.cc +0 -1497
- package/deps/rocksdb/rocksdb/utilities/env_librados_test.cc +0 -1146
- package/prebuilds/linux-x64/node.napi.glibc.node +0 -0
|
@@ -7,12 +7,14 @@
|
|
|
7
7
|
// Use of this source code is governed by a BSD-style license that can be
|
|
8
8
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
9
9
|
#include <cinttypes>
|
|
10
|
+
#include <deque>
|
|
10
11
|
|
|
11
12
|
#include "db/builder.h"
|
|
12
13
|
#include "db/db_impl/db_impl.h"
|
|
13
14
|
#include "db/error_handler.h"
|
|
14
15
|
#include "db/event_helpers.h"
|
|
15
16
|
#include "file/sst_file_manager_impl.h"
|
|
17
|
+
#include "logging/logging.h"
|
|
16
18
|
#include "monitoring/iostats_context_imp.h"
|
|
17
19
|
#include "monitoring/perf_context_imp.h"
|
|
18
20
|
#include "monitoring/thread_status_updater.h"
|
|
@@ -101,6 +103,8 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
|
|
|
101
103
|
if (!logs_to_sync.empty()) {
|
|
102
104
|
mutex_.Unlock();
|
|
103
105
|
|
|
106
|
+
assert(job_context);
|
|
107
|
+
|
|
104
108
|
for (log::Writer* log : logs_to_sync) {
|
|
105
109
|
ROCKS_LOG_INFO(immutable_db_options_.info_log,
|
|
106
110
|
"[JOB %d] Syncing log #%" PRIu64, job_context->job_id,
|
|
@@ -118,9 +122,13 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
|
|
|
118
122
|
}
|
|
119
123
|
}
|
|
120
124
|
if (io_s.ok()) {
|
|
121
|
-
io_s = directories_.GetWalDir()->
|
|
125
|
+
io_s = directories_.GetWalDir()->FsyncWithDirOptions(
|
|
126
|
+
IOOptions(), nullptr,
|
|
127
|
+
DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
|
|
122
128
|
}
|
|
123
129
|
|
|
130
|
+
TEST_SYNC_POINT_CALLBACK("DBImpl::SyncClosedLogs:BeforeReLock",
|
|
131
|
+
/*arg=*/nullptr);
|
|
124
132
|
mutex_.Lock();
|
|
125
133
|
|
|
126
134
|
// "number <= current_log_number - 1" is equivalent to
|
|
@@ -131,16 +139,11 @@ IOStatus DBImpl::SyncClosedLogs(JobContext* job_context) {
|
|
|
131
139
|
MarkLogsNotSynced(current_log_number - 1);
|
|
132
140
|
}
|
|
133
141
|
if (!io_s.ok()) {
|
|
134
|
-
if (total_log_size_ > 0) {
|
|
135
|
-
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
|
|
136
|
-
} else {
|
|
137
|
-
// If the WAL is empty, we use different error reason
|
|
138
|
-
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlushNoWAL);
|
|
139
|
-
}
|
|
140
142
|
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Failed");
|
|
141
143
|
return io_s;
|
|
142
144
|
}
|
|
143
145
|
}
|
|
146
|
+
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:end");
|
|
144
147
|
return io_s;
|
|
145
148
|
}
|
|
146
149
|
|
|
@@ -154,50 +157,97 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
154
157
|
Env::Priority thread_pri) {
|
|
155
158
|
mutex_.AssertHeld();
|
|
156
159
|
assert(cfd);
|
|
160
|
+
assert(cfd->imm());
|
|
157
161
|
assert(cfd->imm()->NumNotFlushed() != 0);
|
|
158
162
|
assert(cfd->imm()->IsFlushPending());
|
|
163
|
+
assert(versions_);
|
|
164
|
+
assert(versions_->GetColumnFamilySet());
|
|
165
|
+
// If there are more than one column families, we need to make sure that
|
|
166
|
+
// all the log files except the most recent one are synced. Otherwise if
|
|
167
|
+
// the host crashes after flushing and before WAL is persistent, the
|
|
168
|
+
// flushed SST may contain data from write batches whose updates to
|
|
169
|
+
// other (unflushed) column families are missing.
|
|
170
|
+
const bool needs_to_sync_closed_wals =
|
|
171
|
+
logfile_number_ > 0 &&
|
|
172
|
+
versions_->GetColumnFamilySet()->NumberOfColumnFamilies() > 1;
|
|
173
|
+
|
|
174
|
+
// If needs_to_sync_closed_wals is true, we need to record the current
|
|
175
|
+
// maximum memtable ID of this column family so that a later PickMemtables()
|
|
176
|
+
// call will not pick memtables whose IDs are higher. This is due to the fact
|
|
177
|
+
// that SyncClosedLogs() may release the db mutex, and memtable switch can
|
|
178
|
+
// happen for this column family in the meantime. The newly created memtables
|
|
179
|
+
// have their data backed by unsynced WALs, thus they cannot be included in
|
|
180
|
+
// this flush job.
|
|
181
|
+
// Another reason why we must record the current maximum memtable ID of this
|
|
182
|
+
// column family: SyncClosedLogs() may release db mutex, thus it's possible
|
|
183
|
+
// for application to continue to insert into memtables increasing db's
|
|
184
|
+
// sequence number. The application may take a snapshot, but this snapshot is
|
|
185
|
+
// not included in `snapshot_seqs` which will be passed to flush job because
|
|
186
|
+
// `snapshot_seqs` has already been computed before this function starts.
|
|
187
|
+
// Recording the max memtable ID ensures that the flush job does not flush
|
|
188
|
+
// a memtable without knowing such snapshot(s).
|
|
189
|
+
uint64_t max_memtable_id = needs_to_sync_closed_wals
|
|
190
|
+
? cfd->imm()->GetLatestMemTableID()
|
|
191
|
+
: port::kMaxUint64;
|
|
192
|
+
|
|
193
|
+
// If needs_to_sync_closed_wals is false, then the flush job will pick ALL
|
|
194
|
+
// existing memtables of the column family when PickMemTable() is called
|
|
195
|
+
// later. Although we won't call SyncClosedLogs() in this case, we may still
|
|
196
|
+
// call the callbacks of the listeners, i.e. NotifyOnFlushBegin() which also
|
|
197
|
+
// releases and re-acquires the db mutex. In the meantime, the application
|
|
198
|
+
// can still insert into the memtables and increase the db's sequence number.
|
|
199
|
+
// The application can take a snapshot, hoping that the latest visible state
|
|
200
|
+
// to this snapshot is preserved. This is hard to guarantee since db mutex
|
|
201
|
+
// is not held. This newly-created snapshot is not included in `snapshot_seqs`
|
|
202
|
+
// and the flush job is unaware of its presence. Consequently, the flush job
|
|
203
|
+
// may drop certain keys when generating the L0, causing incorrect data to be
|
|
204
|
+
// returned for snapshot read using this snapshot.
|
|
205
|
+
// To address this, we make sure NotifyOnFlushBegin() executes after memtable
|
|
206
|
+
// picking so that no new snapshot can be taken between the two functions.
|
|
159
207
|
|
|
160
208
|
FlushJob flush_job(
|
|
161
|
-
dbname_, cfd, immutable_db_options_, mutable_cf_options,
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U),
|
|
209
|
+
dbname_, cfd, immutable_db_options_, mutable_cf_options, max_memtable_id,
|
|
210
|
+
file_options_for_compaction_, versions_.get(), &mutex_, &shutting_down_,
|
|
211
|
+
snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
|
|
212
|
+
job_context, log_buffer, directories_.GetDbDir(), GetDataDir(cfd, 0U),
|
|
166
213
|
GetCompressionFlush(*cfd->ioptions(), mutable_cf_options), stats_,
|
|
167
214
|
&event_logger_, mutable_cf_options.report_bg_io_stats,
|
|
168
215
|
true /* sync_output_directory */, true /* write_manifest */, thread_pri,
|
|
169
|
-
io_tracer_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow()
|
|
216
|
+
io_tracer_, db_id_, db_session_id_, cfd->GetFullHistoryTsLow(),
|
|
217
|
+
&blob_callback_);
|
|
170
218
|
FileMetaData file_meta;
|
|
171
219
|
|
|
172
|
-
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
|
|
173
|
-
flush_job.PickMemTable();
|
|
174
|
-
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:AfterPickMemtables");
|
|
175
|
-
|
|
176
|
-
#ifndef ROCKSDB_LITE
|
|
177
|
-
// may temporarily unlock and lock the mutex.
|
|
178
|
-
NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id);
|
|
179
|
-
#endif // ROCKSDB_LITE
|
|
180
|
-
|
|
181
220
|
Status s;
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
// If there are more than one column families, we need to make sure that
|
|
186
|
-
// all the log files except the most recent one are synced. Otherwise if
|
|
187
|
-
// the host crashes after flushing and before WAL is persistent, the
|
|
188
|
-
// flushed SST may contain data from write batches whose updates to
|
|
189
|
-
// other column families are missing.
|
|
221
|
+
bool need_cancel = false;
|
|
222
|
+
IOStatus log_io_s = IOStatus::OK();
|
|
223
|
+
if (needs_to_sync_closed_wals) {
|
|
190
224
|
// SyncClosedLogs() may unlock and re-lock the db_mutex.
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
|
|
225
|
+
log_io_s = SyncClosedLogs(job_context);
|
|
226
|
+
if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() &&
|
|
227
|
+
!log_io_s.IsColumnFamilyDropped()) {
|
|
228
|
+
error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
|
|
196
229
|
}
|
|
197
230
|
} else {
|
|
198
231
|
TEST_SYNC_POINT("DBImpl::SyncClosedLogs:Skip");
|
|
199
232
|
}
|
|
233
|
+
s = log_io_s;
|
|
234
|
+
|
|
235
|
+
// If the log sync failed, we do not need to pick memtable. Otherwise,
|
|
236
|
+
// num_flush_not_started_ needs to be rolled back.
|
|
237
|
+
TEST_SYNC_POINT("DBImpl::FlushMemTableToOutputFile:BeforePickMemtables");
|
|
238
|
+
if (s.ok()) {
|
|
239
|
+
flush_job.PickMemTable();
|
|
240
|
+
need_cancel = true;
|
|
241
|
+
}
|
|
242
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
243
|
+
"DBImpl::FlushMemTableToOutputFile:AfterPickMemtables", &flush_job);
|
|
244
|
+
|
|
245
|
+
#ifndef ROCKSDB_LITE
|
|
246
|
+
// may temporarily unlock and lock the mutex.
|
|
247
|
+
NotifyOnFlushBegin(cfd, &file_meta, mutable_cf_options, job_context->job_id);
|
|
248
|
+
#endif // ROCKSDB_LITE
|
|
200
249
|
|
|
250
|
+
bool switched_to_mempurge = false;
|
|
201
251
|
// Within flush_job.Run, rocksdb may call event listener to notify
|
|
202
252
|
// file creation and deletion.
|
|
203
253
|
//
|
|
@@ -205,12 +255,13 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
205
255
|
// and EventListener callback will be called when the db_mutex
|
|
206
256
|
// is unlocked by the current thread.
|
|
207
257
|
if (s.ok()) {
|
|
208
|
-
s = flush_job.Run(&logs_with_prep_tracker_, &file_meta
|
|
209
|
-
|
|
210
|
-
|
|
258
|
+
s = flush_job.Run(&logs_with_prep_tracker_, &file_meta,
|
|
259
|
+
&switched_to_mempurge);
|
|
260
|
+
need_cancel = false;
|
|
211
261
|
}
|
|
212
|
-
|
|
213
|
-
|
|
262
|
+
|
|
263
|
+
if (!s.ok() && need_cancel) {
|
|
264
|
+
flush_job.Cancel();
|
|
214
265
|
}
|
|
215
266
|
|
|
216
267
|
if (s.ok()) {
|
|
@@ -235,47 +286,46 @@ Status DBImpl::FlushMemTableToOutputFile(
|
|
|
235
286
|
|
|
236
287
|
const auto& blob_files = storage_info->GetBlobFiles();
|
|
237
288
|
if (!blob_files.empty()) {
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
289
|
+
assert(blob_files.front());
|
|
290
|
+
assert(blob_files.back());
|
|
291
|
+
|
|
292
|
+
ROCKS_LOG_BUFFER(
|
|
293
|
+
log_buffer,
|
|
294
|
+
"[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n",
|
|
295
|
+
column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(),
|
|
296
|
+
blob_files.back()->GetBlobFileNumber());
|
|
243
297
|
}
|
|
244
298
|
}
|
|
245
299
|
|
|
246
300
|
if (!s.ok() && !s.IsShutdownInProgress() && !s.IsColumnFamilyDropped()) {
|
|
247
|
-
if (
|
|
248
|
-
!io_s.IsColumnFamilyDropped()) {
|
|
301
|
+
if (log_io_s.ok()) {
|
|
249
302
|
// Error while writing to MANIFEST.
|
|
250
303
|
// In fact, versions_->io_status() can also be the result of renaming
|
|
251
304
|
// CURRENT file. With current code, it's just difficult to tell. So just
|
|
252
305
|
// be pessimistic and try write to a new MANIFEST.
|
|
253
306
|
// TODO: distinguish between MANIFEST write and CURRENT renaming
|
|
254
307
|
if (!versions_->io_status().ok()) {
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
BackgroundErrorReason::kManifestWriteNoWAL);
|
|
262
|
-
}
|
|
263
|
-
} else if (total_log_size_ > 0) {
|
|
264
|
-
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
|
|
308
|
+
// If WAL sync is successful (either WAL size is 0 or there is no IO
|
|
309
|
+
// error), all the Manifest write errors will be mapped to soft errors.
|
|
310
|
+
// TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor is
|
|
311
|
+
// needed.
|
|
312
|
+
error_handler_.SetBGError(s,
|
|
313
|
+
BackgroundErrorReason::kManifestWriteNoWAL);
|
|
265
314
|
} else {
|
|
266
|
-
// If
|
|
267
|
-
|
|
315
|
+
// If WAL sync is successful (either WAL size is 0 or there is no IO
|
|
316
|
+
// error), all the other SST file write errors will be set as
|
|
317
|
+
// kFlushNoWAL.
|
|
318
|
+
error_handler_.SetBGError(s, BackgroundErrorReason::kFlushNoWAL);
|
|
268
319
|
}
|
|
269
320
|
} else {
|
|
321
|
+
assert(s == log_io_s);
|
|
270
322
|
Status new_bg_error = s;
|
|
271
323
|
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
|
|
272
324
|
}
|
|
273
|
-
} else {
|
|
274
|
-
// If we got here, then we decided not to care about the i_os status (either
|
|
275
|
-
// from never needing it or ignoring the flush job status
|
|
276
|
-
io_s.PermitUncheckedError();
|
|
277
325
|
}
|
|
278
|
-
|
|
326
|
+
// If flush ran smoothly and no mempurge happened
|
|
327
|
+
// install new SST file path.
|
|
328
|
+
if (s.ok() && (!switched_to_mempurge)) {
|
|
279
329
|
#ifndef ROCKSDB_LITE
|
|
280
330
|
// may temporarily unlock and lock the mutex.
|
|
281
331
|
NotifyOnFlushCompleted(cfd, mutable_cf_options,
|
|
@@ -320,13 +370,14 @@ Status DBImpl::FlushMemTablesToOutputFiles(
|
|
|
320
370
|
&earliest_write_conflict_snapshot, &snapshot_checker);
|
|
321
371
|
const auto& bg_flush_arg = bg_flush_args[0];
|
|
322
372
|
ColumnFamilyData* cfd = bg_flush_arg.cfd_;
|
|
323
|
-
|
|
373
|
+
// intentional infrequent copy for each flush
|
|
374
|
+
MutableCFOptions mutable_cf_options_copy = *cfd->GetLatestMutableCFOptions();
|
|
324
375
|
SuperVersionContext* superversion_context =
|
|
325
376
|
bg_flush_arg.superversion_context_;
|
|
326
377
|
Status s = FlushMemTableToOutputFile(
|
|
327
|
-
cfd,
|
|
328
|
-
snapshot_seqs, earliest_write_conflict_snapshot,
|
|
329
|
-
log_buffer, thread_pri);
|
|
378
|
+
cfd, mutable_cf_options_copy, made_progress, job_context,
|
|
379
|
+
superversion_context, snapshot_seqs, earliest_write_conflict_snapshot,
|
|
380
|
+
snapshot_checker, log_buffer, thread_pri);
|
|
330
381
|
return s;
|
|
331
382
|
}
|
|
332
383
|
|
|
@@ -353,6 +404,7 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
353
404
|
for (const auto cfd : cfds) {
|
|
354
405
|
assert(cfd->imm()->NumNotFlushed() != 0);
|
|
355
406
|
assert(cfd->imm()->IsFlushPending());
|
|
407
|
+
assert(cfd->GetFlushReason() == cfds[0]->GetFlushReason());
|
|
356
408
|
}
|
|
357
409
|
#endif /* !NDEBUG */
|
|
358
410
|
|
|
@@ -400,13 +452,15 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
400
452
|
stats_, &event_logger_, mutable_cf_options.report_bg_io_stats,
|
|
401
453
|
false /* sync_output_directory */, false /* write_manifest */,
|
|
402
454
|
thread_pri, io_tracer_, db_id_, db_session_id_,
|
|
403
|
-
cfd->GetFullHistoryTsLow()));
|
|
404
|
-
jobs.back()->PickMemTable();
|
|
455
|
+
cfd->GetFullHistoryTsLow(), &blob_callback_));
|
|
405
456
|
}
|
|
406
457
|
|
|
407
458
|
std::vector<FileMetaData> file_meta(num_cfs);
|
|
459
|
+
// Use of deque<bool> because vector<bool>
|
|
460
|
+
// is a bit-packed specialization and doesn't allow &v[i].
|
|
461
|
+
std::deque<bool> switched_to_mempurge(num_cfs, false);
|
|
408
462
|
Status s;
|
|
409
|
-
IOStatus
|
|
463
|
+
IOStatus log_io_s = IOStatus::OK();
|
|
410
464
|
assert(num_cfs == static_cast<int>(jobs.size()));
|
|
411
465
|
|
|
412
466
|
#ifndef ROCKSDB_LITE
|
|
@@ -421,27 +475,45 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
421
475
|
if (logfile_number_ > 0) {
|
|
422
476
|
// TODO (yanqin) investigate whether we should sync the closed logs for
|
|
423
477
|
// single column family case.
|
|
424
|
-
|
|
425
|
-
|
|
478
|
+
log_io_s = SyncClosedLogs(job_context);
|
|
479
|
+
if (!log_io_s.ok() && !log_io_s.IsShutdownInProgress() &&
|
|
480
|
+
!log_io_s.IsColumnFamilyDropped()) {
|
|
481
|
+
if (total_log_size_ > 0) {
|
|
482
|
+
error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlush);
|
|
483
|
+
} else {
|
|
484
|
+
// If the WAL is empty, we use different error reason
|
|
485
|
+
error_handler_.SetBGError(log_io_s, BackgroundErrorReason::kFlushNoWAL);
|
|
486
|
+
}
|
|
487
|
+
}
|
|
426
488
|
}
|
|
489
|
+
s = log_io_s;
|
|
427
490
|
|
|
428
491
|
// exec_status stores the execution status of flush_jobs as
|
|
429
492
|
// <bool /* executed */, Status /* status code */>
|
|
430
493
|
autovector<std::pair<bool, Status>> exec_status;
|
|
431
|
-
|
|
494
|
+
std::vector<bool> pick_status;
|
|
432
495
|
for (int i = 0; i != num_cfs; ++i) {
|
|
433
496
|
// Initially all jobs are not executed, with status OK.
|
|
434
497
|
exec_status.emplace_back(false, Status::OK());
|
|
435
|
-
|
|
498
|
+
pick_status.push_back(false);
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
if (s.ok()) {
|
|
502
|
+
for (int i = 0; i != num_cfs; ++i) {
|
|
503
|
+
jobs[i]->PickMemTable();
|
|
504
|
+
pick_status[i] = true;
|
|
505
|
+
}
|
|
436
506
|
}
|
|
437
507
|
|
|
438
508
|
if (s.ok()) {
|
|
509
|
+
assert(switched_to_mempurge.size() ==
|
|
510
|
+
static_cast<long unsigned int>(num_cfs));
|
|
439
511
|
// TODO (yanqin): parallelize jobs with threads.
|
|
440
512
|
for (int i = 1; i != num_cfs; ++i) {
|
|
441
513
|
exec_status[i].second =
|
|
442
|
-
jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i]
|
|
514
|
+
jobs[i]->Run(&logs_with_prep_tracker_, &file_meta[i],
|
|
515
|
+
&(switched_to_mempurge.at(i)));
|
|
443
516
|
exec_status[i].first = true;
|
|
444
|
-
io_status[i] = jobs[i]->io_status();
|
|
445
517
|
}
|
|
446
518
|
if (num_cfs > 1) {
|
|
447
519
|
TEST_SYNC_POINT(
|
|
@@ -451,10 +523,10 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
451
523
|
}
|
|
452
524
|
assert(exec_status.size() > 0);
|
|
453
525
|
assert(!file_meta.empty());
|
|
454
|
-
exec_status[0].second =
|
|
455
|
-
|
|
526
|
+
exec_status[0].second = jobs[0]->Run(
|
|
527
|
+
&logs_with_prep_tracker_, file_meta.data() /* &file_meta[0] */,
|
|
528
|
+
switched_to_mempurge.empty() ? nullptr : &(switched_to_mempurge.at(0)));
|
|
456
529
|
exec_status[0].first = true;
|
|
457
|
-
io_status[0] = jobs[0]->io_status();
|
|
458
530
|
|
|
459
531
|
Status error_status;
|
|
460
532
|
for (const auto& e : exec_status) {
|
|
@@ -473,20 +545,6 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
473
545
|
s = error_status.ok() ? s : error_status;
|
|
474
546
|
}
|
|
475
547
|
|
|
476
|
-
if (io_s.ok()) {
|
|
477
|
-
IOStatus io_error = IOStatus::OK();
|
|
478
|
-
for (int i = 0; i != static_cast<int>(io_status.size()); i++) {
|
|
479
|
-
if (!io_status[i].ok() && !io_status[i].IsShutdownInProgress() &&
|
|
480
|
-
!io_status[i].IsColumnFamilyDropped()) {
|
|
481
|
-
io_error = io_status[i];
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
io_s = io_error;
|
|
485
|
-
if (s.ok() && !io_s.ok()) {
|
|
486
|
-
s = io_s;
|
|
487
|
-
}
|
|
488
|
-
}
|
|
489
|
-
|
|
490
548
|
if (s.IsColumnFamilyDropped()) {
|
|
491
549
|
s = Status::OK();
|
|
492
550
|
}
|
|
@@ -495,7 +553,9 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
495
553
|
// Sync on all distinct output directories.
|
|
496
554
|
for (auto dir : distinct_output_dirs) {
|
|
497
555
|
if (dir != nullptr) {
|
|
498
|
-
Status error_status = dir->
|
|
556
|
+
Status error_status = dir->FsyncWithDirOptions(
|
|
557
|
+
IOOptions(), nullptr,
|
|
558
|
+
DirFsyncOptions(DirFsyncOptions::FsyncReason::kNewFileSynced));
|
|
499
559
|
if (!error_status.ok()) {
|
|
500
560
|
s = error_status;
|
|
501
561
|
break;
|
|
@@ -508,12 +568,12 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
508
568
|
// Have to cancel the flush jobs that have NOT executed because we need to
|
|
509
569
|
// unref the versions.
|
|
510
570
|
for (int i = 0; i != num_cfs; ++i) {
|
|
511
|
-
if (!exec_status[i].first) {
|
|
571
|
+
if (pick_status[i] && !exec_status[i].first) {
|
|
512
572
|
jobs[i]->Cancel();
|
|
513
573
|
}
|
|
514
574
|
}
|
|
515
575
|
for (int i = 0; i != num_cfs; ++i) {
|
|
516
|
-
if (exec_status[i].
|
|
576
|
+
if (exec_status[i].second.ok() && exec_status[i].first) {
|
|
517
577
|
auto& mems = jobs[i]->GetMemTables();
|
|
518
578
|
cfds[i]->imm()->RollbackMemtableFlush(mems,
|
|
519
579
|
file_meta[i].fd.GetNumber());
|
|
@@ -522,7 +582,15 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
522
582
|
}
|
|
523
583
|
|
|
524
584
|
if (s.ok()) {
|
|
525
|
-
auto wait_to_install_func =
|
|
585
|
+
const auto wait_to_install_func =
|
|
586
|
+
[&]() -> std::pair<Status, bool /*continue to wait*/> {
|
|
587
|
+
if (!versions_->io_status().ok()) {
|
|
588
|
+
// Something went wrong elsewhere, we cannot count on waiting for our
|
|
589
|
+
// turn to write/sync to MANIFEST or CURRENT. Just return.
|
|
590
|
+
return std::make_pair(versions_->io_status(), false);
|
|
591
|
+
} else if (shutting_down_.load(std::memory_order_acquire)) {
|
|
592
|
+
return std::make_pair(Status::ShutdownInProgress(), false);
|
|
593
|
+
}
|
|
526
594
|
bool ready = true;
|
|
527
595
|
for (size_t i = 0; i != cfds.size(); ++i) {
|
|
528
596
|
const auto& mems = jobs[i]->GetMemTables();
|
|
@@ -546,18 +614,46 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
546
614
|
break;
|
|
547
615
|
}
|
|
548
616
|
}
|
|
549
|
-
return ready;
|
|
617
|
+
return std::make_pair(Status::OK(), !ready);
|
|
550
618
|
};
|
|
551
619
|
|
|
552
|
-
bool resuming_from_bg_err =
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
620
|
+
bool resuming_from_bg_err =
|
|
621
|
+
error_handler_.IsDBStopped() ||
|
|
622
|
+
(cfds[0]->GetFlushReason() == FlushReason::kErrorRecovery ||
|
|
623
|
+
cfds[0]->GetFlushReason() == FlushReason::kErrorRecoveryRetryFlush);
|
|
624
|
+
while ((!resuming_from_bg_err || error_handler_.GetRecoveryError().ok())) {
|
|
625
|
+
std::pair<Status, bool> res = wait_to_install_func();
|
|
626
|
+
|
|
627
|
+
TEST_SYNC_POINT_CALLBACK(
|
|
628
|
+
"DBImpl::AtomicFlushMemTablesToOutputFiles:WaitToCommit", &res);
|
|
629
|
+
|
|
630
|
+
if (!res.first.ok()) {
|
|
631
|
+
s = res.first;
|
|
632
|
+
break;
|
|
633
|
+
} else if (!res.second) {
|
|
634
|
+
break;
|
|
635
|
+
}
|
|
556
636
|
atomic_flush_install_cv_.Wait();
|
|
637
|
+
|
|
638
|
+
resuming_from_bg_err =
|
|
639
|
+
error_handler_.IsDBStopped() ||
|
|
640
|
+
(cfds[0]->GetFlushReason() == FlushReason::kErrorRecovery ||
|
|
641
|
+
cfds[0]->GetFlushReason() == FlushReason::kErrorRecoveryRetryFlush);
|
|
557
642
|
}
|
|
558
643
|
|
|
559
|
-
|
|
560
|
-
|
|
644
|
+
if (!resuming_from_bg_err) {
|
|
645
|
+
// If not resuming from bg err, then we determine future action based on
|
|
646
|
+
// whether we hit background error.
|
|
647
|
+
if (s.ok()) {
|
|
648
|
+
s = error_handler_.GetBGError();
|
|
649
|
+
}
|
|
650
|
+
} else if (s.ok()) {
|
|
651
|
+
// If resuming from bg err, we still rely on wait_to_install_func()'s
|
|
652
|
+
// result to determine future action. If wait_to_install_func() returns
|
|
653
|
+
// non-ok already, then we should not proceed to flush result
|
|
654
|
+
// installation.
|
|
655
|
+
s = error_handler_.GetRecoveryError();
|
|
656
|
+
}
|
|
561
657
|
}
|
|
562
658
|
|
|
563
659
|
if (s.ok()) {
|
|
@@ -565,6 +661,8 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
565
661
|
autovector<const autovector<MemTable*>*> mems_list;
|
|
566
662
|
autovector<const MutableCFOptions*> mutable_cf_options_list;
|
|
567
663
|
autovector<FileMetaData*> tmp_file_meta;
|
|
664
|
+
autovector<std::list<std::unique_ptr<FlushJobInfo>>*>
|
|
665
|
+
committed_flush_jobs_info;
|
|
568
666
|
for (int i = 0; i != num_cfs; ++i) {
|
|
569
667
|
const auto& mems = jobs[i]->GetMemTables();
|
|
570
668
|
if (!cfds[i]->IsDropped() && !mems.empty()) {
|
|
@@ -572,13 +670,18 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
572
670
|
mems_list.emplace_back(&mems);
|
|
573
671
|
mutable_cf_options_list.emplace_back(&all_mutable_cf_options[i]);
|
|
574
672
|
tmp_file_meta.emplace_back(&file_meta[i]);
|
|
673
|
+
#ifndef ROCKSDB_LITE
|
|
674
|
+
committed_flush_jobs_info.emplace_back(
|
|
675
|
+
jobs[i]->GetCommittedFlushJobsInfo());
|
|
676
|
+
#endif //! ROCKSDB_LITE
|
|
575
677
|
}
|
|
576
678
|
}
|
|
577
679
|
|
|
578
680
|
s = InstallMemtableAtomicFlushResults(
|
|
579
681
|
nullptr /* imm_lists */, tmp_cfds, mutable_cf_options_list, mems_list,
|
|
580
682
|
versions_.get(), &logs_with_prep_tracker_, &mutex_, tmp_file_meta,
|
|
581
|
-
&job_context->memtables_to_free,
|
|
683
|
+
committed_flush_jobs_info, &job_context->memtables_to_free,
|
|
684
|
+
directories_.GetDbDir(), log_buffer);
|
|
582
685
|
}
|
|
583
686
|
|
|
584
687
|
if (s.ok()) {
|
|
@@ -609,11 +712,14 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
609
712
|
|
|
610
713
|
const auto& blob_files = storage_info->GetBlobFiles();
|
|
611
714
|
if (!blob_files.empty()) {
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
715
|
+
assert(blob_files.front());
|
|
716
|
+
assert(blob_files.back());
|
|
717
|
+
|
|
718
|
+
ROCKS_LOG_BUFFER(
|
|
719
|
+
log_buffer,
|
|
720
|
+
"[%s] Blob file summary: head=%" PRIu64 ", tail=%" PRIu64 "\n",
|
|
721
|
+
column_family_name.c_str(), blob_files.front()->GetBlobFileNumber(),
|
|
722
|
+
blob_files.back()->GetBlobFileNumber());
|
|
617
723
|
}
|
|
618
724
|
}
|
|
619
725
|
if (made_progress) {
|
|
@@ -624,6 +730,11 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
624
730
|
immutable_db_options_.sst_file_manager.get());
|
|
625
731
|
assert(all_mutable_cf_options.size() == static_cast<size_t>(num_cfs));
|
|
626
732
|
for (int i = 0; s.ok() && i != num_cfs; ++i) {
|
|
733
|
+
// If mempurge happened instead of Flush,
|
|
734
|
+
// no NotifyOnFlushCompleted call (no SST file created).
|
|
735
|
+
if (switched_to_mempurge[i]) {
|
|
736
|
+
continue;
|
|
737
|
+
}
|
|
627
738
|
if (cfds[i]->IsDropped()) {
|
|
628
739
|
continue;
|
|
629
740
|
}
|
|
@@ -651,28 +762,27 @@ Status DBImpl::AtomicFlushMemTablesToOutputFiles(
|
|
|
651
762
|
// Need to undo atomic flush if something went wrong, i.e. s is not OK and
|
|
652
763
|
// it is not because of CF drop.
|
|
653
764
|
if (!s.ok() && !s.IsColumnFamilyDropped()) {
|
|
654
|
-
if (
|
|
765
|
+
if (log_io_s.ok()) {
|
|
655
766
|
// Error while writing to MANIFEST.
|
|
656
767
|
// In fact, versions_->io_status() can also be the result of renaming
|
|
657
768
|
// CURRENT file. With current code, it's just difficult to tell. So just
|
|
658
769
|
// be pessimistic and try write to a new MANIFEST.
|
|
659
770
|
// TODO: distinguish between MANIFEST write and CURRENT renaming
|
|
660
771
|
if (!versions_->io_status().ok()) {
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
BackgroundErrorReason::kManifestWriteNoWAL);
|
|
668
|
-
}
|
|
669
|
-
} else if (total_log_size_ > 0) {
|
|
670
|
-
error_handler_.SetBGError(io_s, BackgroundErrorReason::kFlush);
|
|
772
|
+
// If WAL sync is successful (either WAL size is 0 or there is no IO
|
|
773
|
+
// error), all the Manifest write errors will be mapped to soft errors.
|
|
774
|
+
// TODO: kManifestWriteNoWAL and kFlushNoWAL are misleading. Refactor
|
|
775
|
+
// is needed.
|
|
776
|
+
error_handler_.SetBGError(s,
|
|
777
|
+
BackgroundErrorReason::kManifestWriteNoWAL);
|
|
671
778
|
} else {
|
|
672
|
-
// If
|
|
673
|
-
|
|
779
|
+
// If WAL sync is successful (either WAL size is 0 or there is no IO
|
|
780
|
+
// error), all the other SST file write errors will be set as
|
|
781
|
+
// kFlushNoWAL.
|
|
782
|
+
error_handler_.SetBGError(s, BackgroundErrorReason::kFlushNoWAL);
|
|
674
783
|
}
|
|
675
784
|
} else {
|
|
785
|
+
assert(s == log_io_s);
|
|
676
786
|
Status new_bg_error = s;
|
|
677
787
|
error_handler_.SetBGError(new_bg_error, BackgroundErrorReason::kFlush);
|
|
678
788
|
}
|
|
@@ -759,6 +869,8 @@ void DBImpl::NotifyOnFlushCompleted(
|
|
|
759
869
|
for (auto listener : immutable_db_options_.listeners) {
|
|
760
870
|
listener->OnFlushCompleted(this, *info);
|
|
761
871
|
}
|
|
872
|
+
TEST_SYNC_POINT(
|
|
873
|
+
"DBImpl::NotifyOnFlushCompleted::PostAllOnFlushCompleted");
|
|
762
874
|
}
|
|
763
875
|
flush_jobs_info->clear();
|
|
764
876
|
}
|
|
@@ -776,12 +888,20 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
|
|
|
776
888
|
ColumnFamilyHandle* column_family,
|
|
777
889
|
const Slice* begin_without_ts,
|
|
778
890
|
const Slice* end_without_ts) {
|
|
891
|
+
if (manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
|
|
892
|
+
return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
893
|
+
}
|
|
894
|
+
|
|
895
|
+
if (options.canceled && options.canceled->load(std::memory_order_acquire)) {
|
|
896
|
+
return Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
897
|
+
}
|
|
898
|
+
|
|
779
899
|
const Comparator* const ucmp = column_family->GetComparator();
|
|
780
900
|
assert(ucmp);
|
|
781
901
|
size_t ts_sz = ucmp->timestamp_size();
|
|
782
902
|
if (ts_sz == 0) {
|
|
783
903
|
return CompactRangeInternal(options, column_family, begin_without_ts,
|
|
784
|
-
end_without_ts);
|
|
904
|
+
end_without_ts, "" /*trim_ts*/);
|
|
785
905
|
}
|
|
786
906
|
|
|
787
907
|
std::string begin_str;
|
|
@@ -803,12 +923,54 @@ Status DBImpl::CompactRange(const CompactRangeOptions& options,
|
|
|
803
923
|
Slice* end_with_ts = end_without_ts ? &end : nullptr;
|
|
804
924
|
|
|
805
925
|
return CompactRangeInternal(options, column_family, begin_with_ts,
|
|
806
|
-
end_with_ts);
|
|
926
|
+
end_with_ts, "" /*trim_ts*/);
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
Status DBImpl::IncreaseFullHistoryTsLow(ColumnFamilyHandle* column_family,
|
|
930
|
+
std::string ts_low) {
|
|
931
|
+
ColumnFamilyData* cfd = nullptr;
|
|
932
|
+
if (column_family == nullptr) {
|
|
933
|
+
cfd = default_cf_handle_->cfd();
|
|
934
|
+
} else {
|
|
935
|
+
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
|
|
936
|
+
assert(cfh != nullptr);
|
|
937
|
+
cfd = cfh->cfd();
|
|
938
|
+
}
|
|
939
|
+
assert(cfd != nullptr && cfd->user_comparator() != nullptr);
|
|
940
|
+
if (cfd->user_comparator()->timestamp_size() == 0) {
|
|
941
|
+
return Status::InvalidArgument(
|
|
942
|
+
"Timestamp is not enabled in this column family");
|
|
943
|
+
}
|
|
944
|
+
if (cfd->user_comparator()->timestamp_size() != ts_low.size()) {
|
|
945
|
+
return Status::InvalidArgument("ts_low size mismatch");
|
|
946
|
+
}
|
|
947
|
+
return IncreaseFullHistoryTsLowImpl(cfd, ts_low);
|
|
948
|
+
}
|
|
949
|
+
|
|
950
|
+
Status DBImpl::IncreaseFullHistoryTsLowImpl(ColumnFamilyData* cfd,
|
|
951
|
+
std::string ts_low) {
|
|
952
|
+
VersionEdit edit;
|
|
953
|
+
edit.SetColumnFamily(cfd->GetID());
|
|
954
|
+
edit.SetFullHistoryTsLow(ts_low);
|
|
955
|
+
|
|
956
|
+
InstrumentedMutexLock l(&mutex_);
|
|
957
|
+
std::string current_ts_low = cfd->GetFullHistoryTsLow();
|
|
958
|
+
const Comparator* ucmp = cfd->user_comparator();
|
|
959
|
+
assert(ucmp->timestamp_size() == ts_low.size() && !ts_low.empty());
|
|
960
|
+
if (!current_ts_low.empty() &&
|
|
961
|
+
ucmp->CompareTimestamp(ts_low, current_ts_low) < 0) {
|
|
962
|
+
return Status::InvalidArgument(
|
|
963
|
+
"Cannot decrease full_history_timestamp_low");
|
|
964
|
+
}
|
|
965
|
+
|
|
966
|
+
return versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(), &edit,
|
|
967
|
+
&mutex_);
|
|
807
968
|
}
|
|
808
969
|
|
|
809
970
|
Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
810
971
|
ColumnFamilyHandle* column_family,
|
|
811
|
-
const Slice* begin, const Slice* end
|
|
972
|
+
const Slice* begin, const Slice* end,
|
|
973
|
+
const std::string& trim_ts) {
|
|
812
974
|
auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
|
|
813
975
|
auto cfd = cfh->cfd();
|
|
814
976
|
|
|
@@ -817,6 +979,22 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
817
979
|
}
|
|
818
980
|
|
|
819
981
|
bool flush_needed = true;
|
|
982
|
+
|
|
983
|
+
// Update full_history_ts_low if it's set
|
|
984
|
+
if (options.full_history_ts_low != nullptr &&
|
|
985
|
+
!options.full_history_ts_low->empty()) {
|
|
986
|
+
std::string ts_low = options.full_history_ts_low->ToString();
|
|
987
|
+
if (begin != nullptr || end != nullptr) {
|
|
988
|
+
return Status::InvalidArgument(
|
|
989
|
+
"Cannot specify compaction range with full_history_ts_low");
|
|
990
|
+
}
|
|
991
|
+
Status s = IncreaseFullHistoryTsLowImpl(cfd, ts_low);
|
|
992
|
+
if (!s.ok()) {
|
|
993
|
+
LogFlush(immutable_db_options_.info_log);
|
|
994
|
+
return s;
|
|
995
|
+
}
|
|
996
|
+
}
|
|
997
|
+
|
|
820
998
|
Status s;
|
|
821
999
|
if (begin != nullptr && end != nullptr) {
|
|
822
1000
|
// TODO(ajkr): We could also optimize away the flush in certain cases where
|
|
@@ -863,7 +1041,7 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
863
1041
|
}
|
|
864
1042
|
s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
|
|
865
1043
|
final_output_level, options, begin, end, exclusive,
|
|
866
|
-
false, port::kMaxUint64);
|
|
1044
|
+
false, port::kMaxUint64, trim_ts);
|
|
867
1045
|
} else {
|
|
868
1046
|
int first_overlapped_level = kInvalidLevel;
|
|
869
1047
|
int max_overlapped_level = kInvalidLevel;
|
|
@@ -949,9 +1127,13 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
949
1127
|
disallow_trivial_move = true;
|
|
950
1128
|
}
|
|
951
1129
|
}
|
|
1130
|
+
// trim_ts need real compaction to remove latest record
|
|
1131
|
+
if (!trim_ts.empty()) {
|
|
1132
|
+
disallow_trivial_move = true;
|
|
1133
|
+
}
|
|
952
1134
|
s = RunManualCompaction(cfd, level, output_level, options, begin, end,
|
|
953
1135
|
exclusive, disallow_trivial_move,
|
|
954
|
-
max_file_num_to_ignore);
|
|
1136
|
+
max_file_num_to_ignore, trim_ts);
|
|
955
1137
|
if (!s.ok()) {
|
|
956
1138
|
break;
|
|
957
1139
|
}
|
|
@@ -987,6 +1169,8 @@ Status DBImpl::CompactRangeInternal(const CompactRangeOptions& options,
|
|
|
987
1169
|
assert(temp_s.ok());
|
|
988
1170
|
}
|
|
989
1171
|
EnableManualCompaction();
|
|
1172
|
+
TEST_SYNC_POINT(
|
|
1173
|
+
"DBImpl::CompactRange:PostRefitLevel:ManualCompactionEnabled");
|
|
990
1174
|
}
|
|
991
1175
|
LogFlush(immutable_db_options_.info_log);
|
|
992
1176
|
|
|
@@ -1026,7 +1210,7 @@ Status DBImpl::CompactFiles(const CompactionOptions& compact_options,
|
|
|
1026
1210
|
assert(cfd);
|
|
1027
1211
|
|
|
1028
1212
|
Status s;
|
|
1029
|
-
JobContext job_context(
|
|
1213
|
+
JobContext job_context(next_job_id_.fetch_add(1), true);
|
|
1030
1214
|
LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
|
|
1031
1215
|
immutable_db_options_.info_log.get());
|
|
1032
1216
|
|
|
@@ -1177,18 +1361,19 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1177
1361
|
assert(is_snapshot_supported_ || snapshots_.empty());
|
|
1178
1362
|
CompactionJobStats compaction_job_stats;
|
|
1179
1363
|
CompactionJob compaction_job(
|
|
1180
|
-
job_context->job_id, c.get(), immutable_db_options_,
|
|
1364
|
+
job_context->job_id, c.get(), immutable_db_options_, mutable_db_options_,
|
|
1181
1365
|
file_options_for_compaction_, versions_.get(), &shutting_down_,
|
|
1182
|
-
|
|
1366
|
+
log_buffer, directories_.GetDbDir(),
|
|
1183
1367
|
GetDataDir(c->column_family_data(), c->output_path_id()),
|
|
1184
1368
|
GetDataDir(c->column_family_data(), 0), stats_, &mutex_, &error_handler_,
|
|
1185
1369
|
snapshot_seqs, earliest_write_conflict_snapshot, snapshot_checker,
|
|
1186
|
-
table_cache_, &event_logger_,
|
|
1370
|
+
job_context, table_cache_, &event_logger_,
|
|
1187
1371
|
c->mutable_cf_options()->paranoid_file_checks,
|
|
1188
1372
|
c->mutable_cf_options()->report_bg_io_stats, dbname_,
|
|
1189
1373
|
&compaction_job_stats, Env::Priority::USER, io_tracer_,
|
|
1190
|
-
&manual_compaction_paused_, db_id_, db_session_id_,
|
|
1191
|
-
c->column_family_data()->GetFullHistoryTsLow())
|
|
1374
|
+
&manual_compaction_paused_, nullptr, db_id_, db_session_id_,
|
|
1375
|
+
c->column_family_data()->GetFullHistoryTsLow(), c->trim_ts(),
|
|
1376
|
+
&blob_callback_);
|
|
1192
1377
|
|
|
1193
1378
|
// Creating a compaction influences the compaction score because the score
|
|
1194
1379
|
// takes running compactions into account (by skipping files that are already
|
|
@@ -1261,10 +1446,15 @@ Status DBImpl::CompactFilesImpl(
|
|
|
1261
1446
|
|
|
1262
1447
|
if (output_file_names != nullptr) {
|
|
1263
1448
|
for (const auto& newf : c->edit()->GetNewFiles()) {
|
|
1264
|
-
(
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1449
|
+
output_file_names->push_back(TableFileName(
|
|
1450
|
+
c->immutable_options()->cf_paths, newf.second.fd.GetNumber(),
|
|
1451
|
+
newf.second.fd.GetPathId()));
|
|
1452
|
+
}
|
|
1453
|
+
|
|
1454
|
+
for (const auto& blob_file : c->edit()->GetBlobFileAdditions()) {
|
|
1455
|
+
output_file_names->push_back(
|
|
1456
|
+
BlobFileName(c->immutable_options()->cf_paths.front().path,
|
|
1457
|
+
blob_file.GetBlobFileNumber()));
|
|
1268
1458
|
}
|
|
1269
1459
|
}
|
|
1270
1460
|
|
|
@@ -1325,6 +1515,8 @@ void DBImpl::NotifyOnCompactionBegin(ColumnFamilyData* cfd, Compaction* c,
|
|
|
1325
1515
|
manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
|
|
1326
1516
|
return;
|
|
1327
1517
|
}
|
|
1518
|
+
|
|
1519
|
+
c->SetNotifyOnCompactionCompleted();
|
|
1328
1520
|
Version* current = cfd->current();
|
|
1329
1521
|
current->Ref();
|
|
1330
1522
|
// release lock while notifying events
|
|
@@ -1360,10 +1552,11 @@ void DBImpl::NotifyOnCompactionCompleted(
|
|
|
1360
1552
|
if (shutting_down_.load(std::memory_order_acquire)) {
|
|
1361
1553
|
return;
|
|
1362
1554
|
}
|
|
1363
|
-
|
|
1364
|
-
|
|
1555
|
+
|
|
1556
|
+
if (c->ShouldNotifyOnCompactionCompleted() == false) {
|
|
1365
1557
|
return;
|
|
1366
1558
|
}
|
|
1559
|
+
|
|
1367
1560
|
Version* current = cfd->current();
|
|
1368
1561
|
current->Ref();
|
|
1369
1562
|
// release lock while notifying events
|
|
@@ -1452,12 +1645,12 @@ Status DBImpl::ReFitLevel(ColumnFamilyData* cfd, int level, int target_level) {
|
|
|
1452
1645
|
edit.SetColumnFamily(cfd->GetID());
|
|
1453
1646
|
for (const auto& f : vstorage->LevelFiles(level)) {
|
|
1454
1647
|
edit.DeleteFile(level, f->fd.GetNumber());
|
|
1455
|
-
edit.AddFile(
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1648
|
+
edit.AddFile(
|
|
1649
|
+
to_level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(),
|
|
1650
|
+
f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
|
|
1651
|
+
f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
|
|
1652
|
+
f->oldest_ancester_time, f->file_creation_time, f->file_checksum,
|
|
1653
|
+
f->file_checksum_func_name, f->min_timestamp, f->max_timestamp);
|
|
1461
1654
|
}
|
|
1462
1655
|
ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
|
|
1463
1656
|
"[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
|
|
@@ -1569,25 +1762,21 @@ Status DBImpl::RunManualCompaction(
|
|
|
1569
1762
|
ColumnFamilyData* cfd, int input_level, int output_level,
|
|
1570
1763
|
const CompactRangeOptions& compact_range_options, const Slice* begin,
|
|
1571
1764
|
const Slice* end, bool exclusive, bool disallow_trivial_move,
|
|
1572
|
-
uint64_t max_file_num_to_ignore) {
|
|
1765
|
+
uint64_t max_file_num_to_ignore, const std::string& trim_ts) {
|
|
1573
1766
|
assert(input_level == ColumnFamilyData::kCompactAllLevels ||
|
|
1574
1767
|
input_level >= 0);
|
|
1575
1768
|
|
|
1576
1769
|
InternalKey begin_storage, end_storage;
|
|
1577
|
-
CompactionArg* ca;
|
|
1770
|
+
CompactionArg* ca = nullptr;
|
|
1578
1771
|
|
|
1579
1772
|
bool scheduled = false;
|
|
1773
|
+
bool unscheduled = false;
|
|
1774
|
+
Env::Priority thread_pool_priority = Env::Priority::TOTAL;
|
|
1580
1775
|
bool manual_conflict = false;
|
|
1581
|
-
|
|
1582
|
-
manual
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
manual.output_path_id = compact_range_options.target_path_id;
|
|
1586
|
-
manual.done = false;
|
|
1587
|
-
manual.in_progress = false;
|
|
1588
|
-
manual.incomplete = false;
|
|
1589
|
-
manual.exclusive = exclusive;
|
|
1590
|
-
manual.disallow_trivial_move = disallow_trivial_move;
|
|
1776
|
+
|
|
1777
|
+
ManualCompactionState manual(
|
|
1778
|
+
cfd, input_level, output_level, compact_range_options.target_path_id,
|
|
1779
|
+
exclusive, disallow_trivial_move, compact_range_options.canceled);
|
|
1591
1780
|
// For universal compaction, we enforce every manual compaction to compact
|
|
1592
1781
|
// all files.
|
|
1593
1782
|
if (begin == nullptr ||
|
|
@@ -1611,10 +1800,24 @@ Status DBImpl::RunManualCompaction(
|
|
|
1611
1800
|
TEST_SYNC_POINT("DBImpl::RunManualCompaction:1");
|
|
1612
1801
|
InstrumentedMutexLock l(&mutex_);
|
|
1613
1802
|
|
|
1803
|
+
if (manual_compaction_paused_ > 0) {
|
|
1804
|
+
// Does not make sense to `AddManualCompaction()` in this scenario since
|
|
1805
|
+
// `DisableManualCompaction()` just waited for the manual compaction queue
|
|
1806
|
+
// to drain. So return immediately.
|
|
1807
|
+
TEST_SYNC_POINT("DBImpl::RunManualCompaction:PausedAtStart");
|
|
1808
|
+
manual.status =
|
|
1809
|
+
Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
1810
|
+
manual.done = true;
|
|
1811
|
+
return manual.status;
|
|
1812
|
+
}
|
|
1813
|
+
|
|
1614
1814
|
// When a manual compaction arrives, temporarily disable scheduling of
|
|
1615
1815
|
// non-manual compactions and wait until the number of scheduled compaction
|
|
1616
|
-
// jobs drops to zero. This
|
|
1617
|
-
// can compact any range of keys/files.
|
|
1816
|
+
// jobs drops to zero. This used to be needed to ensure that this manual
|
|
1817
|
+
// compaction can compact any range of keys/files. Now it is optional
|
|
1818
|
+
// (see `CompactRangeOptions::exclusive_manual_compaction`). The use case for
|
|
1819
|
+
// `exclusive_manual_compaction=true` (the default) is unclear beyond not
|
|
1820
|
+
// trusting the new code.
|
|
1618
1821
|
//
|
|
1619
1822
|
// HasPendingManualCompaction() is true when at least one thread is inside
|
|
1620
1823
|
// RunManualCompaction(), i.e. during that time no other compaction will
|
|
@@ -1628,8 +1831,20 @@ Status DBImpl::RunManualCompaction(
|
|
|
1628
1831
|
AddManualCompaction(&manual);
|
|
1629
1832
|
TEST_SYNC_POINT_CALLBACK("DBImpl::RunManualCompaction:NotScheduled", &mutex_);
|
|
1630
1833
|
if (exclusive) {
|
|
1834
|
+
// Limitation: there's no way to wake up the below loop when user sets
|
|
1835
|
+
// `*manual.canceled`. So `CompactRangeOptions::exclusive_manual_compaction`
|
|
1836
|
+
// and `CompactRangeOptions::canceled` might not work well together.
|
|
1631
1837
|
while (bg_bottom_compaction_scheduled_ > 0 ||
|
|
1632
1838
|
bg_compaction_scheduled_ > 0) {
|
|
1839
|
+
if (manual_compaction_paused_ > 0 ||
|
|
1840
|
+
(manual.canceled != nullptr && *manual.canceled == true)) {
|
|
1841
|
+
// Pretend the error came from compaction so the below cleanup/error
|
|
1842
|
+
// handling code can process it.
|
|
1843
|
+
manual.done = true;
|
|
1844
|
+
manual.status =
|
|
1845
|
+
Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
1846
|
+
break;
|
|
1847
|
+
}
|
|
1633
1848
|
TEST_SYNC_POINT("DBImpl::RunManualCompaction:WaitScheduled");
|
|
1634
1849
|
ROCKS_LOG_INFO(
|
|
1635
1850
|
immutable_db_options_.info_log,
|
|
@@ -1659,13 +1874,30 @@ Status DBImpl::RunManualCompaction(
|
|
|
1659
1874
|
*manual.cfd->GetLatestMutableCFOptions(), mutable_db_options_,
|
|
1660
1875
|
manual.input_level, manual.output_level, compact_range_options,
|
|
1661
1876
|
manual.begin, manual.end, &manual.manual_end, &manual_conflict,
|
|
1662
|
-
max_file_num_to_ignore)) == nullptr &&
|
|
1877
|
+
max_file_num_to_ignore, trim_ts)) == nullptr &&
|
|
1663
1878
|
manual_conflict))) {
|
|
1664
1879
|
// exclusive manual compactions should not see a conflict during
|
|
1665
1880
|
// CompactRange
|
|
1666
1881
|
assert(!exclusive || !manual_conflict);
|
|
1667
1882
|
// Running either this or some other manual compaction
|
|
1668
1883
|
bg_cv_.Wait();
|
|
1884
|
+
if (manual_compaction_paused_ > 0 && scheduled && !unscheduled) {
|
|
1885
|
+
assert(thread_pool_priority != Env::Priority::TOTAL);
|
|
1886
|
+
// unschedule all manual compactions
|
|
1887
|
+
auto unscheduled_task_num = env_->UnSchedule(
|
|
1888
|
+
GetTaskTag(TaskType::kManualCompaction), thread_pool_priority);
|
|
1889
|
+
if (unscheduled_task_num > 0) {
|
|
1890
|
+
ROCKS_LOG_INFO(
|
|
1891
|
+
immutable_db_options_.info_log,
|
|
1892
|
+
"[%s] Unscheduled %d number of manual compactions from the "
|
|
1893
|
+
"thread-pool",
|
|
1894
|
+
cfd->GetName().c_str(), unscheduled_task_num);
|
|
1895
|
+
// it may unschedule other manual compactions, notify others.
|
|
1896
|
+
bg_cv_.SignalAll();
|
|
1897
|
+
}
|
|
1898
|
+
unscheduled = true;
|
|
1899
|
+
TEST_SYNC_POINT("DBImpl::RunManualCompaction:Unscheduled");
|
|
1900
|
+
}
|
|
1669
1901
|
if (scheduled && manual.incomplete == true) {
|
|
1670
1902
|
assert(!manual.in_progress);
|
|
1671
1903
|
scheduled = false;
|
|
@@ -1688,15 +1920,25 @@ Status DBImpl::RunManualCompaction(
|
|
|
1688
1920
|
assert(false);
|
|
1689
1921
|
}
|
|
1690
1922
|
manual.incomplete = false;
|
|
1691
|
-
bg_compaction_scheduled_++;
|
|
1692
|
-
Env::Priority thread_pool_pri = Env::Priority::LOW;
|
|
1693
1923
|
if (compaction->bottommost_level() &&
|
|
1694
1924
|
env_->GetBackgroundThreads(Env::Priority::BOTTOM) > 0) {
|
|
1695
|
-
|
|
1925
|
+
bg_bottom_compaction_scheduled_++;
|
|
1926
|
+
ca->compaction_pri_ = Env::Priority::BOTTOM;
|
|
1927
|
+
env_->Schedule(&DBImpl::BGWorkBottomCompaction, ca,
|
|
1928
|
+
Env::Priority::BOTTOM,
|
|
1929
|
+
GetTaskTag(TaskType::kManualCompaction),
|
|
1930
|
+
&DBImpl::UnscheduleCompactionCallback);
|
|
1931
|
+
thread_pool_priority = Env::Priority::BOTTOM;
|
|
1932
|
+
} else {
|
|
1933
|
+
bg_compaction_scheduled_++;
|
|
1934
|
+
ca->compaction_pri_ = Env::Priority::LOW;
|
|
1935
|
+
env_->Schedule(&DBImpl::BGWorkCompaction, ca, Env::Priority::LOW,
|
|
1936
|
+
GetTaskTag(TaskType::kManualCompaction),
|
|
1937
|
+
&DBImpl::UnscheduleCompactionCallback);
|
|
1938
|
+
thread_pool_priority = Env::Priority::LOW;
|
|
1696
1939
|
}
|
|
1697
|
-
env_->Schedule(&DBImpl::BGWorkCompaction, ca, thread_pool_pri, this,
|
|
1698
|
-
&DBImpl::UnscheduleCompactionCallback);
|
|
1699
1940
|
scheduled = true;
|
|
1941
|
+
TEST_SYNC_POINT("DBImpl::RunManualCompaction:Scheduled");
|
|
1700
1942
|
}
|
|
1701
1943
|
}
|
|
1702
1944
|
|
|
@@ -1704,6 +1946,13 @@ Status DBImpl::RunManualCompaction(
|
|
|
1704
1946
|
assert(!manual.in_progress);
|
|
1705
1947
|
assert(HasPendingManualCompaction());
|
|
1706
1948
|
RemoveManualCompaction(&manual);
|
|
1949
|
+
// if the manual job is unscheduled, try schedule other jobs in case there's
|
|
1950
|
+
// any unscheduled compaction job which was blocked by exclusive manual
|
|
1951
|
+
// compaction.
|
|
1952
|
+
if (manual.status.IsIncomplete() &&
|
|
1953
|
+
manual.status.subcode() == Status::SubCode::kManualCompactionPaused) {
|
|
1954
|
+
MaybeScheduleFlushOrCompaction();
|
|
1955
|
+
}
|
|
1707
1956
|
bg_cv_.SignalAll();
|
|
1708
1957
|
return manual.status;
|
|
1709
1958
|
}
|
|
@@ -2026,12 +2275,12 @@ Status DBImpl::WaitUntilFlushWouldNotStallWrites(ColumnFamilyData* cfd,
|
|
|
2026
2275
|
// check whether one extra immutable memtable or an extra L0 file would
|
|
2027
2276
|
// cause write stalling mode to be entered. It could still enter stall
|
|
2028
2277
|
// mode due to pending compaction bytes, but that's less common
|
|
2029
|
-
write_stall_condition =
|
|
2030
|
-
|
|
2031
|
-
|
|
2032
|
-
|
|
2033
|
-
|
|
2034
|
-
|
|
2278
|
+
write_stall_condition = ColumnFamilyData::GetWriteStallConditionAndCause(
|
|
2279
|
+
cfd->imm()->NumNotFlushed() + 1,
|
|
2280
|
+
vstorage->l0_delay_trigger_count() + 1,
|
|
2281
|
+
vstorage->estimated_compaction_needed_bytes(),
|
|
2282
|
+
mutable_cf_options, *cfd->ioptions())
|
|
2283
|
+
.first;
|
|
2035
2284
|
} while (write_stall_condition != WriteStallCondition::kNormal);
|
|
2036
2285
|
}
|
|
2037
2286
|
return Status::OK();
|
|
@@ -2054,21 +2303,27 @@ Status DBImpl::WaitForFlushMemTables(
|
|
|
2054
2303
|
int num = static_cast<int>(cfds.size());
|
|
2055
2304
|
// Wait until the compaction completes
|
|
2056
2305
|
InstrumentedMutexLock l(&mutex_);
|
|
2306
|
+
Status s;
|
|
2057
2307
|
// If the caller is trying to resume from bg error, then
|
|
2058
2308
|
// error_handler_.IsDBStopped() is true.
|
|
2059
2309
|
while (resuming_from_bg_err || !error_handler_.IsDBStopped()) {
|
|
2060
2310
|
if (shutting_down_.load(std::memory_order_acquire)) {
|
|
2061
|
-
|
|
2311
|
+
s = Status::ShutdownInProgress();
|
|
2312
|
+
return s;
|
|
2062
2313
|
}
|
|
2063
2314
|
// If an error has occurred during resumption, then no need to wait.
|
|
2315
|
+
// But flush operation may fail because of this error, so need to
|
|
2316
|
+
// return the status.
|
|
2064
2317
|
if (!error_handler_.GetRecoveryError().ok()) {
|
|
2318
|
+
s = error_handler_.GetRecoveryError();
|
|
2065
2319
|
break;
|
|
2066
2320
|
}
|
|
2067
2321
|
// If BGWorkStopped, which indicate that there is a BG error and
|
|
2068
2322
|
// 1) soft error but requires no BG work, 2) no in auto_recovery_
|
|
2069
2323
|
if (!resuming_from_bg_err && error_handler_.IsBGWorkStopped() &&
|
|
2070
2324
|
error_handler_.GetBGError().severity() < Status::Severity::kHardError) {
|
|
2071
|
-
|
|
2325
|
+
s = error_handler_.GetBGError();
|
|
2326
|
+
return s;
|
|
2072
2327
|
}
|
|
2073
2328
|
|
|
2074
2329
|
// Number of column families that have been dropped.
|
|
@@ -2086,7 +2341,8 @@ Status DBImpl::WaitForFlushMemTables(
|
|
|
2086
2341
|
}
|
|
2087
2342
|
}
|
|
2088
2343
|
if (1 == num_dropped && 1 == num) {
|
|
2089
|
-
|
|
2344
|
+
s = Status::ColumnFamilyDropped();
|
|
2345
|
+
return s;
|
|
2090
2346
|
}
|
|
2091
2347
|
// Column families involved in this flush request have either been dropped
|
|
2092
2348
|
// or finished flush. Then it's time to finish waiting.
|
|
@@ -2095,7 +2351,6 @@ Status DBImpl::WaitForFlushMemTables(
|
|
|
2095
2351
|
}
|
|
2096
2352
|
bg_cv_.Wait();
|
|
2097
2353
|
}
|
|
2098
|
-
Status s;
|
|
2099
2354
|
// If not resuming from bg error, and an error has caused the DB to stop,
|
|
2100
2355
|
// then report the bg error to caller.
|
|
2101
2356
|
if (!resuming_from_bg_err && error_handler_.IsDBStopped()) {
|
|
@@ -2121,6 +2376,10 @@ Status DBImpl::EnableAutoCompaction(
|
|
|
2121
2376
|
void DBImpl::DisableManualCompaction() {
|
|
2122
2377
|
InstrumentedMutexLock l(&mutex_);
|
|
2123
2378
|
manual_compaction_paused_.fetch_add(1, std::memory_order_release);
|
|
2379
|
+
|
|
2380
|
+
// Wake up manual compactions waiting to start.
|
|
2381
|
+
bg_cv_.SignalAll();
|
|
2382
|
+
|
|
2124
2383
|
// Wait for any pending manual compactions to finish (typically through
|
|
2125
2384
|
// failing with `Status::Incomplete`) prior to returning. This way we are
|
|
2126
2385
|
// guaranteed no pending manual compaction will commit while manual
|
|
@@ -2206,10 +2465,12 @@ void DBImpl::MaybeScheduleFlushOrCompaction() {
|
|
|
2206
2465
|
return;
|
|
2207
2466
|
}
|
|
2208
2467
|
|
|
2209
|
-
while (bg_compaction_scheduled_
|
|
2468
|
+
while (bg_compaction_scheduled_ + bg_bottom_compaction_scheduled_ <
|
|
2469
|
+
bg_job_limits.max_compactions &&
|
|
2210
2470
|
unscheduled_compactions_ > 0) {
|
|
2211
2471
|
CompactionArg* ca = new CompactionArg;
|
|
2212
2472
|
ca->db = this;
|
|
2473
|
+
ca->compaction_pri_ = Env::Priority::LOW;
|
|
2213
2474
|
ca->prepicked_compaction = nullptr;
|
|
2214
2475
|
bg_compaction_scheduled_++;
|
|
2215
2476
|
unscheduled_compactions_--;
|
|
@@ -2322,6 +2583,17 @@ void DBImpl::SchedulePendingFlush(const FlushRequest& flush_req,
|
|
|
2322
2583
|
assert(flush_req.size() == 1);
|
|
2323
2584
|
ColumnFamilyData* cfd = flush_req[0].first;
|
|
2324
2585
|
assert(cfd);
|
|
2586
|
+
// Note: SchedulePendingFlush is always preceded
|
|
2587
|
+
// with an imm()->FlushRequested() call. However,
|
|
2588
|
+
// we want to make this code snipper more resilient to
|
|
2589
|
+
// future changes. Therefore, we add the following if
|
|
2590
|
+
// statement - note that calling it twice (or more)
|
|
2591
|
+
// doesn't break anything.
|
|
2592
|
+
if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
|
|
2593
|
+
// If imm() contains silent memtables,
|
|
2594
|
+
// requesting a flush will mark the imm_needed as true.
|
|
2595
|
+
cfd->imm()->FlushRequested();
|
|
2596
|
+
}
|
|
2325
2597
|
if (!cfd->queued_for_flush() && cfd->imm()->IsFlushPending()) {
|
|
2326
2598
|
cfd->Ref();
|
|
2327
2599
|
cfd->set_queued_for_flush(true);
|
|
@@ -2383,8 +2655,7 @@ void DBImpl::BGWorkBottomCompaction(void* arg) {
|
|
|
2383
2655
|
IOSTATS_SET_THREAD_POOL_ID(Env::Priority::BOTTOM);
|
|
2384
2656
|
TEST_SYNC_POINT("DBImpl::BGWorkBottomCompaction");
|
|
2385
2657
|
auto* prepicked_compaction = ca.prepicked_compaction;
|
|
2386
|
-
assert(prepicked_compaction && prepicked_compaction->compaction
|
|
2387
|
-
!prepicked_compaction->manual_compaction_state);
|
|
2658
|
+
assert(prepicked_compaction && prepicked_compaction->compaction);
|
|
2388
2659
|
ca.db->BackgroundCallCompaction(prepicked_compaction, Env::Priority::BOTTOM);
|
|
2389
2660
|
delete prepicked_compaction;
|
|
2390
2661
|
}
|
|
@@ -2397,10 +2668,27 @@ void DBImpl::BGWorkPurge(void* db) {
|
|
|
2397
2668
|
}
|
|
2398
2669
|
|
|
2399
2670
|
void DBImpl::UnscheduleCompactionCallback(void* arg) {
|
|
2400
|
-
CompactionArg
|
|
2671
|
+
CompactionArg* ca_ptr = reinterpret_cast<CompactionArg*>(arg);
|
|
2672
|
+
Env::Priority compaction_pri = ca_ptr->compaction_pri_;
|
|
2673
|
+
if (Env::Priority::BOTTOM == compaction_pri) {
|
|
2674
|
+
// Decrement bg_bottom_compaction_scheduled_ if priority is BOTTOM
|
|
2675
|
+
ca_ptr->db->bg_bottom_compaction_scheduled_--;
|
|
2676
|
+
} else if (Env::Priority::LOW == compaction_pri) {
|
|
2677
|
+
// Decrement bg_compaction_scheduled_ if priority is LOW
|
|
2678
|
+
ca_ptr->db->bg_compaction_scheduled_--;
|
|
2679
|
+
}
|
|
2680
|
+
CompactionArg ca = *(ca_ptr);
|
|
2401
2681
|
delete reinterpret_cast<CompactionArg*>(arg);
|
|
2402
2682
|
if (ca.prepicked_compaction != nullptr) {
|
|
2683
|
+
// if it's a manual compaction, set status to ManualCompactionPaused
|
|
2684
|
+
if (ca.prepicked_compaction->manual_compaction_state) {
|
|
2685
|
+
ca.prepicked_compaction->manual_compaction_state->done = true;
|
|
2686
|
+
ca.prepicked_compaction->manual_compaction_state->status =
|
|
2687
|
+
Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
2688
|
+
}
|
|
2403
2689
|
if (ca.prepicked_compaction->compaction != nullptr) {
|
|
2690
|
+
ca.prepicked_compaction->compaction->ReleaseCompactionFiles(
|
|
2691
|
+
Status::Incomplete(Status::SubCode::kManualCompactionPaused));
|
|
2404
2692
|
delete ca.prepicked_compaction->compaction;
|
|
2405
2693
|
}
|
|
2406
2694
|
delete ca.prepicked_compaction;
|
|
@@ -2409,6 +2697,14 @@ void DBImpl::UnscheduleCompactionCallback(void* arg) {
|
|
|
2409
2697
|
}
|
|
2410
2698
|
|
|
2411
2699
|
void DBImpl::UnscheduleFlushCallback(void* arg) {
|
|
2700
|
+
// Decrement bg_flush_scheduled_ in flush callback
|
|
2701
|
+
reinterpret_cast<FlushThreadArg*>(arg)->db_->bg_flush_scheduled_--;
|
|
2702
|
+
Env::Priority flush_pri = reinterpret_cast<FlushThreadArg*>(arg)->thread_pri_;
|
|
2703
|
+
if (Env::Priority::LOW == flush_pri) {
|
|
2704
|
+
TEST_SYNC_POINT("DBImpl::UnscheduleLowFlushCallback");
|
|
2705
|
+
} else if (Env::Priority::HIGH == flush_pri) {
|
|
2706
|
+
TEST_SYNC_POINT("DBImpl::UnscheduleHighFlushCallback");
|
|
2707
|
+
}
|
|
2412
2708
|
delete reinterpret_cast<FlushThreadArg*>(arg);
|
|
2413
2709
|
TEST_SYNC_POINT("DBImpl::UnscheduleFlushCallback");
|
|
2414
2710
|
}
|
|
@@ -2446,6 +2742,11 @@ Status DBImpl::BackgroundFlush(bool* made_progress, JobContext* job_context,
|
|
|
2446
2742
|
|
|
2447
2743
|
for (const auto& iter : flush_req) {
|
|
2448
2744
|
ColumnFamilyData* cfd = iter.first;
|
|
2745
|
+
if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
|
|
2746
|
+
// If imm() contains silent memtables,
|
|
2747
|
+
// requesting a flush will mark the imm_needed as true.
|
|
2748
|
+
cfd->imm()->FlushRequested();
|
|
2749
|
+
}
|
|
2449
2750
|
if (cfd->IsDropped() || !cfd->imm()->IsFlushPending()) {
|
|
2450
2751
|
// can't flush this CF, try next one
|
|
2451
2752
|
column_families_not_to_flush.push_back(cfd);
|
|
@@ -2497,10 +2798,12 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
2497
2798
|
bool made_progress = false;
|
|
2498
2799
|
JobContext job_context(next_job_id_.fetch_add(1), true);
|
|
2499
2800
|
|
|
2500
|
-
|
|
2801
|
+
TEST_SYNC_POINT_CALLBACK("DBImpl::BackgroundCallFlush:start", nullptr);
|
|
2501
2802
|
|
|
2502
2803
|
LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
|
|
2503
2804
|
immutable_db_options_.info_log.get());
|
|
2805
|
+
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:Start:1");
|
|
2806
|
+
TEST_SYNC_POINT("DBImpl::BackgroundCallFlush:Start:2");
|
|
2504
2807
|
{
|
|
2505
2808
|
InstrumentedMutexLock l(&mutex_);
|
|
2506
2809
|
assert(bg_flush_scheduled_);
|
|
@@ -2529,7 +2832,7 @@ void DBImpl::BackgroundCallFlush(Env::Priority thread_pri) {
|
|
|
2529
2832
|
s.ToString().c_str(), error_cnt);
|
|
2530
2833
|
log_buffer.FlushBufferToLog();
|
|
2531
2834
|
LogFlush(immutable_db_options_.info_log);
|
|
2532
|
-
|
|
2835
|
+
immutable_db_options_.clock->SleepForMicroseconds(1000000);
|
|
2533
2836
|
mutex_.Lock();
|
|
2534
2837
|
}
|
|
2535
2838
|
|
|
@@ -2602,7 +2905,8 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
|
|
|
2602
2905
|
if (s.IsBusy()) {
|
|
2603
2906
|
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
|
|
2604
2907
|
mutex_.Unlock();
|
|
2605
|
-
|
|
2908
|
+
immutable_db_options_.clock->SleepForMicroseconds(
|
|
2909
|
+
10000); // prevent hot loop
|
|
2606
2910
|
mutex_.Lock();
|
|
2607
2911
|
} else if (!s.ok() && !s.IsShutdownInProgress() &&
|
|
2608
2912
|
!s.IsManualCompactionPaused() && !s.IsColumnFamilyDropped()) {
|
|
@@ -2620,9 +2924,10 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
|
|
|
2620
2924
|
"Accumulated background error counts: %" PRIu64,
|
|
2621
2925
|
s.ToString().c_str(), error_cnt);
|
|
2622
2926
|
LogFlush(immutable_db_options_.info_log);
|
|
2623
|
-
|
|
2927
|
+
immutable_db_options_.clock->SleepForMicroseconds(1000000);
|
|
2624
2928
|
mutex_.Lock();
|
|
2625
2929
|
} else if (s.IsManualCompactionPaused()) {
|
|
2930
|
+
assert(prepicked_compaction);
|
|
2626
2931
|
ManualCompactionState* m = prepicked_compaction->manual_compaction_state;
|
|
2627
2932
|
assert(m);
|
|
2628
2933
|
ROCKS_LOG_BUFFER(&log_buffer, "[%s] [JOB %d] Manual compaction paused",
|
|
@@ -2631,9 +2936,9 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
|
|
|
2631
2936
|
|
|
2632
2937
|
ReleaseFileNumberFromPendingOutputs(pending_outputs_inserted_elem);
|
|
2633
2938
|
|
|
2634
|
-
// If compaction failed, we want to delete all temporary files that we
|
|
2635
|
-
// have created (they might not be all recorded in job_context in
|
|
2636
|
-
// failure). Thus, we force full scan in FindObsoleteFiles()
|
|
2939
|
+
// If compaction failed, we want to delete all temporary files that we
|
|
2940
|
+
// might have created (they might not be all recorded in job_context in
|
|
2941
|
+
// case of a failure). Thus, we force full scan in FindObsoleteFiles()
|
|
2637
2942
|
FindObsoleteFiles(&job_context, !s.ok() && !s.IsShutdownInProgress() &&
|
|
2638
2943
|
!s.IsManualCompactionPaused() &&
|
|
2639
2944
|
!s.IsColumnFamilyDropped() &&
|
|
@@ -2660,6 +2965,7 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
|
|
|
2660
2965
|
|
|
2661
2966
|
assert(num_running_compactions_ > 0);
|
|
2662
2967
|
num_running_compactions_--;
|
|
2968
|
+
|
|
2663
2969
|
if (bg_thread_pri == Env::Priority::LOW) {
|
|
2664
2970
|
bg_compaction_scheduled_--;
|
|
2665
2971
|
} else {
|
|
@@ -2667,10 +2973,17 @@ void DBImpl::BackgroundCallCompaction(PrepickedCompaction* prepicked_compaction,
|
|
|
2667
2973
|
bg_bottom_compaction_scheduled_--;
|
|
2668
2974
|
}
|
|
2669
2975
|
|
|
2670
|
-
versions_->GetColumnFamilySet()->FreeDeadColumnFamilies();
|
|
2671
|
-
|
|
2672
2976
|
// See if there's more work to be done
|
|
2673
2977
|
MaybeScheduleFlushOrCompaction();
|
|
2978
|
+
|
|
2979
|
+
if (prepicked_compaction != nullptr &&
|
|
2980
|
+
prepicked_compaction->task_token != nullptr) {
|
|
2981
|
+
// Releasing task tokens affects (and asserts on) the DB state, so
|
|
2982
|
+
// must be done before we potentially signal the DB close process to
|
|
2983
|
+
// proceed below.
|
|
2984
|
+
prepicked_compaction->task_token.reset();
|
|
2985
|
+
}
|
|
2986
|
+
|
|
2674
2987
|
if (made_progress ||
|
|
2675
2988
|
(bg_compaction_scheduled_ == 0 &&
|
|
2676
2989
|
bg_bottom_compaction_scheduled_ == 0) ||
|
|
@@ -2723,6 +3036,9 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
2723
3036
|
} else if (is_manual &&
|
|
2724
3037
|
manual_compaction_paused_.load(std::memory_order_acquire) > 0) {
|
|
2725
3038
|
status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
3039
|
+
} else if (is_manual && manual_compaction->canceled &&
|
|
3040
|
+
manual_compaction->canceled->load(std::memory_order_acquire)) {
|
|
3041
|
+
status = Status::Incomplete(Status::SubCode::kManualCompactionPaused);
|
|
2726
3042
|
}
|
|
2727
3043
|
} else {
|
|
2728
3044
|
status = error_handler_.GetBGError();
|
|
@@ -2752,6 +3068,8 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
2752
3068
|
manual_compaction->in_progress = true;
|
|
2753
3069
|
}
|
|
2754
3070
|
|
|
3071
|
+
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:InProgress");
|
|
3072
|
+
|
|
2755
3073
|
std::unique_ptr<TaskLimiterToken> task_token;
|
|
2756
3074
|
|
|
2757
3075
|
// InternalKey manual_end_storage;
|
|
@@ -2850,7 +3168,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
2850
3168
|
c->column_family_data()
|
|
2851
3169
|
->current()
|
|
2852
3170
|
->storage_info()
|
|
2853
|
-
->ComputeCompactionScore(*(c->
|
|
3171
|
+
->ComputeCompactionScore(*(c->immutable_options()),
|
|
2854
3172
|
*(c->mutable_cf_options()));
|
|
2855
3173
|
AddToCompactionQueue(cfd);
|
|
2856
3174
|
++unscheduled_compactions_;
|
|
@@ -2861,8 +3179,12 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
2861
3179
|
status = Status::CompactionTooLarge();
|
|
2862
3180
|
} else {
|
|
2863
3181
|
// update statistics
|
|
2864
|
-
|
|
2865
|
-
|
|
3182
|
+
size_t num_files = 0;
|
|
3183
|
+
for (auto& each_level : *c->inputs()) {
|
|
3184
|
+
num_files += each_level.files.size();
|
|
3185
|
+
}
|
|
3186
|
+
RecordInHistogram(stats_, NUM_FILES_IN_SINGLE_COMPACTION, num_files);
|
|
3187
|
+
|
|
2866
3188
|
// There are three things that can change compaction score:
|
|
2867
3189
|
// 1) When flush or compaction finish. This case is covered by
|
|
2868
3190
|
// InstallSuperVersionAndScheduleWork
|
|
@@ -2947,13 +3269,13 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
2947
3269
|
for (size_t i = 0; i < c->num_input_files(l); i++) {
|
|
2948
3270
|
FileMetaData* f = c->input(l, i);
|
|
2949
3271
|
c->edit()->DeleteFile(c->level(l), f->fd.GetNumber());
|
|
2950
|
-
c->edit()->AddFile(
|
|
2951
|
-
|
|
2952
|
-
|
|
2953
|
-
|
|
2954
|
-
|
|
2955
|
-
|
|
2956
|
-
|
|
3272
|
+
c->edit()->AddFile(
|
|
3273
|
+
c->output_level(), f->fd.GetNumber(), f->fd.GetPathId(),
|
|
3274
|
+
f->fd.GetFileSize(), f->smallest, f->largest, f->fd.smallest_seqno,
|
|
3275
|
+
f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
|
|
3276
|
+
f->oldest_blob_file_number, f->oldest_ancester_time,
|
|
3277
|
+
f->file_creation_time, f->file_checksum, f->file_checksum_func_name,
|
|
3278
|
+
f->min_timestamp, f->max_timestamp);
|
|
2957
3279
|
|
|
2958
3280
|
ROCKS_LOG_BUFFER(
|
|
2959
3281
|
log_buffer,
|
|
@@ -3010,6 +3332,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3010
3332
|
TEST_SYNC_POINT("DBImpl::BackgroundCompaction:ForwardToBottomPriPool");
|
|
3011
3333
|
CompactionArg* ca = new CompactionArg;
|
|
3012
3334
|
ca->db = this;
|
|
3335
|
+
ca->compaction_pri_ = Env::Priority::BOTTOM;
|
|
3013
3336
|
ca->prepicked_compaction = new PrepickedCompaction;
|
|
3014
3337
|
ca->prepicked_compaction->compaction = c.release();
|
|
3015
3338
|
ca->prepicked_compaction->manual_compaction_state = nullptr;
|
|
@@ -3033,17 +3356,19 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3033
3356
|
assert(is_snapshot_supported_ || snapshots_.empty());
|
|
3034
3357
|
CompactionJob compaction_job(
|
|
3035
3358
|
job_context->job_id, c.get(), immutable_db_options_,
|
|
3036
|
-
file_options_for_compaction_, versions_.get(),
|
|
3037
|
-
|
|
3359
|
+
mutable_db_options_, file_options_for_compaction_, versions_.get(),
|
|
3360
|
+
&shutting_down_, log_buffer, directories_.GetDbDir(),
|
|
3038
3361
|
GetDataDir(c->column_family_data(), c->output_path_id()),
|
|
3039
3362
|
GetDataDir(c->column_family_data(), 0), stats_, &mutex_,
|
|
3040
3363
|
&error_handler_, snapshot_seqs, earliest_write_conflict_snapshot,
|
|
3041
|
-
snapshot_checker, table_cache_, &event_logger_,
|
|
3364
|
+
snapshot_checker, job_context, table_cache_, &event_logger_,
|
|
3042
3365
|
c->mutable_cf_options()->paranoid_file_checks,
|
|
3043
3366
|
c->mutable_cf_options()->report_bg_io_stats, dbname_,
|
|
3044
3367
|
&compaction_job_stats, thread_pri, io_tracer_,
|
|
3045
|
-
is_manual ? &manual_compaction_paused_ : nullptr,
|
|
3046
|
-
|
|
3368
|
+
is_manual ? &manual_compaction_paused_ : nullptr,
|
|
3369
|
+
is_manual ? manual_compaction->canceled : nullptr, db_id_,
|
|
3370
|
+
db_session_id_, c->column_family_data()->GetFullHistoryTsLow(),
|
|
3371
|
+
c->trim_ts(), &blob_callback_);
|
|
3047
3372
|
compaction_job.Prepare();
|
|
3048
3373
|
|
|
3049
3374
|
NotifyOnCompactionBegin(c->column_family_data(), c.get(), status,
|
|
@@ -3122,7 +3447,7 @@ Status DBImpl::BackgroundCompaction(bool* made_progress,
|
|
|
3122
3447
|
c->column_family_data()
|
|
3123
3448
|
->current()
|
|
3124
3449
|
->storage_info()
|
|
3125
|
-
->ComputeCompactionScore(*(c->
|
|
3450
|
+
->ComputeCompactionScore(*(c->immutable_options()),
|
|
3126
3451
|
*(c->mutable_cf_options()));
|
|
3127
3452
|
if (!cfd->queued_for_compaction()) {
|
|
3128
3453
|
AddToCompactionQueue(cfd);
|
|
@@ -3178,6 +3503,7 @@ bool DBImpl::HasPendingManualCompaction() {
|
|
|
3178
3503
|
}
|
|
3179
3504
|
|
|
3180
3505
|
void DBImpl::AddManualCompaction(DBImpl::ManualCompactionState* m) {
|
|
3506
|
+
assert(manual_compaction_paused_ == 0);
|
|
3181
3507
|
manual_compaction_dequeue_.push_back(m);
|
|
3182
3508
|
}
|
|
3183
3509
|
|
|
@@ -3263,7 +3589,7 @@ bool DBImpl::MCOverlap(ManualCompactionState* m, ManualCompactionState* m1) {
|
|
|
3263
3589
|
if (m->cfd != m1->cfd) {
|
|
3264
3590
|
return false;
|
|
3265
3591
|
}
|
|
3266
|
-
return
|
|
3592
|
+
return false;
|
|
3267
3593
|
}
|
|
3268
3594
|
|
|
3269
3595
|
#ifndef ROCKSDB_LITE
|
|
@@ -3287,7 +3613,7 @@ void DBImpl::BuildCompactionJobInfo(
|
|
|
3287
3613
|
for (const auto fmd : *c->inputs(i)) {
|
|
3288
3614
|
const FileDescriptor& desc = fmd->fd;
|
|
3289
3615
|
const uint64_t file_number = desc.GetNumber();
|
|
3290
|
-
auto fn = TableFileName(c->
|
|
3616
|
+
auto fn = TableFileName(c->immutable_options()->cf_paths, file_number,
|
|
3291
3617
|
desc.GetPathId());
|
|
3292
3618
|
compaction_job_info->input_files.push_back(fn);
|
|
3293
3619
|
compaction_job_info->input_file_infos.push_back(CompactionFileInfo{
|
|
@@ -3306,10 +3632,34 @@ void DBImpl::BuildCompactionJobInfo(
|
|
|
3306
3632
|
const FileDescriptor& desc = meta.fd;
|
|
3307
3633
|
const uint64_t file_number = desc.GetNumber();
|
|
3308
3634
|
compaction_job_info->output_files.push_back(TableFileName(
|
|
3309
|
-
c->
|
|
3635
|
+
c->immutable_options()->cf_paths, file_number, desc.GetPathId()));
|
|
3310
3636
|
compaction_job_info->output_file_infos.push_back(CompactionFileInfo{
|
|
3311
3637
|
newf.first, file_number, meta.oldest_blob_file_number});
|
|
3312
3638
|
}
|
|
3639
|
+
compaction_job_info->blob_compression_type =
|
|
3640
|
+
c->mutable_cf_options()->blob_compression_type;
|
|
3641
|
+
|
|
3642
|
+
// Update BlobFilesInfo.
|
|
3643
|
+
for (const auto& blob_file : c->edit()->GetBlobFileAdditions()) {
|
|
3644
|
+
BlobFileAdditionInfo blob_file_addition_info(
|
|
3645
|
+
BlobFileName(c->immutable_options()->cf_paths.front().path,
|
|
3646
|
+
blob_file.GetBlobFileNumber()) /*blob_file_path*/,
|
|
3647
|
+
blob_file.GetBlobFileNumber(), blob_file.GetTotalBlobCount(),
|
|
3648
|
+
blob_file.GetTotalBlobBytes());
|
|
3649
|
+
compaction_job_info->blob_file_addition_infos.emplace_back(
|
|
3650
|
+
std::move(blob_file_addition_info));
|
|
3651
|
+
}
|
|
3652
|
+
|
|
3653
|
+
// Update BlobFilesGarbageInfo.
|
|
3654
|
+
for (const auto& blob_file : c->edit()->GetBlobFileGarbages()) {
|
|
3655
|
+
BlobFileGarbageInfo blob_file_garbage_info(
|
|
3656
|
+
BlobFileName(c->immutable_options()->cf_paths.front().path,
|
|
3657
|
+
blob_file.GetBlobFileNumber()) /*blob_file_path*/,
|
|
3658
|
+
blob_file.GetBlobFileNumber(), blob_file.GetGarbageBlobCount(),
|
|
3659
|
+
blob_file.GetGarbageBlobBytes());
|
|
3660
|
+
compaction_job_info->blob_file_garbage_infos.emplace_back(
|
|
3661
|
+
std::move(blob_file_garbage_info));
|
|
3662
|
+
}
|
|
3313
3663
|
}
|
|
3314
3664
|
#endif
|
|
3315
3665
|
|
|
@@ -3342,7 +3692,7 @@ void DBImpl::InstallSuperVersionAndScheduleWork(
|
|
|
3342
3692
|
if (UNLIKELY(sv_context->new_superversion == nullptr)) {
|
|
3343
3693
|
sv_context->NewSuperVersion();
|
|
3344
3694
|
}
|
|
3345
|
-
cfd->InstallSuperVersion(sv_context,
|
|
3695
|
+
cfd->InstallSuperVersion(sv_context, mutable_cf_options);
|
|
3346
3696
|
|
|
3347
3697
|
// There may be a small data race here. The snapshot tricking bottommost
|
|
3348
3698
|
// compaction may already be released here. But assuming there will always be
|
|
@@ -3418,4 +3768,22 @@ void DBImpl::GetSnapshotContext(
|
|
|
3418
3768
|
}
|
|
3419
3769
|
*snapshot_seqs = snapshots_.GetAll(earliest_write_conflict_snapshot);
|
|
3420
3770
|
}
|
|
3771
|
+
|
|
3772
|
+
Status DBImpl::WaitForCompact(bool wait_unscheduled) {
|
|
3773
|
+
// Wait until the compaction completes
|
|
3774
|
+
|
|
3775
|
+
// TODO: a bug here. This function actually does not necessarily
|
|
3776
|
+
// wait for compact. It actually waits for scheduled compaction
|
|
3777
|
+
// OR flush to finish.
|
|
3778
|
+
|
|
3779
|
+
InstrumentedMutexLock l(&mutex_);
|
|
3780
|
+
while ((bg_bottom_compaction_scheduled_ || bg_compaction_scheduled_ ||
|
|
3781
|
+
bg_flush_scheduled_ ||
|
|
3782
|
+
(wait_unscheduled && unscheduled_compactions_)) &&
|
|
3783
|
+
(error_handler_.GetBGError().ok())) {
|
|
3784
|
+
bg_cv_.Wait();
|
|
3785
|
+
}
|
|
3786
|
+
return error_handler_.GetBGError();
|
|
3787
|
+
}
|
|
3788
|
+
|
|
3421
3789
|
} // namespace ROCKSDB_NAMESPACE
|