rocksdb-native 2.2.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.c +92 -10
- package/index.js +9 -0
- package/lib/batch.js +11 -1
- package/lib/iterator.js +3 -1
- package/lib/snapshot.js +21 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/rocksdb-native.bare +0 -0
- package/prebuilds/darwin-arm64/rocksdb-native.node +0 -0
- package/prebuilds/darwin-x64/rocksdb-native.bare +0 -0
- package/prebuilds/darwin-x64/rocksdb-native.node +0 -0
- package/prebuilds/linux-arm64/rocksdb-native.bare +0 -0
- package/prebuilds/linux-arm64/rocksdb-native.node +0 -0
- package/prebuilds/linux-x64/rocksdb-native.bare +0 -0
- package/prebuilds/linux-x64/rocksdb-native.node +0 -0
- package/prebuilds/win32-x64/rocksdb-native.bare +0 -0
- package/prebuilds/win32-x64/rocksdb-native.node +0 -0
- package/vendor/librocksdb/include/rocksdb.h +38 -4
- package/vendor/librocksdb/src/rocksdb.cc +114 -14
- package/vendor/librocksdb/vendor/rocksdb/CMakeLists.txt +21 -4
- package/vendor/librocksdb/vendor/rocksdb/cache/secondary_cache_adapter.cc +6 -3
- package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
- package/vendor/librocksdb/vendor/rocksdb/db/arena_wrapped_db_iter.h +4 -2
- package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.cc +20 -0
- package/vendor/librocksdb/vendor/rocksdb/db/attribute_group_iterator_impl.h +83 -0
- package/vendor/librocksdb/vendor/rocksdb/db/builder.cc +9 -5
- package/vendor/librocksdb/vendor/rocksdb/db/builder.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/db/c.cc +231 -6
- package/vendor/librocksdb/vendor/rocksdb/db/c_test.c +202 -2
- package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.cc +47 -0
- package/vendor/librocksdb/vendor/rocksdb/db/coalescing_iterator.h +79 -0
- package/vendor/librocksdb/vendor/rocksdb/db/column_family.cc +28 -0
- package/vendor/librocksdb/vendor/rocksdb/db/column_family.h +17 -0
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.cc +8 -1
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction.h +11 -9
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.cc +50 -23
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_iterator.h +13 -0
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.cc +22 -25
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_job.h +2 -0
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.cc +8 -1
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_outputs.h +1 -0
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.cc +40 -17
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker.h +20 -14
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_level.cc +11 -6
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_picker_universal.cc +77 -24
- package/vendor/librocksdb/vendor/rocksdb/db/compaction/compaction_service_job.cc +2 -0
- package/vendor/librocksdb/vendor/rocksdb/db/convenience.cc +3 -0
- package/vendor/librocksdb/vendor/rocksdb/db/db_filesnapshot.cc +125 -31
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.cc +457 -231
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl.h +172 -73
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_compaction_flush.cc +152 -133
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_debug.cc +5 -0
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_files.cc +58 -52
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.cc +348 -0
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_follower.h +54 -0
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_open.cc +136 -117
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.cc +4 -3
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_secondary.h +7 -6
- package/vendor/librocksdb/vendor/rocksdb/db/db_impl/db_impl_write.cc +134 -80
- package/vendor/librocksdb/vendor/rocksdb/db/db_iter.cc +11 -0
- package/vendor/librocksdb/vendor/rocksdb/db/db_test2.cc +1 -1
- package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.cc +11 -1
- package/vendor/librocksdb/vendor/rocksdb/db/db_test_util.h +11 -7
- package/vendor/librocksdb/vendor/rocksdb/db/dbformat.cc +19 -4
- package/vendor/librocksdb/vendor/rocksdb/db/dbformat.h +3 -2
- package/vendor/librocksdb/vendor/rocksdb/db/error_handler.cc +34 -39
- package/vendor/librocksdb/vendor/rocksdb/db/error_handler.h +3 -4
- package/vendor/librocksdb/vendor/rocksdb/db/event_helpers.cc +6 -3
- package/vendor/librocksdb/vendor/rocksdb/db/experimental.cc +3 -2
- package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.cc +76 -18
- package/vendor/librocksdb/vendor/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
- package/vendor/librocksdb/vendor/rocksdb/db/flush_job.cc +37 -5
- package/vendor/librocksdb/vendor/rocksdb/db/flush_job.h +14 -0
- package/vendor/librocksdb/vendor/rocksdb/db/import_column_family_job.cc +49 -45
- package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.cc +60 -1
- package/vendor/librocksdb/vendor/rocksdb/db/internal_stats.h +20 -1
- package/vendor/librocksdb/vendor/rocksdb/db/log_reader.cc +15 -6
- package/vendor/librocksdb/vendor/rocksdb/db/log_writer.cc +59 -10
- package/vendor/librocksdb/vendor/rocksdb/db/log_writer.h +8 -0
- package/vendor/librocksdb/vendor/rocksdb/db/memtable.cc +24 -40
- package/vendor/librocksdb/vendor/rocksdb/db/memtable.h +10 -10
- package/vendor/librocksdb/vendor/rocksdb/db/memtable_list.cc +9 -8
- package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator_impl.h +296 -0
- package/vendor/librocksdb/vendor/rocksdb/db/range_tombstone_fragmenter.h +8 -10
- package/vendor/librocksdb/vendor/rocksdb/db/repair.cc +4 -3
- package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.cc +30 -0
- package/vendor/librocksdb/vendor/rocksdb/db/seqno_to_time_mapping.h +9 -0
- package/vendor/librocksdb/vendor/rocksdb/db/table_cache.cc +17 -2
- package/vendor/librocksdb/vendor/rocksdb/db/table_cache.h +9 -1
- package/vendor/librocksdb/vendor/rocksdb/db/table_properties_collector.h +9 -2
- package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.cc +3 -3
- package/vendor/librocksdb/vendor/rocksdb/db/transaction_log_impl.h +7 -7
- package/vendor/librocksdb/vendor/rocksdb/db/version_edit.cc +0 -1
- package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.cc +39 -5
- package/vendor/librocksdb/vendor/rocksdb/db/version_edit_handler.h +24 -15
- package/vendor/librocksdb/vendor/rocksdb/db/version_set.cc +117 -64
- package/vendor/librocksdb/vendor/rocksdb/db/version_set.h +27 -10
- package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.cc +37 -29
- package/vendor/librocksdb/vendor/rocksdb/db/wal_manager.h +6 -5
- package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns.cc +2 -3
- package/vendor/librocksdb/vendor/rocksdb/db/wide/wide_columns_helper.cc +6 -0
- package/vendor/librocksdb/vendor/rocksdb/db/write_batch.cc +89 -31
- package/vendor/librocksdb/vendor/rocksdb/db/write_thread.cc +53 -5
- package/vendor/librocksdb/vendor/rocksdb/db/write_thread.h +36 -4
- package/vendor/librocksdb/vendor/rocksdb/env/composite_env_wrapper.h +21 -0
- package/vendor/librocksdb/vendor/rocksdb/env/env.cc +15 -0
- package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.cc +331 -0
- package/vendor/librocksdb/vendor/rocksdb/env/fs_on_demand.h +139 -0
- package/vendor/librocksdb/vendor/rocksdb/env/io_posix.cc +8 -6
- package/vendor/librocksdb/vendor/rocksdb/env/io_posix.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.cc +130 -27
- package/vendor/librocksdb/vendor/rocksdb/file/delete_scheduler.h +61 -8
- package/vendor/librocksdb/vendor/rocksdb/file/file_util.cc +25 -4
- package/vendor/librocksdb/vendor/rocksdb/file/file_util.h +15 -0
- package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.cc +1 -0
- package/vendor/librocksdb/vendor/rocksdb/file/sequence_file_reader.h +9 -4
- package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.cc +18 -0
- package/vendor/librocksdb/vendor/rocksdb/file/sst_file_manager_impl.h +31 -4
- package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.cc +40 -38
- package/vendor/librocksdb/vendor/rocksdb/file/writable_file_writer.h +48 -15
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/advanced_options.h +12 -3
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/attribute_groups.h +114 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/c.h +90 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/cache.h +5 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/comparator.h +27 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/db.h +71 -12
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/env.h +9 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/experimental.h +5 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/file_system.h +14 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator.h +9 -71
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/iterator_base.h +90 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/listener.h +21 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/options.h +125 -12
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/perf_context.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/sst_file_reader.h +11 -1
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table.h +6 -6
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/table_properties.h +19 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/transaction_log.h +12 -6
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/types.h +12 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/universal_compaction.h +31 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/user_write_callback.h +29 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/cache_dump_load.h +4 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/env_mirror.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/ldb_cmd.h +24 -7
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/option_change_migration.h +4 -4
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/stackable_db.h +24 -5
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction.h +42 -17
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/transaction_db.h +5 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/types_util.h +36 -0
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +71 -3
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/version.h +2 -2
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/wide_columns.h +87 -72
- package/vendor/librocksdb/vendor/rocksdb/include/rocksdb/write_batch_base.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/memory/memory_allocator.cc +1 -0
- package/vendor/librocksdb/vendor/rocksdb/options/cf_options.cc +13 -2
- package/vendor/librocksdb/vendor/rocksdb/options/cf_options.h +6 -2
- package/vendor/librocksdb/vendor/rocksdb/options/db_options.cc +27 -1
- package/vendor/librocksdb/vendor/rocksdb/options/db_options.h +10 -3
- package/vendor/librocksdb/vendor/rocksdb/options/options.cc +3 -0
- package/vendor/librocksdb/vendor/rocksdb/options/options_helper.cc +1 -0
- package/vendor/librocksdb/vendor/rocksdb/port/jemalloc_helper.h +2 -2
- package/vendor/librocksdb/vendor/rocksdb/port/stack_trace.cc +1 -0
- package/vendor/librocksdb/vendor/rocksdb/port/win/port_win.cc +3 -2
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_builder.cc +47 -31
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_factory.cc +15 -0
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.cc +37 -18
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_iterator.h +10 -3
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader.h +15 -7
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_cache.h +31 -0
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/block_prefetcher.cc +6 -0
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/cachable_entry.h +10 -5
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block.h +34 -28
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/filter_policy.cc +12 -3
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.cc +37 -30
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/full_filter_block.h +11 -13
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/hash_index_reader.cc +1 -2
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.cc +62 -53
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_builder.h +60 -38
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.cc +14 -9
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/index_reader_common.h +4 -1
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.cc +135 -94
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_filter_block.h +52 -46
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
- package/vendor/librocksdb/vendor/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
- package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.cc +8 -10
- package/vendor/librocksdb/vendor/rocksdb/table/block_fetcher.h +2 -1
- package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.cc +9 -10
- package/vendor/librocksdb/vendor/rocksdb/table/compaction_merging_iterator.h +3 -2
- package/vendor/librocksdb/vendor/rocksdb/table/format.cc +1 -2
- package/vendor/librocksdb/vendor/rocksdb/table/iterator.cc +4 -0
- package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.cc +18 -13
- package/vendor/librocksdb/vendor/rocksdb/table/merging_iterator.h +5 -3
- package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.cc +18 -4
- package/vendor/librocksdb/vendor/rocksdb/table/meta_blocks.h +4 -0
- package/vendor/librocksdb/vendor/rocksdb/table/plain/plain_table_builder.cc +2 -2
- package/vendor/librocksdb/vendor/rocksdb/table/sst_file_dumper.cc +6 -6
- package/vendor/librocksdb/vendor/rocksdb/table/sst_file_reader.cc +24 -2
- package/vendor/librocksdb/vendor/rocksdb/table/sst_file_writer_collectors.h +3 -1
- package/vendor/librocksdb/vendor/rocksdb/table/table_builder.h +8 -7
- package/vendor/librocksdb/vendor/rocksdb/table/table_iterator.h +69 -0
- package/vendor/librocksdb/vendor/rocksdb/table/table_reader.h +9 -0
- package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.cc +25 -0
- package/vendor/librocksdb/vendor/rocksdb/test_util/testutil.h +12 -0
- package/vendor/librocksdb/vendor/rocksdb/tools/db_bench_tool.cc +32 -0
- package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd.cc +618 -124
- package/vendor/librocksdb/vendor/rocksdb/tools/ldb_cmd_impl.h +19 -1
- package/vendor/librocksdb/vendor/rocksdb/tools/ldb_tool.cc +9 -0
- package/vendor/librocksdb/vendor/rocksdb/util/aligned_storage.h +24 -0
- package/vendor/librocksdb/vendor/rocksdb/util/autovector.h +4 -0
- package/vendor/librocksdb/vendor/rocksdb/util/comparator.cc +12 -0
- package/vendor/librocksdb/vendor/rocksdb/util/filter_bench.cc +1 -1
- package/vendor/librocksdb/vendor/rocksdb/util/random.cc +2 -1
- package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.cc +3 -4
- package/vendor/librocksdb/vendor/rocksdb/util/stderr_logger.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/util/udt_util.cc +33 -0
- package/vendor/librocksdb/vendor/rocksdb/util/udt_util.h +7 -0
- package/vendor/librocksdb/vendor/rocksdb/util/write_batch_util.h +5 -0
- package/vendor/librocksdb/vendor/rocksdb/util/xxhash.h +36 -29
- package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl.h +3 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc +20 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.cc +29 -9
- package/vendor/librocksdb/vendor/rocksdb/utilities/cache_dump_load_impl.h +14 -3
- package/vendor/librocksdb/vendor/rocksdb/utilities/debug.cc +16 -4
- package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.cc +677 -248
- package/vendor/librocksdb/vendor/rocksdb/utilities/fault_injection_fs.h +325 -158
- package/vendor/librocksdb/vendor/rocksdb/utilities/option_change_migration/option_change_migration.cc +1 -8
- package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/lock/range/range_tree/lib/util/growable_array.h +3 -3
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.cc +116 -20
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction.h +33 -1
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +78 -13
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/pessimistic_transaction_db.h +33 -1
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.cc +106 -7
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_base.h +68 -10
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_test.h +7 -3
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.cc +8 -5
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/transaction_util.h +7 -4
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -12
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_prepared_txn_db.h +17 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn.cc +11 -9
- package/vendor/librocksdb/vendor/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
- package/vendor/librocksdb/vendor/rocksdb/utilities/types_util.cc +88 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +313 -14
- package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +7 -0
- package/vendor/librocksdb/vendor/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
- package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.cc +0 -102
- package/vendor/librocksdb/vendor/rocksdb/db/multi_cf_iterator.h +0 -159
|
@@ -250,8 +250,6 @@ void ErrorHandler::CancelErrorRecovery() {
|
|
|
250
250
|
EndAutoRecovery();
|
|
251
251
|
}
|
|
252
252
|
|
|
253
|
-
STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
|
|
254
|
-
|
|
255
253
|
// This is the main function for looking at an error during a background
|
|
256
254
|
// operation and deciding the severity, and error recovery strategy. The high
|
|
257
255
|
// level algorithm is as follows -
|
|
@@ -270,11 +268,11 @@ STATIC_AVOID_DESTRUCTION(const Status, kOkStatus){Status::OK()};
|
|
|
270
268
|
// This can also get called as part of a recovery operation. In that case, we
|
|
271
269
|
// also track the error separately in recovery_error_ so we can tell in the
|
|
272
270
|
// end whether recovery succeeded or not
|
|
273
|
-
|
|
274
|
-
|
|
271
|
+
void ErrorHandler::HandleKnownErrors(const Status& bg_err,
|
|
272
|
+
BackgroundErrorReason reason) {
|
|
275
273
|
db_mutex_->AssertHeld();
|
|
276
274
|
if (bg_err.ok()) {
|
|
277
|
-
return
|
|
275
|
+
return;
|
|
278
276
|
}
|
|
279
277
|
|
|
280
278
|
ROCKS_LOG_INFO(db_options_.info_log,
|
|
@@ -339,7 +337,7 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
|
|
|
339
337
|
} else {
|
|
340
338
|
// This error is less severe than previously encountered error. Don't
|
|
341
339
|
// take any further action
|
|
342
|
-
return
|
|
340
|
+
return;
|
|
343
341
|
}
|
|
344
342
|
}
|
|
345
343
|
|
|
@@ -356,7 +354,6 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
|
|
|
356
354
|
if (bg_error_.severity() >= Status::Severity::kHardError) {
|
|
357
355
|
is_db_stopped_.store(true, std::memory_order_release);
|
|
358
356
|
}
|
|
359
|
-
return bg_error_;
|
|
360
357
|
}
|
|
361
358
|
|
|
362
359
|
// This is the main function for looking at IO related error during the
|
|
@@ -383,14 +380,14 @@ const Status& ErrorHandler::HandleKnownErrors(const Status& bg_err,
|
|
|
383
380
|
// 3) for other cases, HandleKnownErrors(const Status& bg_err,
|
|
384
381
|
// BackgroundErrorReason reason) will be called to handle other error cases
|
|
385
382
|
// such as delegating to SstFileManager to handle no space error.
|
|
386
|
-
|
|
387
|
-
|
|
383
|
+
void ErrorHandler::SetBGError(const Status& bg_status,
|
|
384
|
+
BackgroundErrorReason reason) {
|
|
388
385
|
db_mutex_->AssertHeld();
|
|
389
386
|
Status tmp_status = bg_status;
|
|
390
387
|
IOStatus bg_io_err = status_to_io_status(std::move(tmp_status));
|
|
391
388
|
|
|
392
389
|
if (bg_io_err.ok()) {
|
|
393
|
-
return
|
|
390
|
+
return;
|
|
394
391
|
}
|
|
395
392
|
ROCKS_LOG_WARN(db_options_.info_log, "Background IO error %s",
|
|
396
393
|
bg_io_err.ToString().c_str());
|
|
@@ -413,11 +410,11 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
|
|
|
413
410
|
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
|
|
414
411
|
&bg_err, db_mutex_, &auto_recovery);
|
|
415
412
|
recover_context_ = context;
|
|
416
|
-
return
|
|
417
|
-
}
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
413
|
+
return;
|
|
414
|
+
}
|
|
415
|
+
if (bg_io_err.subcode() != IOStatus::SubCode::kNoSpace &&
|
|
416
|
+
(bg_io_err.GetScope() == IOStatus::IOErrorScope::kIOErrorScopeFile ||
|
|
417
|
+
bg_io_err.GetRetryable())) {
|
|
421
418
|
// Second, check if the error is a retryable IO error (file scope IO error
|
|
422
419
|
// is also treated as retryable IO error in RocksDB write path). if it is
|
|
423
420
|
// retryable error and its severity is higher than bg_error_, overwrite the
|
|
@@ -426,10 +423,6 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
|
|
|
426
423
|
// IO error as hard error. Note that, all the NoSpace error should be
|
|
427
424
|
// handled by the SstFileManager::StartErrorRecovery(). Therefore, no matter
|
|
428
425
|
// it is retryable or file scope, this logic will be bypassed.
|
|
429
|
-
bool auto_recovery = false;
|
|
430
|
-
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
|
|
431
|
-
&new_bg_io_err, db_mutex_,
|
|
432
|
-
&auto_recovery);
|
|
433
426
|
|
|
434
427
|
RecordStats({ERROR_HANDLER_BG_RETRYABLE_IO_ERROR_COUNT},
|
|
435
428
|
{} /* int_histograms */);
|
|
@@ -445,9 +438,13 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
|
|
|
445
438
|
ROCKS_LOG_INFO(
|
|
446
439
|
db_options_.info_log,
|
|
447
440
|
"ErrorHandler: Compaction will schedule by itself to resume\n");
|
|
441
|
+
bool auto_recovery = false;
|
|
442
|
+
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
|
|
443
|
+
&new_bg_io_err, db_mutex_,
|
|
444
|
+
&auto_recovery);
|
|
448
445
|
// Not used in this code path.
|
|
449
446
|
new_bg_io_err.PermitUncheckedError();
|
|
450
|
-
return
|
|
447
|
+
return;
|
|
451
448
|
}
|
|
452
449
|
|
|
453
450
|
Status::Severity severity;
|
|
@@ -469,10 +466,14 @@ const Status& ErrorHandler::SetBGError(const Status& bg_status,
|
|
|
469
466
|
Status bg_err(new_bg_io_err, severity);
|
|
470
467
|
CheckAndSetRecoveryAndBGError(bg_err);
|
|
471
468
|
recover_context_ = context;
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
469
|
+
bool auto_recovery = db_options_.max_bgerror_resume_count > 0;
|
|
470
|
+
EventHelpers::NotifyOnBackgroundError(db_options_.listeners, reason,
|
|
471
|
+
&new_bg_io_err, db_mutex_,
|
|
472
|
+
&auto_recovery);
|
|
473
|
+
StartRecoverFromRetryableBGIOError(bg_io_err);
|
|
474
|
+
return;
|
|
475
475
|
}
|
|
476
|
+
HandleKnownErrors(new_bg_io_err, reason);
|
|
476
477
|
}
|
|
477
478
|
|
|
478
479
|
void ErrorHandler::AddFilesToQuarantine(
|
|
@@ -620,23 +621,23 @@ Status ErrorHandler::RecoverFromBGError(bool is_manual) {
|
|
|
620
621
|
return s;
|
|
621
622
|
}
|
|
622
623
|
|
|
623
|
-
|
|
624
|
+
void ErrorHandler::StartRecoverFromRetryableBGIOError(
|
|
624
625
|
const IOStatus& io_error) {
|
|
625
626
|
db_mutex_->AssertHeld();
|
|
626
|
-
if (bg_error_.ok()) {
|
|
627
|
-
return
|
|
628
|
-
}
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
627
|
+
if (bg_error_.ok() || io_error.ok()) {
|
|
628
|
+
return;
|
|
629
|
+
}
|
|
630
|
+
if (db_options_.max_bgerror_resume_count <= 0 || recovery_in_prog_) {
|
|
631
|
+
// Auto resume BG error is not enabled
|
|
632
|
+
return;
|
|
633
|
+
}
|
|
634
|
+
if (end_recovery_) {
|
|
634
635
|
// Can temporarily release db mutex
|
|
635
636
|
EventHelpers::NotifyOnErrorRecoveryEnd(db_options_.listeners, bg_error_,
|
|
636
637
|
Status::ShutdownInProgress(),
|
|
637
638
|
db_mutex_);
|
|
638
639
|
db_mutex_->AssertHeld();
|
|
639
|
-
return
|
|
640
|
+
return;
|
|
640
641
|
}
|
|
641
642
|
RecordStats({ERROR_HANDLER_AUTORESUME_COUNT}, {} /* int_histograms */);
|
|
642
643
|
ROCKS_LOG_INFO(
|
|
@@ -664,12 +665,6 @@ const Status& ErrorHandler::StartRecoverFromRetryableBGIOError(
|
|
|
664
665
|
|
|
665
666
|
recovery_thread_.reset(
|
|
666
667
|
new port::Thread(&ErrorHandler::RecoverFromRetryableBGIOError, this));
|
|
667
|
-
|
|
668
|
-
if (recovery_error_.ok()) {
|
|
669
|
-
return recovery_error_;
|
|
670
|
-
} else {
|
|
671
|
-
return bg_error_;
|
|
672
|
-
}
|
|
673
668
|
}
|
|
674
669
|
|
|
675
670
|
// Automatic recover from Retryable BG IO error. Must be called after db
|
|
@@ -56,7 +56,7 @@ class ErrorHandler {
|
|
|
56
56
|
Status::Severity GetErrorSeverity(BackgroundErrorReason reason,
|
|
57
57
|
Status::Code code, Status::SubCode subcode);
|
|
58
58
|
|
|
59
|
-
|
|
59
|
+
void SetBGError(const Status& bg_err, BackgroundErrorReason reason);
|
|
60
60
|
|
|
61
61
|
Status GetBGError() const { return bg_error_; }
|
|
62
62
|
|
|
@@ -135,11 +135,10 @@ class ErrorHandler {
|
|
|
135
135
|
// unsorted.
|
|
136
136
|
autovector<uint64_t> files_to_quarantine_;
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
BackgroundErrorReason reason);
|
|
138
|
+
void HandleKnownErrors(const Status& bg_err, BackgroundErrorReason reason);
|
|
140
139
|
Status OverrideNoSpaceError(const Status& bg_error, bool* auto_recovery);
|
|
141
140
|
void RecoverFromNoSpace();
|
|
142
|
-
|
|
141
|
+
void StartRecoverFromRetryableBGIOError(const IOStatus& io_error);
|
|
143
142
|
void RecoverFromRetryableBGIOError();
|
|
144
143
|
// First, if it is in recovery and the recovery_error is ok. Set the
|
|
145
144
|
// recovery_error_ to bg_err. Second, if the severity is higher than the
|
|
@@ -228,15 +228,18 @@ void EventHelpers::NotifyOnErrorRecoveryEnd(
|
|
|
228
228
|
InstrumentedMutex* db_mutex) {
|
|
229
229
|
if (!listeners.empty()) {
|
|
230
230
|
db_mutex->AssertHeld();
|
|
231
|
+
// Make copies before releasing mutex to avoid race.
|
|
232
|
+
Status old_bg_error_cp = old_bg_error;
|
|
233
|
+
Status new_bg_error_cp = new_bg_error;
|
|
231
234
|
// release lock while notifying events
|
|
232
235
|
db_mutex->Unlock();
|
|
233
236
|
TEST_SYNC_POINT("NotifyOnErrorRecoveryEnd:MutexUnlocked:1");
|
|
234
237
|
TEST_SYNC_POINT("NotifyOnErrorRecoveryEnd:MutexUnlocked:2");
|
|
235
238
|
for (auto& listener : listeners) {
|
|
236
239
|
BackgroundErrorRecoveryInfo info;
|
|
237
|
-
info.old_bg_error =
|
|
238
|
-
info.new_bg_error =
|
|
239
|
-
listener->OnErrorRecoveryCompleted(
|
|
240
|
+
info.old_bg_error = old_bg_error_cp;
|
|
241
|
+
info.new_bg_error = new_bg_error_cp;
|
|
242
|
+
listener->OnErrorRecoveryCompleted(old_bg_error_cp);
|
|
240
243
|
listener->OnErrorRecoveryEnd(info);
|
|
241
244
|
info.old_bg_error.PermitUncheckedError();
|
|
242
245
|
info.new_bg_error.PermitUncheckedError();
|
|
@@ -711,7 +711,7 @@ class SstQueryFilterConfigsManagerImpl : public SstQueryFilterConfigsManager {
|
|
|
711
711
|
uint64_t /*file_size*/) override {
|
|
712
712
|
// FIXME later: `key` might contain user timestamp. That should be
|
|
713
713
|
// exposed properly in a future update to TablePropertiesCollector
|
|
714
|
-
|
|
714
|
+
extracted.Reset();
|
|
715
715
|
if (extractor) {
|
|
716
716
|
extractor->Extract(key, KeySegmentsExtractor::kFullUserKey, &extracted);
|
|
717
717
|
if (UNLIKELY(extracted.category >=
|
|
@@ -750,7 +750,7 @@ class SstQueryFilterConfigsManagerImpl : public SstQueryFilterConfigsManager {
|
|
|
750
750
|
}
|
|
751
751
|
}
|
|
752
752
|
prev_key.assign(key.data(), key.size());
|
|
753
|
-
|
|
753
|
+
std::swap(prev_extracted, extracted);
|
|
754
754
|
first_key = false;
|
|
755
755
|
return Status::OK();
|
|
756
756
|
}
|
|
@@ -859,6 +859,7 @@ class SstQueryFilterConfigsManagerImpl : public SstQueryFilterConfigsManager {
|
|
|
859
859
|
std::vector<std::shared_ptr<SstQueryFilterBuilder>> builders;
|
|
860
860
|
bool first_key = true;
|
|
861
861
|
std::string prev_key;
|
|
862
|
+
KeySegmentsExtractor::Result extracted;
|
|
862
863
|
KeySegmentsExtractor::Result prev_extracted;
|
|
863
864
|
KeySegmentsExtractor::KeyCategorySet categories_seen;
|
|
864
865
|
};
|
|
@@ -44,9 +44,12 @@ Status ExternalSstFileIngestionJob::Prepare(
|
|
|
44
44
|
return status;
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
+
// Files generated in another DB or CF may have a different column family
|
|
48
|
+
// ID, so we let it pass here.
|
|
47
49
|
if (file_to_ingest.cf_id !=
|
|
48
50
|
TablePropertiesCollectorFactory::Context::kUnknownColumnFamily &&
|
|
49
|
-
file_to_ingest.cf_id != cfd_->GetID()
|
|
51
|
+
file_to_ingest.cf_id != cfd_->GetID() &&
|
|
52
|
+
!ingestion_options_.allow_db_generated_files) {
|
|
50
53
|
return Status::InvalidArgument(
|
|
51
54
|
"External file column family id don't match");
|
|
52
55
|
}
|
|
@@ -111,6 +114,7 @@ Status ExternalSstFileIngestionJob::Prepare(
|
|
|
111
114
|
const std::string path_inside_db = TableFileName(
|
|
112
115
|
cfd_->ioptions()->cf_paths, f.fd.GetNumber(), f.fd.GetPathId());
|
|
113
116
|
if (ingestion_options_.move_files) {
|
|
117
|
+
assert(!ingestion_options_.allow_db_generated_files);
|
|
114
118
|
status =
|
|
115
119
|
fs_->LinkFile(path_outside_db, path_inside_db, IOOptions(), nullptr);
|
|
116
120
|
if (status.ok()) {
|
|
@@ -342,8 +346,7 @@ Status ExternalSstFileIngestionJob::NeedsFlush(bool* flush_needed,
|
|
|
342
346
|
autovector<UserKeyRange> ranges;
|
|
343
347
|
ranges.reserve(n);
|
|
344
348
|
for (const IngestedFileInfo& file_to_ingest : files_to_ingest_) {
|
|
345
|
-
ranges.emplace_back(file_to_ingest.
|
|
346
|
-
file_to_ingest.largest_internal_key.user_key());
|
|
349
|
+
ranges.emplace_back(file_to_ingest.start_ukey, file_to_ingest.limit_ukey);
|
|
347
350
|
}
|
|
348
351
|
Status status = cfd_->RangesOverlapWithMemtables(
|
|
349
352
|
ranges, super_version, db_options_.allow_data_in_errors, flush_needed);
|
|
@@ -705,9 +708,16 @@ Status ExternalSstFileIngestionJob::SanityCheckTableProperties(
|
|
|
705
708
|
// Get table version
|
|
706
709
|
auto version_iter = uprops.find(ExternalSstFilePropertyNames::kVersion);
|
|
707
710
|
if (version_iter == uprops.end()) {
|
|
708
|
-
|
|
711
|
+
if (!ingestion_options_.allow_db_generated_files) {
|
|
712
|
+
return Status::Corruption("External file version not found");
|
|
713
|
+
} else {
|
|
714
|
+
// 0 is special version for when a file from live DB does not have the
|
|
715
|
+
// version table property
|
|
716
|
+
file_to_ingest->version = 0;
|
|
717
|
+
}
|
|
718
|
+
} else {
|
|
719
|
+
file_to_ingest->version = DecodeFixed32(version_iter->second.c_str());
|
|
709
720
|
}
|
|
710
|
-
file_to_ingest->version = DecodeFixed32(version_iter->second.c_str());
|
|
711
721
|
|
|
712
722
|
auto seqno_iter = uprops.find(ExternalSstFilePropertyNames::kGlobalSeqno);
|
|
713
723
|
if (file_to_ingest->version == 2) {
|
|
@@ -734,8 +744,15 @@ Status ExternalSstFileIngestionJob::SanityCheckTableProperties(
|
|
|
734
744
|
return Status::InvalidArgument(
|
|
735
745
|
"External SST file V1 does not support global seqno");
|
|
736
746
|
}
|
|
747
|
+
} else if (file_to_ingest->version == 0) {
|
|
748
|
+
// allow_db_generated_files is true
|
|
749
|
+
assert(seqno_iter == uprops.end());
|
|
750
|
+
file_to_ingest->original_seqno = 0;
|
|
751
|
+
file_to_ingest->global_seqno_offset = 0;
|
|
737
752
|
} else {
|
|
738
|
-
return Status::InvalidArgument("External file version
|
|
753
|
+
return Status::InvalidArgument("External file version " +
|
|
754
|
+
std::to_string(file_to_ingest->version) +
|
|
755
|
+
" is not supported");
|
|
739
756
|
}
|
|
740
757
|
|
|
741
758
|
file_to_ingest->cf_id = static_cast<uint32_t>(props->column_family_id);
|
|
@@ -897,6 +914,25 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
897
914
|
} else if (!iter->status().ok()) {
|
|
898
915
|
return iter->status();
|
|
899
916
|
}
|
|
917
|
+
if (ingestion_options_.allow_db_generated_files) {
|
|
918
|
+
// Verify that all keys have seqno zero.
|
|
919
|
+
// TODO: store largest seqno in table property and validate it instead.
|
|
920
|
+
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
921
|
+
Status pik_status =
|
|
922
|
+
ParseInternalKey(iter->key(), &key, allow_data_in_errors);
|
|
923
|
+
if (!pik_status.ok()) {
|
|
924
|
+
return Status::Corruption("Corrupted key in external file. ",
|
|
925
|
+
pik_status.getState());
|
|
926
|
+
}
|
|
927
|
+
if (key.sequence != 0) {
|
|
928
|
+
return Status::NotSupported(
|
|
929
|
+
"External file has a key with non zero sequence number.");
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
if (!iter->status().ok()) {
|
|
933
|
+
return iter->status();
|
|
934
|
+
}
|
|
935
|
+
}
|
|
900
936
|
|
|
901
937
|
std::unique_ptr<InternalIterator> range_del_iter(
|
|
902
938
|
table_reader->NewRangeTombstoneIterator(ro));
|
|
@@ -912,6 +948,11 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
912
948
|
return Status::Corruption("Corrupted key in external file. ",
|
|
913
949
|
pik_status.getState());
|
|
914
950
|
}
|
|
951
|
+
if (key.sequence != 0) {
|
|
952
|
+
return Status::Corruption(
|
|
953
|
+
"External file has a range deletion with non zero sequence "
|
|
954
|
+
"number.");
|
|
955
|
+
}
|
|
915
956
|
RangeTombstone tombstone(key, range_del_iter->value());
|
|
916
957
|
|
|
917
958
|
InternalKey start_key = tombstone.SerializeKey();
|
|
@@ -930,6 +971,17 @@ Status ExternalSstFileIngestionJob::GetIngestedFileInfo(
|
|
|
930
971
|
}
|
|
931
972
|
}
|
|
932
973
|
|
|
974
|
+
const size_t ts_sz = ucmp->timestamp_size();
|
|
975
|
+
Slice smallest = file_to_ingest->smallest_internal_key.user_key();
|
|
976
|
+
Slice largest = file_to_ingest->largest_internal_key.user_key();
|
|
977
|
+
if (ts_sz > 0) {
|
|
978
|
+
AppendUserKeyWithMaxTimestamp(&file_to_ingest->start_ukey, smallest, ts_sz);
|
|
979
|
+
AppendUserKeyWithMinTimestamp(&file_to_ingest->limit_ukey, largest, ts_sz);
|
|
980
|
+
} else {
|
|
981
|
+
file_to_ingest->start_ukey.assign(smallest.data(), smallest.size());
|
|
982
|
+
file_to_ingest->limit_ukey.assign(largest.data(), largest.size());
|
|
983
|
+
}
|
|
984
|
+
|
|
933
985
|
auto s =
|
|
934
986
|
GetSstInternalUniqueId(file_to_ingest->table_properties.db_id,
|
|
935
987
|
file_to_ingest->table_properties.db_session_id,
|
|
@@ -953,13 +1005,15 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
|
|
953
1005
|
*assigned_seqno = 0;
|
|
954
1006
|
auto ucmp = cfd_->user_comparator();
|
|
955
1007
|
const size_t ts_sz = ucmp->timestamp_size();
|
|
956
|
-
if (force_global_seqno || files_overlap_
|
|
1008
|
+
if (force_global_seqno || files_overlap_ ||
|
|
1009
|
+
compaction_style == kCompactionStyleFIFO) {
|
|
957
1010
|
*assigned_seqno = last_seqno + 1;
|
|
958
1011
|
// If files overlap, we have to ingest them at level 0.
|
|
959
|
-
if (files_overlap_) {
|
|
1012
|
+
if (files_overlap_ || compaction_style == kCompactionStyleFIFO) {
|
|
960
1013
|
assert(ts_sz == 0);
|
|
961
1014
|
file_to_ingest->picked_level = 0;
|
|
962
|
-
if (ingestion_options_.fail_if_not_bottommost_level
|
|
1015
|
+
if (ingestion_options_.fail_if_not_bottommost_level &&
|
|
1016
|
+
cfd_->NumberLevels() > 1) {
|
|
963
1017
|
status = Status::TryAgain(
|
|
964
1018
|
"Files cannot be ingested to Lmax. Please make sure key range of "
|
|
965
1019
|
"Lmax does not overlap with files to ingest.");
|
|
@@ -980,9 +1034,8 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
|
|
980
1034
|
if (lvl > 0 && lvl < vstorage->base_level()) {
|
|
981
1035
|
continue;
|
|
982
1036
|
}
|
|
983
|
-
if (cfd_->RangeOverlapWithCompaction(
|
|
984
|
-
|
|
985
|
-
file_to_ingest->largest_internal_key.user_key(), lvl)) {
|
|
1037
|
+
if (cfd_->RangeOverlapWithCompaction(file_to_ingest->start_ukey,
|
|
1038
|
+
file_to_ingest->limit_ukey, lvl)) {
|
|
986
1039
|
// We must use L0 or any level higher than `lvl` to be able to overwrite
|
|
987
1040
|
// the compaction output keys that we overlap with in this level. We also
|
|
988
1041
|
// need to assign this file a seqno to overwrite the compaction output
|
|
@@ -992,9 +1045,8 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
|
|
992
1045
|
} else if (vstorage->NumLevelFiles(lvl) > 0) {
|
|
993
1046
|
bool overlap_with_level = false;
|
|
994
1047
|
status = sv->current->OverlapWithLevelIterator(
|
|
995
|
-
ro, env_options_, file_to_ingest->
|
|
996
|
-
file_to_ingest->
|
|
997
|
-
&overlap_with_level);
|
|
1048
|
+
ro, env_options_, file_to_ingest->start_ukey,
|
|
1049
|
+
file_to_ingest->limit_ukey, lvl, &overlap_with_level);
|
|
998
1050
|
if (!status.ok()) {
|
|
999
1051
|
return status;
|
|
1000
1052
|
}
|
|
@@ -1035,11 +1087,18 @@ Status ExternalSstFileIngestionJob::AssignLevelAndSeqnoForIngestedFile(
|
|
|
1035
1087
|
"Column family enables user-defined timestamps, please make sure the "
|
|
1036
1088
|
"key range (without timestamp) of external file does not overlap "
|
|
1037
1089
|
"with key range (without timestamp) in the db");
|
|
1090
|
+
return status;
|
|
1038
1091
|
}
|
|
1039
1092
|
if (*assigned_seqno == 0) {
|
|
1040
1093
|
*assigned_seqno = last_seqno + 1;
|
|
1041
1094
|
}
|
|
1042
1095
|
}
|
|
1096
|
+
|
|
1097
|
+
if (ingestion_options_.allow_db_generated_files && *assigned_seqno != 0) {
|
|
1098
|
+
return Status::InvalidArgument(
|
|
1099
|
+
"An ingested file is assigned to a non-zero sequence number, which is "
|
|
1100
|
+
"incompatible with ingestion option allow_db_generated_files.");
|
|
1101
|
+
}
|
|
1043
1102
|
return status;
|
|
1044
1103
|
}
|
|
1045
1104
|
|
|
@@ -1163,9 +1222,8 @@ bool ExternalSstFileIngestionJob::IngestedFileFitInLevel(
|
|
|
1163
1222
|
}
|
|
1164
1223
|
|
|
1165
1224
|
auto* vstorage = cfd_->current()->storage_info();
|
|
1166
|
-
Slice file_smallest_user_key(
|
|
1167
|
-
|
|
1168
|
-
Slice file_largest_user_key(file_to_ingest->largest_internal_key.user_key());
|
|
1225
|
+
Slice file_smallest_user_key(file_to_ingest->start_ukey);
|
|
1226
|
+
Slice file_largest_user_key(file_to_ingest->limit_ukey);
|
|
1169
1227
|
|
|
1170
1228
|
if (vstorage->OverlapInLevel(level, &file_smallest_user_key,
|
|
1171
1229
|
&file_largest_user_key)) {
|
|
@@ -32,6 +32,17 @@ struct IngestedFileInfo {
|
|
|
32
32
|
InternalKey smallest_internal_key;
|
|
33
33
|
// Largest internal key in external file
|
|
34
34
|
InternalKey largest_internal_key;
|
|
35
|
+
// NOTE: use below two fields for all `*Overlap*` types of checks instead of
|
|
36
|
+
// smallest_internal_key.user_key() and largest_internal_key.user_key().
|
|
37
|
+
// The smallest / largest user key contained in the file for key range checks.
|
|
38
|
+
// These could be different from smallest_internal_key.user_key(), and
|
|
39
|
+
// largest_internal_key.user_key() when user-defined timestamps are enabled,
|
|
40
|
+
// because the check is about making sure the user key without timestamps part
|
|
41
|
+
// does not overlap. To achieve that, the smallest user key will be updated
|
|
42
|
+
// with the maximum timestamp while the largest user key will be updated with
|
|
43
|
+
// the min timestamp. It's otherwise the same.
|
|
44
|
+
std::string start_ukey;
|
|
45
|
+
std::string limit_ukey;
|
|
35
46
|
// Sequence number for keys in external file
|
|
36
47
|
SequenceNumber original_seqno;
|
|
37
48
|
// Offset of the global sequence number field in the file, will
|
|
@@ -115,6 +115,9 @@ FlushJob::FlushJob(
|
|
|
115
115
|
db_mutex_(db_mutex),
|
|
116
116
|
shutting_down_(shutting_down),
|
|
117
117
|
existing_snapshots_(std::move(existing_snapshots)),
|
|
118
|
+
earliest_snapshot_(existing_snapshots_.empty()
|
|
119
|
+
? kMaxSequenceNumber
|
|
120
|
+
: existing_snapshots_.at(0)),
|
|
118
121
|
earliest_write_conflict_snapshot_(earliest_write_conflict_snapshot),
|
|
119
122
|
snapshot_checker_(snapshot_checker),
|
|
120
123
|
job_context_(job_context),
|
|
@@ -194,6 +197,7 @@ void FlushJob::PickMemTable() {
|
|
|
194
197
|
// Track effective cutoff user-defined timestamp during flush if
|
|
195
198
|
// user-defined timestamps can be stripped.
|
|
196
199
|
GetEffectiveCutoffUDTForPickedMemTables();
|
|
200
|
+
GetPrecludeLastLevelMinSeqno();
|
|
197
201
|
|
|
198
202
|
ReportFlushInputSize(mems_);
|
|
199
203
|
|
|
@@ -231,7 +235,7 @@ Status FlushJob::Run(LogsWithPrepTracker* prep_tracker, FileMetaData* file_meta,
|
|
|
231
235
|
|
|
232
236
|
AutoThreadOperationStageUpdater stage_run(ThreadStatus::STAGE_FLUSH_RUN);
|
|
233
237
|
if (mems_.empty()) {
|
|
234
|
-
ROCKS_LOG_BUFFER(log_buffer_, "[%s]
|
|
238
|
+
ROCKS_LOG_BUFFER(log_buffer_, "[%s] No memtable to flush",
|
|
235
239
|
cfd_->GetName().c_str());
|
|
236
240
|
return Status::OK();
|
|
237
241
|
}
|
|
@@ -502,7 +506,7 @@ Status FlushJob::MemPurge() {
|
|
|
502
506
|
const std::atomic<bool> kManualCompactionCanceledFalse{false};
|
|
503
507
|
CompactionIterator c_iter(
|
|
504
508
|
iter.get(), (cfd_->internal_comparator()).user_comparator(), &merge,
|
|
505
|
-
kMaxSequenceNumber, &existing_snapshots_,
|
|
509
|
+
kMaxSequenceNumber, &existing_snapshots_, earliest_snapshot_,
|
|
506
510
|
earliest_write_conflict_snapshot_, job_snapshot_seq, snapshot_checker_,
|
|
507
511
|
env, ShouldReportDetailedTime(env, ioptions->stats),
|
|
508
512
|
true /* internal key corruption is not ok */, range_del_agg.get(),
|
|
@@ -968,14 +972,17 @@ Status FlushJob::WriteLevel0Table() {
|
|
|
968
972
|
cfd_->GetID(), cfd_->GetName(), 0 /* level */,
|
|
969
973
|
false /* is_bottommost */, TableFileCreationReason::kFlush,
|
|
970
974
|
oldest_key_time, current_time, db_id_, db_session_id_,
|
|
971
|
-
0 /* target_file_size */, meta_.fd.GetNumber()
|
|
975
|
+
0 /* target_file_size */, meta_.fd.GetNumber(),
|
|
976
|
+
preclude_last_level_min_seqno_ == kMaxSequenceNumber
|
|
977
|
+
? preclude_last_level_min_seqno_
|
|
978
|
+
: std::min(earliest_snapshot_, preclude_last_level_min_seqno_));
|
|
972
979
|
const SequenceNumber job_snapshot_seq =
|
|
973
980
|
job_context_->GetJobSnapshotSequence();
|
|
974
981
|
|
|
975
982
|
s = BuildTable(
|
|
976
983
|
dbname_, versions_, db_options_, tboptions, file_options_,
|
|
977
984
|
cfd_->table_cache(), iter.get(), std::move(range_del_iters), &meta_,
|
|
978
|
-
&blob_file_additions, existing_snapshots_,
|
|
985
|
+
&blob_file_additions, existing_snapshots_, earliest_snapshot_,
|
|
979
986
|
earliest_write_conflict_snapshot_, job_snapshot_seq,
|
|
980
987
|
snapshot_checker_, mutable_cf_options_.paranoid_file_checks,
|
|
981
988
|
cfd_->internal_stats(), &io_s, io_tracer_,
|
|
@@ -1010,10 +1017,15 @@ Status FlushJob::WriteLevel0Table() {
|
|
|
1010
1017
|
ROCKS_LOG_BUFFER(log_buffer_,
|
|
1011
1018
|
"[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": %" PRIu64
|
|
1012
1019
|
" bytes %s"
|
|
1013
|
-
"%s"
|
|
1020
|
+
" %s"
|
|
1021
|
+
" %s",
|
|
1014
1022
|
cfd_->GetName().c_str(), job_context_->job_id,
|
|
1015
1023
|
meta_.fd.GetNumber(), meta_.fd.GetFileSize(),
|
|
1016
1024
|
s.ToString().c_str(),
|
|
1025
|
+
s.ok() && meta_.fd.GetFileSize() == 0
|
|
1026
|
+
? "It's an empty SST file from a successful flush so "
|
|
1027
|
+
"won't be kept in the DB"
|
|
1028
|
+
: "",
|
|
1017
1029
|
meta_.marked_for_compaction ? " (needs compaction)" : "");
|
|
1018
1030
|
|
|
1019
1031
|
if (s.ok() && output_file_directory_ != nullptr && sync_output_directory_) {
|
|
@@ -1154,6 +1166,26 @@ void FlushJob::GetEffectiveCutoffUDTForPickedMemTables() {
|
|
|
1154
1166
|
}
|
|
1155
1167
|
}
|
|
1156
1168
|
|
|
1169
|
+
void FlushJob::GetPrecludeLastLevelMinSeqno() {
|
|
1170
|
+
if (cfd_->ioptions()->preclude_last_level_data_seconds == 0) {
|
|
1171
|
+
return;
|
|
1172
|
+
}
|
|
1173
|
+
int64_t current_time = 0;
|
|
1174
|
+
Status s = db_options_.clock->GetCurrentTime(&current_time);
|
|
1175
|
+
if (!s.ok()) {
|
|
1176
|
+
ROCKS_LOG_WARN(db_options_.info_log,
|
|
1177
|
+
"Failed to get current time in Flush: Status: %s",
|
|
1178
|
+
s.ToString().c_str());
|
|
1179
|
+
} else {
|
|
1180
|
+
SequenceNumber preserve_time_min_seqno;
|
|
1181
|
+
seqno_to_time_mapping_->GetCurrentTieringCutoffSeqnos(
|
|
1182
|
+
static_cast<uint64_t>(current_time),
|
|
1183
|
+
cfd_->ioptions()->preserve_internal_time_seconds,
|
|
1184
|
+
cfd_->ioptions()->preclude_last_level_data_seconds,
|
|
1185
|
+
&preserve_time_min_seqno, &preclude_last_level_min_seqno_);
|
|
1186
|
+
}
|
|
1187
|
+
}
|
|
1188
|
+
|
|
1157
1189
|
Status FlushJob::MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT() {
|
|
1158
1190
|
db_mutex_->AssertHeld();
|
|
1159
1191
|
const auto* ucmp = cfd_->user_comparator();
|
|
@@ -143,6 +143,13 @@ class FlushJob {
|
|
|
143
143
|
// `MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT` for details.
|
|
144
144
|
void GetEffectiveCutoffUDTForPickedMemTables();
|
|
145
145
|
|
|
146
|
+
// If this column family enables tiering feature, it will find the current
|
|
147
|
+
// `preclude_last_level_min_seqno_`, and the smaller one between this and
|
|
148
|
+
// the `earliest_snapshot_` will later be announced to user property
|
|
149
|
+
// collectors. It indicates to tiering use cases which data are old enough to
|
|
150
|
+
// be placed on the last level.
|
|
151
|
+
void GetPrecludeLastLevelMinSeqno();
|
|
152
|
+
|
|
146
153
|
Status MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT();
|
|
147
154
|
|
|
148
155
|
const std::string& dbname_;
|
|
@@ -161,6 +168,7 @@ class FlushJob {
|
|
|
161
168
|
InstrumentedMutex* db_mutex_;
|
|
162
169
|
std::atomic<bool>* shutting_down_;
|
|
163
170
|
std::vector<SequenceNumber> existing_snapshots_;
|
|
171
|
+
SequenceNumber earliest_snapshot_;
|
|
164
172
|
SequenceNumber earliest_write_conflict_snapshot_;
|
|
165
173
|
SnapshotChecker* snapshot_checker_;
|
|
166
174
|
JobContext* job_context_;
|
|
@@ -221,6 +229,12 @@ class FlushJob {
|
|
|
221
229
|
// Keeps track of the newest user-defined timestamp for this flush job if
|
|
222
230
|
// `persist_user_defined_timestamps` flag is false.
|
|
223
231
|
std::string cutoff_udt_;
|
|
232
|
+
|
|
233
|
+
// The current minimum seqno that compaction jobs will preclude the data from
|
|
234
|
+
// the last level. Data with seqnos larger than this or larger than
|
|
235
|
+
// `earliest_snapshot_` would be output to the penultimate level had it gone
|
|
236
|
+
// through a compaction to the last level.
|
|
237
|
+
SequenceNumber preclude_last_level_min_seqno_ = kMaxSequenceNumber;
|
|
224
238
|
};
|
|
225
239
|
|
|
226
240
|
} // namespace ROCKSDB_NAMESPACE
|