@nxtedition/rocksdb 8.2.7 → 9.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +7 -1
- package/deps/rocksdb/rocksdb/Makefile +22 -19
- package/deps/rocksdb/rocksdb/TARGETS +8 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +157 -61
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +43 -92
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +632 -455
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +244 -149
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +41 -13
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +11 -1
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +216 -17
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +7 -5
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +279 -199
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +159 -8
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +28 -2
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -0
- package/deps/rocksdb/rocksdb/crash_test.mk +14 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -21
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.h +1 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_sequential_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +32 -7
- package/deps/rocksdb/rocksdb/db/c.cc +169 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +104 -6
- package/deps/rocksdb/rocksdb/db/column_family.cc +98 -47
- package/deps/rocksdb/rocksdb/db/column_family.h +25 -2
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +213 -2
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +93 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +33 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +7 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +107 -43
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +15 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +25 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +29 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +24 -31
- package/deps/rocksdb/rocksdb/db/compaction/file_pri.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +19 -19
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/convenience.cc +20 -3
- package/deps/rocksdb/rocksdb/db/convenience_impl.h +15 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +17 -0
- package/deps/rocksdb/rocksdb/db/cuckoo_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +17 -3
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +15 -15
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +666 -44
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +2 -29
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +274 -1
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +40 -19
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +6 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +250 -116
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +51 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +354 -96
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +6 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -21
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +26 -13
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +13 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +61 -21
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -87
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +7 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +2 -2
- package/deps/rocksdb/rocksdb/db/db_iter.h +1 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +4 -11
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +39 -29
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +26 -36
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +106 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +12 -3
- package/deps/rocksdb/rocksdb/db/db_statistics_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +279 -166
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -21
- package/deps/rocksdb/rocksdb/db/db_test2.cc +81 -12
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +14 -6
- package/deps/rocksdb/rocksdb/db/db_test_util.h +40 -0
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +13 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +233 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/db_write_buffer_manager_test.cc +6 -6
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/dbformat.cc +36 -0
- package/deps/rocksdb/rocksdb/db/dbformat.h +169 -20
- package/deps/rocksdb/rocksdb/db/dbformat_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +16 -0
- package/deps/rocksdb/rocksdb/db/error_handler.h +6 -3
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +4 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +2 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +17 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +86 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/file_indexer.cc +2 -4
- package/deps/rocksdb/rocksdb/db/flush_job.cc +101 -11
- package/deps/rocksdb/rocksdb/db/flush_job.h +24 -1
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +88 -11
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +2 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +159 -91
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +19 -10
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +143 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +13 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -0
- package/deps/rocksdb/rocksdb/db/listener_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/log_reader.h +3 -2
- package/deps/rocksdb/rocksdb/db/log_test.cc +17 -21
- package/deps/rocksdb/rocksdb/db/log_writer.cc +1 -1
- package/deps/rocksdb/rocksdb/db/log_writer.h +3 -2
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -3
- package/deps/rocksdb/rocksdb/db/memtable.cc +52 -13
- package/deps/rocksdb/rocksdb/db/memtable.h +45 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +44 -10
- package/deps/rocksdb/rocksdb/db/memtable_list.h +32 -1
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +90 -4
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +21 -4
- package/deps/rocksdb/rocksdb/db/repair_test.cc +143 -2
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +5 -4
- package/deps/rocksdb/rocksdb/db/table_cache.cc +44 -35
- package/deps/rocksdb/rocksdb/db/table_cache.h +6 -6
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_builder.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +236 -204
- package/deps/rocksdb/rocksdb/db/version_edit.cc +66 -4
- package/deps/rocksdb/rocksdb/db/version_edit.h +48 -6
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +80 -8
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +12 -0
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +86 -17
- package/deps/rocksdb/rocksdb/db/version_set.cc +136 -41
- package/deps/rocksdb/rocksdb/db/version_set.h +28 -7
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +25 -15
- package/deps/rocksdb/rocksdb/db/write_batch.cc +11 -0
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +42 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +32 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +247 -120
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +9 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +13 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_value.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +15 -27
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +264 -69
- package/deps/rocksdb/rocksdb/env/env.cc +1 -2
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +11 -165
- package/deps/rocksdb/rocksdb/env/env_encryption_ctr.h +0 -17
- package/deps/rocksdb/rocksdb/env/env_posix.cc +6 -2
- package/deps/rocksdb/rocksdb/env/env_test.cc +86 -2
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +6 -4
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +78 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.h +34 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler.cc +1 -0
- package/deps/rocksdb/rocksdb/file/delete_scheduler_test.cc +15 -4
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +52 -43
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +34 -18
- package/deps/rocksdb/rocksdb/file/file_util.cc +10 -5
- package/deps/rocksdb/rocksdb/file/file_util.h +13 -1
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +724 -79
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +64 -33
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +3 -16
- package/deps/rocksdb/rocksdb/file/random_access_file_reader_test.cc +23 -12
- package/deps/rocksdb/rocksdb/file/sequence_file_reader.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +153 -88
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +70 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +50 -11
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +16 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +55 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +32 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -109
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +90 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +85 -17
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +13 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +5 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +21 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +7 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +33 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +0 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +3 -0
- package/deps/rocksdb/rocksdb/memory/arena_test.cc +18 -11
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -1
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +69 -34
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +16 -1
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +10 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +19 -0
- package/deps/rocksdb/rocksdb/options/cf_options.h +10 -2
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/options/db_options.cc +7 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +1 -0
- package/deps/rocksdb/rocksdb/options/options.cc +15 -1
- package/deps/rocksdb/rocksdb/options/options_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +11 -3
- package/deps/rocksdb/rocksdb/options/options_test.cc +8 -0
- package/deps/rocksdb/rocksdb/port/mmap.h +20 -0
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +27 -12
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -1
- package/deps/rocksdb/rocksdb/src.mk +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +48 -22
- package/deps/rocksdb/rocksdb/table/block_based/block.h +60 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +115 -42
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +60 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +62 -44
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +36 -14
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +38 -15
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +219 -51
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +41 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +25 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +50 -21
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +11 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +195 -55
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +31 -16
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +97 -58
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +27 -12
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +114 -70
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +9 -6
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.cc +15 -3
- package/deps/rocksdb/rocksdb/table/block_based/reader_common.h +6 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +11 -11
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +3 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +1 -0
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc +1 -2
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc +2 -3
- package/deps/rocksdb/rocksdb/table/format.cc +175 -33
- package/deps/rocksdb/rocksdb/table/format.h +63 -10
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +10 -2
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +12 -4
- package/deps/rocksdb/rocksdb/table/meta_blocks.h +1 -0
- package/deps/rocksdb/rocksdb/table/mock_table.cc +8 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +10 -5
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.h +10 -1
- package/deps/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc +1 -2
- package/deps/rocksdb/rocksdb/table/plain/plain_table_reader.cc +3 -3
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +12 -3
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +26 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +6 -2
- package/deps/rocksdb/rocksdb/table/table_properties.cc +6 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +52 -22
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +19 -7
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +3 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +29 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +19 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +65 -26
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +0 -1
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +4 -0
- package/deps/rocksdb/rocksdb/unreleased_history/README.txt +73 -0
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +27 -0
- package/deps/rocksdb/rocksdb/unreleased_history/behavior_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/bug_fixes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/new_features/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/performance_improvements/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/public_api_changes/.gitkeep +0 -0
- package/deps/rocksdb/rocksdb/unreleased_history/release.sh +104 -0
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +5 -0
- package/deps/rocksdb/rocksdb/util/bloom_impl.h +3 -3
- package/deps/rocksdb/rocksdb/util/cast_util.h +14 -0
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +29 -7
- package/deps/rocksdb/rocksdb/util/compression.cc +4 -4
- package/deps/rocksdb/rocksdb/util/compression.h +110 -32
- package/deps/rocksdb/rocksdb/util/core_local.h +2 -1
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +4 -4
- package/deps/rocksdb/rocksdb/util/filelock_test.cc +3 -0
- package/deps/rocksdb/rocksdb/util/hash.h +7 -3
- package/deps/rocksdb/rocksdb/util/hash_test.cc +44 -0
- package/deps/rocksdb/rocksdb/util/math.h +58 -6
- package/deps/rocksdb/rocksdb/util/math128.h +29 -7
- package/deps/rocksdb/rocksdb/util/mutexlock.h +35 -27
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +1 -0
- package/deps/rocksdb/rocksdb/util/stop_watch.h +1 -1
- package/deps/rocksdb/rocksdb/util/thread_operation.h +8 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +343 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +173 -1
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +447 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.cc +25 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +80 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +69 -25
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h +7 -6
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_file.cc +6 -11
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -2
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +4 -5
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +2 -1
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc +1 -2
- package/deps/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc +2 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc +23 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc +9 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h +37 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +231 -33
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +76 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +40 -23
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +13 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +7 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +41 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +71 -24
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h +19 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +60 -107
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +39 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h +6 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +14 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +10 -5
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +6 -6
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -182,9 +182,8 @@ class LogTest
|
|
|
182
182
|
|
|
183
183
|
Slice* get_reader_contents() { return &reader_contents_; }
|
|
184
184
|
|
|
185
|
-
void Write(
|
|
186
|
-
|
|
187
|
-
const std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
|
|
185
|
+
void Write(const std::string& msg,
|
|
186
|
+
const UnorderedMap<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
|
|
188
187
|
if (cf_to_ts_sz != nullptr && !cf_to_ts_sz->empty()) {
|
|
189
188
|
ASSERT_OK(writer_->MaybeAddUserDefinedTimestampSizeRecord(*cf_to_ts_sz));
|
|
190
189
|
}
|
|
@@ -193,10 +192,9 @@ class LogTest
|
|
|
193
192
|
|
|
194
193
|
size_t WrittenBytes() const { return dest_contents().size(); }
|
|
195
194
|
|
|
196
|
-
std::string Read(
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
std::unordered_map<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
|
|
195
|
+
std::string Read(const WALRecoveryMode wal_recovery_mode =
|
|
196
|
+
WALRecoveryMode::kTolerateCorruptedTailRecords,
|
|
197
|
+
UnorderedMap<uint32_t, size_t>* cf_to_ts_sz = nullptr) {
|
|
200
198
|
std::string scratch;
|
|
201
199
|
Slice record;
|
|
202
200
|
bool ret = false;
|
|
@@ -270,9 +268,8 @@ class LogTest
|
|
|
270
268
|
}
|
|
271
269
|
|
|
272
270
|
void CheckRecordAndTimestampSize(
|
|
273
|
-
std::string record,
|
|
274
|
-
|
|
275
|
-
std::unordered_map<uint32_t, size_t> recorded_ts_sz;
|
|
271
|
+
std::string record, UnorderedMap<uint32_t, size_t>& expected_ts_sz) {
|
|
272
|
+
UnorderedMap<uint32_t, size_t> recorded_ts_sz;
|
|
276
273
|
ASSERT_EQ(record,
|
|
277
274
|
Read(WALRecoveryMode::
|
|
278
275
|
kTolerateCorruptedTailRecords /* wal_recovery_mode */,
|
|
@@ -297,18 +294,18 @@ TEST_P(LogTest, ReadWrite) {
|
|
|
297
294
|
}
|
|
298
295
|
|
|
299
296
|
TEST_P(LogTest, ReadWriteWithTimestampSize) {
|
|
300
|
-
|
|
297
|
+
UnorderedMap<uint32_t, size_t> ts_sz_one = {
|
|
301
298
|
{1, sizeof(uint64_t)},
|
|
302
299
|
};
|
|
303
300
|
Write("foo", &ts_sz_one);
|
|
304
301
|
Write("bar");
|
|
305
|
-
|
|
302
|
+
UnorderedMap<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
|
|
306
303
|
Write("", &ts_sz_two);
|
|
307
304
|
Write("xxxx");
|
|
308
305
|
|
|
309
306
|
CheckRecordAndTimestampSize("foo", ts_sz_one);
|
|
310
307
|
CheckRecordAndTimestampSize("bar", ts_sz_one);
|
|
311
|
-
|
|
308
|
+
UnorderedMap<uint32_t, size_t> expected_ts_sz_two;
|
|
312
309
|
// User-defined timestamp size records are accumulated and applied to
|
|
313
310
|
// subsequent records.
|
|
314
311
|
expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
|
|
@@ -320,10 +317,9 @@ TEST_P(LogTest, ReadWriteWithTimestampSize) {
|
|
|
320
317
|
}
|
|
321
318
|
|
|
322
319
|
TEST_P(LogTest, ReadWriteWithTimestampSizeZeroTimestampIgnored) {
|
|
323
|
-
|
|
320
|
+
UnorderedMap<uint32_t, size_t> ts_sz_one = {{1, sizeof(uint64_t)}};
|
|
324
321
|
Write("foo", &ts_sz_one);
|
|
325
|
-
|
|
326
|
-
ts_sz_one.end());
|
|
322
|
+
UnorderedMap<uint32_t, size_t> ts_sz_two(ts_sz_one.begin(), ts_sz_one.end());
|
|
327
323
|
ts_sz_two.insert(std::make_pair(2, 0));
|
|
328
324
|
Write("bar", &ts_sz_two);
|
|
329
325
|
|
|
@@ -749,7 +745,7 @@ TEST_P(LogTest, RecycleWithTimestampSize) {
|
|
|
749
745
|
if (!recyclable_log) {
|
|
750
746
|
return; // test is only valid for recycled logs
|
|
751
747
|
}
|
|
752
|
-
|
|
748
|
+
UnorderedMap<uint32_t, size_t> ts_sz_one = {
|
|
753
749
|
{1, sizeof(uint32_t)},
|
|
754
750
|
};
|
|
755
751
|
Write("foo", &ts_sz_one);
|
|
@@ -765,7 +761,7 @@ TEST_P(LogTest, RecycleWithTimestampSize) {
|
|
|
765
761
|
std::unique_ptr<WritableFileWriter> dest_holder(new WritableFileWriter(
|
|
766
762
|
std::move(sink), "" /* don't care */, FileOptions()));
|
|
767
763
|
Writer recycle_writer(std::move(dest_holder), 123, true);
|
|
768
|
-
|
|
764
|
+
UnorderedMap<uint32_t, size_t> ts_sz_two = {
|
|
769
765
|
{2, sizeof(uint64_t)},
|
|
770
766
|
};
|
|
771
767
|
ASSERT_OK(recycle_writer.MaybeAddUserDefinedTimestampSizeRecord(ts_sz_two));
|
|
@@ -1039,18 +1035,18 @@ TEST_P(CompressionLogTest, ReadWriteWithTimestampSize) {
|
|
|
1039
1035
|
return;
|
|
1040
1036
|
}
|
|
1041
1037
|
ASSERT_OK(SetupTestEnv());
|
|
1042
|
-
|
|
1038
|
+
UnorderedMap<uint32_t, size_t> ts_sz_one = {
|
|
1043
1039
|
{1, sizeof(uint64_t)},
|
|
1044
1040
|
};
|
|
1045
1041
|
Write("foo", &ts_sz_one);
|
|
1046
1042
|
Write("bar");
|
|
1047
|
-
|
|
1043
|
+
UnorderedMap<uint32_t, size_t> ts_sz_two = {{2, sizeof(char)}};
|
|
1048
1044
|
Write("", &ts_sz_two);
|
|
1049
1045
|
Write("xxxx");
|
|
1050
1046
|
|
|
1051
1047
|
CheckRecordAndTimestampSize("foo", ts_sz_one);
|
|
1052
1048
|
CheckRecordAndTimestampSize("bar", ts_sz_one);
|
|
1053
|
-
|
|
1049
|
+
UnorderedMap<uint32_t, size_t> expected_ts_sz_two;
|
|
1054
1050
|
// User-defined timestamp size records are accumulated and applied to
|
|
1055
1051
|
// subsequent records.
|
|
1056
1052
|
expected_ts_sz_two.insert(ts_sz_one.begin(), ts_sz_one.end());
|
|
@@ -197,7 +197,7 @@ IOStatus Writer::AddCompressionTypeRecord() {
|
|
|
197
197
|
}
|
|
198
198
|
|
|
199
199
|
IOStatus Writer::MaybeAddUserDefinedTimestampSizeRecord(
|
|
200
|
-
const
|
|
200
|
+
const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
|
|
201
201
|
Env::IOPriority rate_limiter_priority) {
|
|
202
202
|
std::vector<std::pair<uint32_t, size_t>> ts_sz_to_record;
|
|
203
203
|
for (const auto& [cf_id, ts_sz] : cf_to_ts_sz) {
|
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
#include "rocksdb/slice.h"
|
|
21
21
|
#include "rocksdb/status.h"
|
|
22
22
|
#include "util/compression.h"
|
|
23
|
+
#include "util/hash_containers.h"
|
|
23
24
|
|
|
24
25
|
namespace ROCKSDB_NAMESPACE {
|
|
25
26
|
|
|
@@ -95,7 +96,7 @@ class Writer {
|
|
|
95
96
|
// kRecyclableUserDefinedTimestampSizeType for these column families.
|
|
96
97
|
// This timestamp size record applies to all subsequent records.
|
|
97
98
|
IOStatus MaybeAddUserDefinedTimestampSizeRecord(
|
|
98
|
-
const
|
|
99
|
+
const UnorderedMap<uint32_t, size_t>& cf_to_ts_sz,
|
|
99
100
|
Env::IOPriority rate_limiter_priority = Env::IO_TOTAL);
|
|
100
101
|
|
|
101
102
|
WritableFileWriter* file() { return dest_.get(); }
|
|
@@ -137,7 +138,7 @@ class Writer {
|
|
|
137
138
|
// The recorded user-defined timestamp size that have been written so far.
|
|
138
139
|
// Since the user-defined timestamp size cannot be changed while the DB is
|
|
139
140
|
// running, existing entry in this map cannot be updated.
|
|
140
|
-
|
|
141
|
+
UnorderedMap<uint32_t, size_t> recorded_cf_to_ts_sz_;
|
|
141
142
|
};
|
|
142
143
|
|
|
143
144
|
} // namespace log
|
|
@@ -190,6 +190,7 @@ TEST_F(ManualCompactionTest, Test) {
|
|
|
190
190
|
TEST_F(ManualCompactionTest, SkipLevel) {
|
|
191
191
|
DB* db;
|
|
192
192
|
Options options;
|
|
193
|
+
options.level_compaction_dynamic_level_bytes = false;
|
|
193
194
|
options.num_levels = 3;
|
|
194
195
|
// Initially, flushed L0 files won't exceed 100.
|
|
195
196
|
options.level0_file_num_compaction_trigger = 100;
|
|
@@ -286,9 +287,9 @@ TEST_F(ManualCompactionTest, SkipLevel) {
|
|
|
286
287
|
filter->Reset();
|
|
287
288
|
ASSERT_OK(db->CompactRange(CompactRangeOptions(), &start, nullptr));
|
|
288
289
|
ASSERT_EQ(4, filter->NumKeys());
|
|
289
|
-
// 1 is first compacted to L1 and then
|
|
290
|
-
//
|
|
291
|
-
ASSERT_EQ(
|
|
290
|
+
// 1 is first compacted from L0 to L1, and then L1 intra level compaction
|
|
291
|
+
// compacts [2, 4, 8] only.
|
|
292
|
+
ASSERT_EQ(0, filter->KeyLevel("1"));
|
|
292
293
|
ASSERT_EQ(1, filter->KeyLevel("2"));
|
|
293
294
|
ASSERT_EQ(1, filter->KeyLevel("4"));
|
|
294
295
|
ASSERT_EQ(1, filter->KeyLevel("8"));
|
|
@@ -95,6 +95,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
|
|
95
95
|
data_size_(0),
|
|
96
96
|
num_entries_(0),
|
|
97
97
|
num_deletes_(0),
|
|
98
|
+
num_range_deletes_(0),
|
|
98
99
|
write_buffer_size_(mutable_cf_options.write_buffer_size),
|
|
99
100
|
flush_in_progress_(false),
|
|
100
101
|
flush_completed_(false),
|
|
@@ -114,7 +115,9 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
|
|
114
115
|
ioptions.memtable_insert_with_hint_prefix_extractor.get()),
|
|
115
116
|
oldest_key_time_(std::numeric_limits<uint64_t>::max()),
|
|
116
117
|
atomic_flush_seqno_(kMaxSequenceNumber),
|
|
117
|
-
approximate_memory_usage_(0)
|
|
118
|
+
approximate_memory_usage_(0),
|
|
119
|
+
memtable_max_range_deletions_(
|
|
120
|
+
mutable_cf_options.memtable_max_range_deletions) {
|
|
118
121
|
UpdateFlushState();
|
|
119
122
|
// something went wrong if we need to flush before inserting anything
|
|
120
123
|
assert(!ShouldScheduleFlush());
|
|
@@ -143,6 +146,10 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
|
|
143
146
|
new_cache.get()),
|
|
144
147
|
std::memory_order_relaxed);
|
|
145
148
|
}
|
|
149
|
+
const Comparator* ucmp = cmp.user_comparator();
|
|
150
|
+
assert(ucmp);
|
|
151
|
+
ts_sz_ = ucmp->timestamp_size();
|
|
152
|
+
persist_user_defined_timestamps_ = ioptions.persist_user_defined_timestamps;
|
|
146
153
|
}
|
|
147
154
|
|
|
148
155
|
MemTable::~MemTable() {
|
|
@@ -170,6 +177,14 @@ size_t MemTable::ApproximateMemoryUsage() {
|
|
|
170
177
|
}
|
|
171
178
|
|
|
172
179
|
bool MemTable::ShouldFlushNow() {
|
|
180
|
+
// This is set if memtable_max_range_deletions is > 0,
|
|
181
|
+
// and that many range deletions are done
|
|
182
|
+
if (memtable_max_range_deletions_ > 0 &&
|
|
183
|
+
num_range_deletes_.load(std::memory_order_relaxed) >=
|
|
184
|
+
static_cast<uint64_t>(memtable_max_range_deletions_)) {
|
|
185
|
+
return true;
|
|
186
|
+
}
|
|
187
|
+
|
|
173
188
|
size_t write_buffer_size = write_buffer_size_.load(std::memory_order_relaxed);
|
|
174
189
|
// In a lot of times, we cannot allocate arena blocks that exactly matches the
|
|
175
190
|
// buffer size. Thus we have to decide if we should over-allocate or
|
|
@@ -357,7 +372,8 @@ class MemTableIterator : public InternalIterator {
|
|
|
357
372
|
!mem.GetImmutableMemTableOptions()->inplace_update_support),
|
|
358
373
|
protection_bytes_per_key_(mem.moptions_.protection_bytes_per_key),
|
|
359
374
|
status_(Status::OK()),
|
|
360
|
-
logger_(mem.moptions_.info_log)
|
|
375
|
+
logger_(mem.moptions_.info_log),
|
|
376
|
+
ts_sz_(mem.ts_sz_) {
|
|
361
377
|
if (use_range_del_table) {
|
|
362
378
|
iter_ = mem.range_del_table_->GetIterator(arena);
|
|
363
379
|
} else if (prefix_extractor_ != nullptr && !read_options.total_order_seek &&
|
|
@@ -400,8 +416,7 @@ class MemTableIterator : public InternalIterator {
|
|
|
400
416
|
PERF_COUNTER_ADD(seek_on_memtable_count, 1);
|
|
401
417
|
if (bloom_) {
|
|
402
418
|
// iterator should only use prefix bloom filter
|
|
403
|
-
|
|
404
|
-
Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
|
|
419
|
+
Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz_));
|
|
405
420
|
if (prefix_extractor_->InDomain(user_k_without_ts)) {
|
|
406
421
|
if (!bloom_->MayContain(
|
|
407
422
|
prefix_extractor_->Transform(user_k_without_ts))) {
|
|
@@ -421,8 +436,7 @@ class MemTableIterator : public InternalIterator {
|
|
|
421
436
|
PERF_TIMER_GUARD(seek_on_memtable_time);
|
|
422
437
|
PERF_COUNTER_ADD(seek_on_memtable_count, 1);
|
|
423
438
|
if (bloom_) {
|
|
424
|
-
|
|
425
|
-
Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz));
|
|
439
|
+
Slice user_k_without_ts(ExtractUserKeyAndStripTimestamp(k, ts_sz_));
|
|
426
440
|
if (prefix_extractor_->InDomain(user_k_without_ts)) {
|
|
427
441
|
if (!bloom_->MayContain(
|
|
428
442
|
prefix_extractor_->Transform(user_k_without_ts))) {
|
|
@@ -512,6 +526,7 @@ class MemTableIterator : public InternalIterator {
|
|
|
512
526
|
uint32_t protection_bytes_per_key_;
|
|
513
527
|
Status status_;
|
|
514
528
|
Logger* logger_;
|
|
529
|
+
size_t ts_sz_;
|
|
515
530
|
|
|
516
531
|
void VerifyEntryChecksum() {
|
|
517
532
|
if (protection_bytes_per_key_ > 0 && Valid()) {
|
|
@@ -625,8 +640,7 @@ Status MemTable::VerifyEncodedEntry(Slice encoded,
|
|
|
625
640
|
if (!GetVarint32(&encoded, &ikey_len)) {
|
|
626
641
|
return Status::Corruption("Unable to parse internal key length");
|
|
627
642
|
}
|
|
628
|
-
|
|
629
|
-
if (ikey_len < 8 + ts_sz) {
|
|
643
|
+
if (ikey_len < 8 + ts_sz_) {
|
|
630
644
|
return Status::Corruption("Internal key length too short");
|
|
631
645
|
}
|
|
632
646
|
if (ikey_len > encoded.size()) {
|
|
@@ -725,8 +739,7 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
725
739
|
}
|
|
726
740
|
}
|
|
727
741
|
|
|
728
|
-
|
|
729
|
-
Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz);
|
|
742
|
+
Slice key_without_ts = StripTimestampFromUserKey(key, ts_sz_);
|
|
730
743
|
|
|
731
744
|
if (!allow_concurrent) {
|
|
732
745
|
// Extract prefix for insert with hint.
|
|
@@ -754,6 +767,9 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
754
767
|
type == kTypeDeletionWithTimestamp) {
|
|
755
768
|
num_deletes_.store(num_deletes_.load(std::memory_order_relaxed) + 1,
|
|
756
769
|
std::memory_order_relaxed);
|
|
770
|
+
} else if (type == kTypeRangeDeletion) {
|
|
771
|
+
uint64_t val = num_range_deletes_.load(std::memory_order_relaxed) + 1;
|
|
772
|
+
num_range_deletes_.store(val, std::memory_order_relaxed);
|
|
757
773
|
}
|
|
758
774
|
|
|
759
775
|
if (bloom_filter_ && prefix_extractor_ &&
|
|
@@ -776,6 +792,9 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
776
792
|
assert(first_seqno_.load() >= earliest_seqno_.load());
|
|
777
793
|
}
|
|
778
794
|
assert(post_process_info == nullptr);
|
|
795
|
+
// TODO(yuzhangyu): support updating newest UDT for when `allow_concurrent`
|
|
796
|
+
// is true.
|
|
797
|
+
MaybeUpdateNewestUDT(key_slice);
|
|
779
798
|
UpdateFlushState();
|
|
780
799
|
} else {
|
|
781
800
|
bool res = (hint == nullptr)
|
|
@@ -810,13 +829,14 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
810
829
|
earliest_seqno_.load(std::memory_order_relaxed);
|
|
811
830
|
while (
|
|
812
831
|
(cur_earliest_seqno == kMaxSequenceNumber || s < cur_earliest_seqno) &&
|
|
813
|
-
!
|
|
832
|
+
!earliest_seqno_.compare_exchange_weak(cur_earliest_seqno, s)) {
|
|
814
833
|
}
|
|
815
834
|
}
|
|
816
835
|
if (type == kTypeRangeDeletion) {
|
|
817
836
|
auto new_cache = std::make_shared<FragmentedRangeTombstoneListCache>();
|
|
818
837
|
size_t size = cached_range_tombstone_.Size();
|
|
819
838
|
if (allow_concurrent) {
|
|
839
|
+
post_process_info->num_range_deletes++;
|
|
820
840
|
range_del_mutex_.lock();
|
|
821
841
|
}
|
|
822
842
|
for (size_t i = 0; i < size; ++i) {
|
|
@@ -835,6 +855,7 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
835
855
|
new_local_cache_ref, new_cache.get()),
|
|
836
856
|
std::memory_order_relaxed);
|
|
837
857
|
}
|
|
858
|
+
|
|
838
859
|
if (allow_concurrent) {
|
|
839
860
|
range_del_mutex_.unlock();
|
|
840
861
|
}
|
|
@@ -1263,6 +1284,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
|
|
|
1263
1284
|
// Avoiding recording stats for speed.
|
|
1264
1285
|
return false;
|
|
1265
1286
|
}
|
|
1287
|
+
|
|
1266
1288
|
PERF_TIMER_GUARD(get_from_memtable_time);
|
|
1267
1289
|
|
|
1268
1290
|
std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
|
|
@@ -1286,8 +1308,7 @@ bool MemTable::Get(const LookupKey& key, std::string* value,
|
|
|
1286
1308
|
bool found_final_value = false;
|
|
1287
1309
|
bool merge_in_progress = s->IsMergeInProgress();
|
|
1288
1310
|
bool may_contain = true;
|
|
1289
|
-
|
|
1290
|
-
Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz);
|
|
1311
|
+
Slice user_key_without_ts = StripTimestampFromUserKey(key.user_key(), ts_sz_);
|
|
1291
1312
|
bool bloom_checked = false;
|
|
1292
1313
|
if (bloom_filter_) {
|
|
1293
1314
|
// when both memtable_whole_key_filtering and prefix_extractor_ are set,
|
|
@@ -1672,4 +1693,22 @@ uint64_t MemTable::GetMinLogContainingPrepSection() {
|
|
|
1672
1693
|
return min_prep_log_referenced_.load();
|
|
1673
1694
|
}
|
|
1674
1695
|
|
|
1696
|
+
void MemTable::MaybeUpdateNewestUDT(const Slice& user_key) {
|
|
1697
|
+
if (ts_sz_ == 0 || persist_user_defined_timestamps_) {
|
|
1698
|
+
return;
|
|
1699
|
+
}
|
|
1700
|
+
const Comparator* ucmp = GetInternalKeyComparator().user_comparator();
|
|
1701
|
+
Slice udt = ExtractTimestampFromUserKey(user_key, ts_sz_);
|
|
1702
|
+
if (newest_udt_.empty() || ucmp->CompareTimestamp(udt, newest_udt_) > 0) {
|
|
1703
|
+
newest_udt_ = udt;
|
|
1704
|
+
}
|
|
1705
|
+
}
|
|
1706
|
+
|
|
1707
|
+
const Slice& MemTable::GetNewestUDT() const {
|
|
1708
|
+
// This path should not be invoked for MemTables that does not enable the UDT
|
|
1709
|
+
// in Memtable only feature.
|
|
1710
|
+
assert(ts_sz_ > 0 && !persist_user_defined_timestamps_);
|
|
1711
|
+
return newest_udt_;
|
|
1712
|
+
}
|
|
1713
|
+
|
|
1675
1714
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -68,6 +68,7 @@ struct MemTablePostProcessInfo {
|
|
|
68
68
|
uint64_t data_size = 0;
|
|
69
69
|
uint64_t num_entries = 0;
|
|
70
70
|
uint64_t num_deletes = 0;
|
|
71
|
+
uint64_t num_range_deletes = 0;
|
|
71
72
|
};
|
|
72
73
|
|
|
73
74
|
using MultiGetRange = MultiGetContext::Range;
|
|
@@ -332,6 +333,10 @@ class MemTable {
|
|
|
332
333
|
num_deletes_.fetch_add(update_counters.num_deletes,
|
|
333
334
|
std::memory_order_relaxed);
|
|
334
335
|
}
|
|
336
|
+
if (update_counters.num_range_deletes > 0) {
|
|
337
|
+
num_range_deletes_.fetch_add(update_counters.num_range_deletes,
|
|
338
|
+
std::memory_order_relaxed);
|
|
339
|
+
}
|
|
335
340
|
UpdateFlushState();
|
|
336
341
|
}
|
|
337
342
|
|
|
@@ -349,10 +354,21 @@ class MemTable {
|
|
|
349
354
|
return num_deletes_.load(std::memory_order_relaxed);
|
|
350
355
|
}
|
|
351
356
|
|
|
357
|
+
// Get total number of range deletions in the mem table.
|
|
358
|
+
// REQUIRES: external synchronization to prevent simultaneous
|
|
359
|
+
// operations on the same MemTable (unless this Memtable is immutable).
|
|
360
|
+
uint64_t num_range_deletes() const {
|
|
361
|
+
return num_range_deletes_.load(std::memory_order_relaxed);
|
|
362
|
+
}
|
|
363
|
+
|
|
352
364
|
uint64_t get_data_size() const {
|
|
353
365
|
return data_size_.load(std::memory_order_relaxed);
|
|
354
366
|
}
|
|
355
367
|
|
|
368
|
+
size_t write_buffer_size() const {
|
|
369
|
+
return write_buffer_size_.load(std::memory_order_relaxed);
|
|
370
|
+
}
|
|
371
|
+
|
|
356
372
|
// Dynamically change the memtable's capacity. If set below the current usage,
|
|
357
373
|
// the next key added will trigger a flush. Can only increase size when
|
|
358
374
|
// memtable prefix bloom is disabled, since we can't easily allocate more
|
|
@@ -527,6 +543,14 @@ class MemTable {
|
|
|
527
543
|
}
|
|
528
544
|
}
|
|
529
545
|
|
|
546
|
+
// Get the newest user-defined timestamp contained in this MemTable. Check
|
|
547
|
+
// `newest_udt_` for what newer means. This method should only be invoked for
|
|
548
|
+
// an MemTable that has enabled user-defined timestamp feature and set
|
|
549
|
+
// `persist_user_defined_timestamps` to false. The tracked newest UDT will be
|
|
550
|
+
// used by flush job in the background to help check the MemTable's
|
|
551
|
+
// eligibility for Flush.
|
|
552
|
+
const Slice& GetNewestUDT() const;
|
|
553
|
+
|
|
530
554
|
// Returns Corruption status if verification fails.
|
|
531
555
|
static Status VerifyEntryChecksum(const char* entry,
|
|
532
556
|
uint32_t protection_bytes_per_key,
|
|
@@ -553,6 +577,7 @@ class MemTable {
|
|
|
553
577
|
std::atomic<uint64_t> data_size_;
|
|
554
578
|
std::atomic<uint64_t> num_entries_;
|
|
555
579
|
std::atomic<uint64_t> num_deletes_;
|
|
580
|
+
std::atomic<uint64_t> num_range_deletes_;
|
|
556
581
|
|
|
557
582
|
// Dynamically changeable memtable option
|
|
558
583
|
std::atomic<size_t> write_buffer_size_;
|
|
@@ -596,7 +621,7 @@ class MemTable {
|
|
|
596
621
|
const SliceTransform* insert_with_hint_prefix_extractor_;
|
|
597
622
|
|
|
598
623
|
// Insert hints for each prefix.
|
|
599
|
-
UnorderedMapH<Slice, void*,
|
|
624
|
+
UnorderedMapH<Slice, void*, SliceHasher32> insert_hints_;
|
|
600
625
|
|
|
601
626
|
// Timestamp of oldest key
|
|
602
627
|
std::atomic<uint64_t> oldest_key_time_;
|
|
@@ -614,9 +639,26 @@ class MemTable {
|
|
|
614
639
|
// Gets refreshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow`
|
|
615
640
|
std::atomic<uint64_t> approximate_memory_usage_;
|
|
616
641
|
|
|
642
|
+
// max range deletions in a memtable, before automatic flushing, 0 for
|
|
643
|
+
// unlimited.
|
|
644
|
+
uint32_t memtable_max_range_deletions_ = 0;
|
|
645
|
+
|
|
617
646
|
// Flush job info of the current memtable.
|
|
618
647
|
std::unique_ptr<FlushJobInfo> flush_job_info_;
|
|
619
648
|
|
|
649
|
+
// Size in bytes for the user-defined timestamps.
|
|
650
|
+
size_t ts_sz_;
|
|
651
|
+
|
|
652
|
+
// Whether to persist user-defined timestamps
|
|
653
|
+
bool persist_user_defined_timestamps_;
|
|
654
|
+
|
|
655
|
+
// Newest user-defined timestamp contained in this MemTable. For ts1, and ts2
|
|
656
|
+
// if Comparator::CompareTimestamp(ts1, ts2) > 0, ts1 is considered newer than
|
|
657
|
+
// ts2. We track this field for a MemTable if its column family has UDT
|
|
658
|
+
// feature enabled and the `persist_user_defined_timestamp` flag is false.
|
|
659
|
+
// Otherwise, this field just contains an empty Slice.
|
|
660
|
+
Slice newest_udt_;
|
|
661
|
+
|
|
620
662
|
// Updates flush_state_ using ShouldFlushNow()
|
|
621
663
|
void UpdateFlushState();
|
|
622
664
|
|
|
@@ -653,6 +695,8 @@ class MemTable {
|
|
|
653
695
|
void UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info,
|
|
654
696
|
const Slice& key, const Slice& value, ValueType type,
|
|
655
697
|
SequenceNumber s, char* checksum_ptr);
|
|
698
|
+
|
|
699
|
+
void MaybeUpdateNewestUDT(const Slice& user_key);
|
|
656
700
|
};
|
|
657
701
|
|
|
658
702
|
extern const char* EncodeKey(std::string* scratch, const Slice& target);
|
|
@@ -434,23 +434,57 @@ void MemTableList::PickMemtablesToFlush(uint64_t max_memtable_id,
|
|
|
434
434
|
}
|
|
435
435
|
|
|
436
436
|
void MemTableList::RollbackMemtableFlush(const autovector<MemTable*>& mems,
|
|
437
|
-
|
|
437
|
+
bool rollback_succeeding_memtables) {
|
|
438
|
+
TEST_SYNC_POINT("RollbackMemtableFlush");
|
|
438
439
|
AutoThreadOperationStageUpdater stage_updater(
|
|
439
440
|
ThreadStatus::STAGE_MEMTABLE_ROLLBACK);
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
// If the flush was not successful, then just reset state.
|
|
443
|
-
// Maybe a succeeding attempt to flush will be successful.
|
|
441
|
+
#ifndef NDEBUG
|
|
444
442
|
for (MemTable* m : mems) {
|
|
445
443
|
assert(m->flush_in_progress_);
|
|
446
444
|
assert(m->file_number_ == 0);
|
|
445
|
+
}
|
|
446
|
+
#endif
|
|
447
|
+
|
|
448
|
+
if (rollback_succeeding_memtables && !mems.empty()) {
|
|
449
|
+
std::list<MemTable*>& memlist = current_->memlist_;
|
|
450
|
+
auto it = memlist.rbegin();
|
|
451
|
+
for (; *it != mems[0] && it != memlist.rend(); ++it) {
|
|
452
|
+
}
|
|
453
|
+
// mems should be in memlist
|
|
454
|
+
assert(*it == mems[0]);
|
|
455
|
+
if (*it == mems[0]) {
|
|
456
|
+
++it;
|
|
457
|
+
}
|
|
458
|
+
while (it != memlist.rend()) {
|
|
459
|
+
MemTable* m = *it;
|
|
460
|
+
// Only rollback complete, not in-progress,
|
|
461
|
+
// in_progress can be flushes that are still writing SSTs
|
|
462
|
+
if (m->flush_completed_) {
|
|
463
|
+
m->flush_in_progress_ = false;
|
|
464
|
+
m->flush_completed_ = false;
|
|
465
|
+
m->edit_.Clear();
|
|
466
|
+
m->file_number_ = 0;
|
|
467
|
+
num_flush_not_started_++;
|
|
468
|
+
++it;
|
|
469
|
+
} else {
|
|
470
|
+
break;
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
}
|
|
447
474
|
|
|
448
|
-
|
|
449
|
-
m->
|
|
450
|
-
|
|
451
|
-
|
|
475
|
+
for (MemTable* m : mems) {
|
|
476
|
+
if (m->flush_in_progress_) {
|
|
477
|
+
assert(m->file_number_ == 0);
|
|
478
|
+
m->file_number_ = 0;
|
|
479
|
+
m->flush_in_progress_ = false;
|
|
480
|
+
m->flush_completed_ = false;
|
|
481
|
+
m->edit_.Clear();
|
|
482
|
+
num_flush_not_started_++;
|
|
483
|
+
}
|
|
484
|
+
}
|
|
485
|
+
if (!mems.empty()) {
|
|
486
|
+
imm_flush_needed.store(true, std::memory_order_release);
|
|
452
487
|
}
|
|
453
|
-
imm_flush_needed.store(true, std::memory_order_release);
|
|
454
488
|
}
|
|
455
489
|
|
|
456
490
|
// Try record a successful flush in the manifest file. It might just return
|
|
@@ -271,8 +271,20 @@ class MemTableList {
|
|
|
271
271
|
|
|
272
272
|
// Reset status of the given memtable list back to pending state so that
|
|
273
273
|
// they can get picked up again on the next round of flush.
|
|
274
|
+
//
|
|
275
|
+
// @param rollback_succeeding_memtables If true, will rollback adjacent
|
|
276
|
+
// younger memtables whose flush is completed. Specifically, suppose the
|
|
277
|
+
// current immutable memtables are M_0,M_1...M_N ordered from youngest to
|
|
278
|
+
// oldest. Suppose that the youngest memtable in `mems` is M_K. We will try to
|
|
279
|
+
// rollback M_K-1, M_K-2... until the first memtable whose flush is
|
|
280
|
+
// not completed. These are the memtables that would have been installed
|
|
281
|
+
// by this flush job if it were to succeed. This flag is currently used
|
|
282
|
+
// by non atomic_flush rollback.
|
|
283
|
+
// Note that we also do rollback in `write_manifest_cb` by calling
|
|
284
|
+
// `RemoveMemTablesOrRestoreFlags()`. There we rollback the entire batch so
|
|
285
|
+
// it is similar to what we do here with rollback_succeeding_memtables=true.
|
|
274
286
|
void RollbackMemtableFlush(const autovector<MemTable*>& mems,
|
|
275
|
-
|
|
287
|
+
bool rollback_succeeding_memtables);
|
|
276
288
|
|
|
277
289
|
// Try commit a successful flush in the manifest file. It might just return
|
|
278
290
|
// Status::OK letting a concurrent flush to do the actual the recording.
|
|
@@ -382,6 +394,25 @@ class MemTableList {
|
|
|
382
394
|
return memlist.front()->GetID();
|
|
383
395
|
}
|
|
384
396
|
|
|
397
|
+
// DB mutex held.
|
|
398
|
+
// Gets the newest user-defined timestamp for the Memtables in ascending ID
|
|
399
|
+
// order, up to the `max_memtable_id`. Used by background flush job
|
|
400
|
+
// to check Memtables' eligibility for flush w.r.t retaining UDTs.
|
|
401
|
+
std::vector<Slice> GetTablesNewestUDT(uint64_t max_memtable_id) {
|
|
402
|
+
std::vector<Slice> newest_udts;
|
|
403
|
+
auto& memlist = current_->memlist_;
|
|
404
|
+
// Iterating through the memlist starting at the end, the vector<MemTable*>
|
|
405
|
+
// ret is filled with memtables already sorted in increasing MemTable ID.
|
|
406
|
+
for (auto it = memlist.rbegin(); it != memlist.rend(); ++it) {
|
|
407
|
+
MemTable* m = *it;
|
|
408
|
+
if (m->GetID() > max_memtable_id) {
|
|
409
|
+
break;
|
|
410
|
+
}
|
|
411
|
+
newest_udts.push_back(m->GetNewestUDT());
|
|
412
|
+
}
|
|
413
|
+
return newest_udts;
|
|
414
|
+
}
|
|
415
|
+
|
|
385
416
|
void AssignAtomicFlushSeq(const SequenceNumber& seq) {
|
|
386
417
|
const auto& memlist = current_->memlist_;
|
|
387
418
|
// Scan the memtable list from new to old
|