@nxtedition/rocksdb 9.0.0 → 10.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +244 -177
- package/deps/rocksdb/rocksdb/CMakeLists.txt +13 -9
- package/deps/rocksdb/rocksdb/Makefile +15 -6
- package/deps/rocksdb/rocksdb/README.md +29 -0
- package/deps/rocksdb/rocksdb/TARGETS +17 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +35 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +74 -15
- package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +2 -1
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +4 -3
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +16 -4
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +4 -2
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -3
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +2024 -14
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +349 -23
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +126 -51
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -0
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +202 -7
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +31 -14
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +0 -33
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +314 -25
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +29 -4
- package/deps/rocksdb/rocksdb/cache/sharded_cache.cc +10 -0
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +8 -3
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.cc +119 -0
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache.h +155 -0
- package/deps/rocksdb/rocksdb/cache/tiered_secondary_cache_test.cc +711 -0
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +17 -11
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +25 -11
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +2 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +8 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_index_test.cc +7 -3
- package/deps/rocksdb/rocksdb/db/builder.cc +3 -3
- package/deps/rocksdb/rocksdb/db/c.cc +64 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +36 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +23 -15
- package/deps/rocksdb/rocksdb/db/column_family.h +9 -0
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +101 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +36 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +24 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +3 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +42 -18
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +7 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +3 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +61 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +146 -64
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +13 -39
- package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +29 -7
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +8 -3
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +59 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_filter_test.cc +27 -3
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +186 -2
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.cc +17 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +519 -240
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +104 -43
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +169 -66
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +12 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +50 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +85 -53
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +3 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +99 -82
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +4 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +24 -21
- package/deps/rocksdb/rocksdb/db/db_info_dumper.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +83 -55
- package/deps/rocksdb/rocksdb/db/db_iter.h +10 -2
- package/deps/rocksdb/rocksdb/db/db_iter_test.cc +29 -0
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +276 -21
- package/deps/rocksdb/rocksdb/db/db_log_iter_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +187 -1
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +258 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +258 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_readonly_with_timestamp_test.cc +52 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +74 -1
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +22 -4
- package/deps/rocksdb/rocksdb/db/db_tailing_iter_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_test.cc +134 -30
- package/deps/rocksdb/rocksdb/db/db_test2.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -6
- package/deps/rocksdb/rocksdb/db/db_test_util.h +5 -2
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +12 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +337 -1
- package/deps/rocksdb/rocksdb/db/deletefile_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/error_handler.cc +51 -34
- package/deps/rocksdb/rocksdb/db/error_handler.h +7 -6
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +58 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -19
- package/deps/rocksdb/rocksdb/db/flush_job.h +3 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +18 -70
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +1 -1
- package/deps/rocksdb/rocksdb/db/memtable_list.h +11 -1
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +330 -115
- package/deps/rocksdb/rocksdb/db/merge_helper.h +100 -12
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +82 -0
- package/deps/rocksdb/rocksdb/db/merge_test.cc +267 -0
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +3 -0
- package/deps/rocksdb/rocksdb/db/periodic_task_scheduler.h +4 -4
- package/deps/rocksdb/rocksdb/db/plain_table_db_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/prefix_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +4 -0
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +4 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +454 -70
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +105 -69
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +83 -46
- package/deps/rocksdb/rocksdb/db/table_cache.cc +32 -19
- package/deps/rocksdb/rocksdb/db/table_cache.h +12 -6
- package/deps/rocksdb/rocksdb/db/version_edit.h +10 -4
- package/deps/rocksdb/rocksdb/db/version_set.cc +75 -73
- package/deps/rocksdb/rocksdb/db/version_set.h +8 -8
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +2 -5
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +22 -11
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +525 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -22
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -20
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +0 -29
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +46 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +40 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper_test.cc +39 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +44 -20
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +4 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +4 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +88 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +15 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +108 -58
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +36 -14
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +34 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +195 -130
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +12 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.cc +51 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_wide_merge_operator.h +27 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +3 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +14 -11
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +44 -38
- package/deps/rocksdb/rocksdb/env/env.cc +5 -0
- package/deps/rocksdb/rocksdb/env/unique_id_gen.cc +1 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +50 -29
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +32 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +513 -30
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +38 -13
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +14 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +42 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +65 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +26 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +37 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/filter_policy.h +8 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +10 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memory_allocator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +55 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +45 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/port_defs.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/rate_limiter.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +79 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +35 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/system_clock.h +15 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +14 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +2 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +53 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +0 -2
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc +2 -2
- package/deps/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h +1 -1
- package/deps/rocksdb/rocksdb/microbench/README.md +60 -0
- package/deps/rocksdb/rocksdb/monitoring/instrumented_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
- package/deps/rocksdb/rocksdb/monitoring/stats_history_test.cc +18 -7
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +4 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +47 -2
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +12 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +6 -1
- package/deps/rocksdb/rocksdb/plugin/README.md +43 -0
- package/deps/rocksdb/rocksdb/port/README +10 -0
- package/deps/rocksdb/rocksdb/port/port_example.h +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.cc +1 -1
- package/deps/rocksdb/rocksdb/port/port_posix.h +7 -4
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +5 -0
- package/deps/rocksdb/rocksdb/port/win/port_win.h +5 -2
- package/deps/rocksdb/rocksdb/src.mk +7 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +275 -61
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +96 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +179 -62
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +35 -22
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +12 -8
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +14 -9
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +26 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +15 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +10 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_test.cc +39 -18
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -6
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +44 -26
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +10 -8
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_fetcher.cc +3 -2
- package/deps/rocksdb/rocksdb/table/block_fetcher.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +6 -2
- package/deps/rocksdb/rocksdb/table/get_context.cc +52 -89
- package/deps/rocksdb/rocksdb/table/get_context.h +12 -3
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +11 -0
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +29 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +12 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +33 -6
- package/deps/rocksdb/rocksdb/table/sst_file_reader_test.cc +1 -0
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +2 -4
- package/deps/rocksdb/rocksdb/table/table_reader.h +6 -0
- package/deps/rocksdb/rocksdb/test_util/mock_time_env.h +31 -0
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +2 -1
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +3 -3
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -43
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +213 -28
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +36 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +0 -1
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +33 -10
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +32 -11
- package/deps/rocksdb/rocksdb/util/cast_util.h +10 -0
- package/deps/rocksdb/rocksdb/util/comparator.cc +26 -1
- package/deps/rocksdb/rocksdb/util/compression.h +9 -3
- package/deps/rocksdb/rocksdb/util/crc32c.cc +7 -1
- package/deps/rocksdb/rocksdb/util/distributed_mutex.h +1 -1
- package/deps/rocksdb/rocksdb/util/overload.h +23 -0
- package/deps/rocksdb/rocksdb/util/rate_limiter.cc +53 -18
- package/deps/rocksdb/rocksdb/util/rate_limiter_impl.h +6 -1
- package/deps/rocksdb/rocksdb/util/rate_limiter_test.cc +90 -19
- package/deps/rocksdb/rocksdb/util/slice_test.cc +30 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/string_util.cc +39 -0
- package/deps/rocksdb/rocksdb/util/string_util.h +10 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -0
- package/deps/rocksdb/rocksdb/util/udt_util.cc +42 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +19 -0
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +14 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +0 -3
- package/deps/rocksdb/rocksdb/util/xxph3.h +0 -4
- package/deps/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +1 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +19 -15
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +11 -7
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +9 -0
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +13 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +41 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +15 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +155 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +81 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +2 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +7 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +3 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -27
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +127 -120
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +129 -59
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +105 -8
- package/deps/rocksdb/rocksdb.gyp +4 -2
- package/index.js +38 -55
- package/package.json +4 -4
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/util.h +7 -1
- package/deps/rocksdb/rocksdb/cmake/modules/CxxFlags.cmake +0 -7
- package/deps/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindSnappy.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/FindTBB.cmake +0 -33
- package/deps/rocksdb/rocksdb/cmake/modules/Findgflags.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/Findlz4.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/Finduring.cmake +0 -26
- package/deps/rocksdb/rocksdb/cmake/modules/Findzstd.cmake +0 -29
- package/deps/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake +0 -10
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -20,6 +20,7 @@
|
|
|
20
20
|
#include "cache/sharded_cache.h"
|
|
21
21
|
#include "port/lang.h"
|
|
22
22
|
#include "port/malloc.h"
|
|
23
|
+
#include "port/mmap.h"
|
|
23
24
|
#include "port/port.h"
|
|
24
25
|
#include "rocksdb/cache.h"
|
|
25
26
|
#include "rocksdb/secondary_cache.h"
|
|
@@ -39,24 +40,31 @@ class ClockCacheTest;
|
|
|
39
40
|
//
|
|
40
41
|
// Benefits
|
|
41
42
|
// --------
|
|
42
|
-
// *
|
|
43
|
+
// * Lock/wait free (no waits or spins) for efficiency under high concurrency
|
|
44
|
+
// * Fixed version (estimated_entry_charge > 0) is fully lock/wait free
|
|
45
|
+
// * Automatic version (estimated_entry_charge = 0) has rare waits among
|
|
46
|
+
// certain insertion or erase operations that involve the same very small
|
|
47
|
+
// set of entries.
|
|
43
48
|
// * Optimized for hot path reads. For concurrency control, most Lookup() and
|
|
44
49
|
// essentially all Release() are a single atomic add operation.
|
|
45
|
-
// * Eviction on insertion is fully parallel
|
|
50
|
+
// * Eviction on insertion is fully parallel.
|
|
46
51
|
// * Uses a generalized + aging variant of CLOCK eviction that might outperform
|
|
47
52
|
// LRU in some cases. (For background, see
|
|
48
53
|
// https://en.wikipedia.org/wiki/Page_replacement_algorithm)
|
|
49
54
|
//
|
|
50
55
|
// Costs
|
|
51
56
|
// -----
|
|
52
|
-
// *
|
|
53
|
-
//
|
|
57
|
+
// * FixedHyperClockCache (estimated_entry_charge > 0) - Hash table is not
|
|
58
|
+
// resizable (for lock-free efficiency) so capacity is not dynamically
|
|
59
|
+
// changeable. Rely on an estimated average value (block) size for
|
|
54
60
|
// space+time efficiency. (See estimated_entry_charge option details.)
|
|
61
|
+
// EXPERIMENTAL - This limitation is fixed in AutoHyperClockCache, activated
|
|
62
|
+
// with estimated_entry_charge == 0.
|
|
55
63
|
// * Insert usually does not (but might) overwrite a previous entry associated
|
|
56
|
-
// with a cache key. This is OK for RocksDB uses of Cache
|
|
64
|
+
// with a cache key. This is OK for RocksDB uses of Cache, though it does mess
|
|
65
|
+
// up our REDUNDANT block cache insertion statistics.
|
|
57
66
|
// * Only supports keys of exactly 16 bytes, which is what RocksDB uses for
|
|
58
|
-
// block cache (not row cache or table cache).
|
|
59
|
-
// * SecondaryCache is not supported.
|
|
67
|
+
// block cache (but not row cache or table cache).
|
|
60
68
|
// * Cache priorities are less aggressively enforced. Unlike LRUCache, enough
|
|
61
69
|
// transient LOW or BOTTOM priority items can evict HIGH priority entries that
|
|
62
70
|
// are not referenced recently (or often) enough.
|
|
@@ -139,7 +147,8 @@ class ClockCacheTest;
|
|
|
139
147
|
// * Empty - slot is not in use and unowned. All other metadata and data is
|
|
140
148
|
// in an undefined state.
|
|
141
149
|
// * Construction - slot is exclusively owned by one thread, the thread
|
|
142
|
-
// successfully entering this state, for populating or freeing data
|
|
150
|
+
// successfully entering this state, for populating or freeing data
|
|
151
|
+
// (de-construction, same state marker).
|
|
143
152
|
// * Shareable (group) - slot holds an entry with counted references for
|
|
144
153
|
// pinning and reading, including
|
|
145
154
|
// * Visible - slot holds an entry that can be returned by Lookup
|
|
@@ -187,15 +196,19 @@ class ClockCacheTest;
|
|
|
187
196
|
// know from our "redundant" stats that overwrites are very rare for the block
|
|
188
197
|
// cache, so we should not spend much to make them effective.
|
|
189
198
|
//
|
|
190
|
-
//
|
|
191
|
-
// sequence without seeing an existing (visible) entry for the same
|
|
192
|
-
// way we only insert if we can improve the probing performance, and
|
|
193
|
-
// need to probe beyond our insert position, assuming we are willing
|
|
194
|
-
// the previous entry for the same key die of old age (eventual eviction
|
|
195
|
-
// not being used). We can reach a similar state with concurrent
|
|
196
|
-
// where one will pass over the other while it is "under
|
|
197
|
-
// This temporary duplication is acceptable for RocksDB block
|
|
198
|
-
// we know redundant insertion is rare.
|
|
199
|
+
// FixedHyperClockCache: Instead we Insert as soon as we find an empty slot in
|
|
200
|
+
// the probing sequence without seeing an existing (visible) entry for the same
|
|
201
|
+
// key. This way we only insert if we can improve the probing performance, and
|
|
202
|
+
// we don't need to probe beyond our insert position, assuming we are willing
|
|
203
|
+
// to let the previous entry for the same key die of old age (eventual eviction
|
|
204
|
+
// from not being used). We can reach a similar state with concurrent
|
|
205
|
+
// insertions, where one will pass over the other while it is "under
|
|
206
|
+
// construction." This temporary duplication is acceptable for RocksDB block
|
|
207
|
+
// cache because we know redundant insertion is rare.
|
|
208
|
+
// AutoHyperClockCache: Similar, except we only notice and return an existing
|
|
209
|
+
// match if it is found in the search for a suitable empty slot (starting with
|
|
210
|
+
// the same slot as the head pointer), not by following the existing chain of
|
|
211
|
+
// entries. Insertions are always made to the head of the chain.
|
|
199
212
|
//
|
|
200
213
|
// Another problem to solve is what to return to the caller when we find an
|
|
201
214
|
// existing entry whose probing position we cannot improve on, or when the
|
|
@@ -322,7 +335,6 @@ struct ClockHandle : public ClockHandleBasicData {
|
|
|
322
335
|
// For setting the hit bit
|
|
323
336
|
static constexpr uint8_t kHitBitShift = 2U * kCounterNumBits;
|
|
324
337
|
static constexpr uint64_t kHitBitMask = uint64_t{1} << kHitBitShift;
|
|
325
|
-
;
|
|
326
338
|
|
|
327
339
|
// For reading or updating the state marker in meta word
|
|
328
340
|
static constexpr uint8_t kStateShift = kHitBitShift + 1;
|
|
@@ -395,6 +407,8 @@ class BaseClockTable {
|
|
|
395
407
|
|
|
396
408
|
uint32_t GetHashSeed() const { return hash_seed_; }
|
|
397
409
|
|
|
410
|
+
uint64_t GetYieldCount() const { return yield_count_.load(); }
|
|
411
|
+
|
|
398
412
|
struct EvictionData {
|
|
399
413
|
size_t freed_charge = 0;
|
|
400
414
|
size_t freed_count = 0;
|
|
@@ -448,6 +462,9 @@ class BaseClockTable {
|
|
|
448
462
|
// Clock algorithm sweep pointer.
|
|
449
463
|
std::atomic<uint64_t> clock_pointer_{};
|
|
450
464
|
|
|
465
|
+
// Counter for number of times we yield to wait on another thread.
|
|
466
|
+
std::atomic<uint64_t> yield_count_{};
|
|
467
|
+
|
|
451
468
|
// TODO: is this separation needed if we don't do background evictions?
|
|
452
469
|
ALIGN_AS(CACHE_LINE_SIZE)
|
|
453
470
|
// Number of elements in the table.
|
|
@@ -472,6 +489,10 @@ class BaseClockTable {
|
|
|
472
489
|
const uint32_t& hash_seed_;
|
|
473
490
|
};
|
|
474
491
|
|
|
492
|
+
// Hash table for cache entries with size determined at creation time.
|
|
493
|
+
// Uses open addressing and double hashing. Since entries cannot be moved,
|
|
494
|
+
// the "displacements" count ensures probing sequences find entries even when
|
|
495
|
+
// entries earlier in the probing sequence have been removed.
|
|
475
496
|
class FixedHyperClockTable : public BaseClockTable {
|
|
476
497
|
public:
|
|
477
498
|
// Target size to be exactly a common cache line size (see static_assert in
|
|
@@ -626,11 +647,314 @@ class FixedHyperClockTable : public BaseClockTable {
|
|
|
626
647
|
const std::unique_ptr<HandleImpl[]> array_;
|
|
627
648
|
}; // class FixedHyperClockTable
|
|
628
649
|
|
|
629
|
-
//
|
|
630
|
-
//
|
|
631
|
-
|
|
650
|
+
// Hash table for cache entries that resizes automatically based on occupancy.
|
|
651
|
+
// However, it depends on a contiguous memory region to grow into
|
|
652
|
+
// incrementally, using linear hashing, so uses an anonymous mmap so that
|
|
653
|
+
// only the used portion of the memory region is mapped to physical memory
|
|
654
|
+
// (part of RSS).
|
|
655
|
+
//
|
|
656
|
+
// This table implementation uses the same "low-level protocol" for managing
|
|
657
|
+
// the contents of an entry slot as FixedHyperClockTable does, captured in the
|
|
658
|
+
// ClockHandle struct. This provides most of the essential data safety, but
|
|
659
|
+
// AutoHyperClockTable is another "high-level protocol" for organizing entries
|
|
660
|
+
// into a hash table, with automatic resizing.
|
|
661
|
+
//
|
|
662
|
+
// This implementation is not fully wait-free but we can call it "essentially
|
|
663
|
+
// wait-free," and here's why. First, like FixedHyperClockCache, there is no
|
|
664
|
+
// locking nor other forms of waiting at the cache or shard level. Also like
|
|
665
|
+
// FixedHCC there is essentially an entry-level read-write lock implemented
|
|
666
|
+
// with atomics, but our relaxed atomicity/consistency guarantees (e.g.
|
|
667
|
+
// duplicate inserts are possible) mean we do not need to wait for entry
|
|
668
|
+
// locking. Lookups, non-erasing Releases, and non-evicting non-growing Inserts
|
|
669
|
+
// are all fully wait-free. Of course, these waits are not dependent on any
|
|
670
|
+
// external factors such as I/O.
|
|
671
|
+
//
|
|
672
|
+
// For operations that remove entries from a chain or grow the table by
|
|
673
|
+
// splitting a chain, there is a chain-level locking mechanism that we call a
|
|
674
|
+
// "rewrite" lock, and the only waits are for these locks. On average, each
|
|
675
|
+
// chain lock is relevant to < 2 entries each. (The average would be less than
|
|
676
|
+
// one entry each, but we do not lock when there's no entry to remove or
|
|
677
|
+
// migrate.) And a given thread can only hold two such chain locks at a time,
|
|
678
|
+
// more typically just one. So in that sense alone, the waiting that does exist
|
|
679
|
+
// is very localized.
|
|
680
|
+
//
|
|
681
|
+
// If we look closer at the operations utilizing that locking mechanism, we
|
|
682
|
+
// can see why it's "essentially wait-free."
|
|
683
|
+
// * Grow operations to increase the size of the table: each operation splits
|
|
684
|
+
// an existing chain into two, and chains for splitting are chosen in table
|
|
685
|
+
// order. Grow operations are fully parallel except for the chain locking, but
|
|
686
|
+
// for one Grow operation to wait on another, it has to be feeding into the
|
|
687
|
+
// other, which means the table has doubled in size already from other Grow
|
|
688
|
+
// operations without the original one finishing. So Grow operations are very
|
|
689
|
+
// low latency (unlike LRUCache doubling the table size in one operation) and
|
|
690
|
+
// very parallelizable. (We use some tricks to break up dependencies in
|
|
691
|
+
// updating metadata on the usable size of the table.) And obviously Grow
|
|
692
|
+
// operations are very rare after the initial population of the table.
|
|
693
|
+
// * Evict operations (part of many Inserts): clock updates and evictions
|
|
694
|
+
// sweep through the structure in table order, so like Grow operations,
|
|
695
|
+
// parallel Evict can only wait on each other if an Evict has lingered (slept)
|
|
696
|
+
// long enough that the clock pointer has wrapped around the entire structure.
|
|
697
|
+
// * Random erasures (Erase, Release with erase_if_last_ref, etc.): these
|
|
698
|
+
// operations are rare and not really considered performance critical.
|
|
699
|
+
// Currently they're mostly used for removing placeholder cache entries, e.g.
|
|
700
|
+
// for memory tracking, though that could use standalone entries instead to
|
|
701
|
+
// avoid potential contention in table operations. It's possible that future
|
|
702
|
+
// enhancements could pro-actively remove cache entries from obsolete files,
|
|
703
|
+
// but that's not yet implemented.
|
|
704
|
+
class AutoHyperClockTable : public BaseClockTable {
|
|
632
705
|
public:
|
|
633
|
-
|
|
706
|
+
// Target size to be exactly a common cache line size (see static_assert in
|
|
707
|
+
// clock_cache.cc)
|
|
708
|
+
struct ALIGN_AS(64U) HandleImpl : public ClockHandle {
|
|
709
|
+
// To organize AutoHyperClockTable entries into a hash table while
|
|
710
|
+
// allowing the table size to grow without existing entries being moved,
|
|
711
|
+
// a version of chaining is used. Rather than being heap allocated (and
|
|
712
|
+
// incurring overheads to ensure memory safety) entries must go into
|
|
713
|
+
// Handles ("slots") in the pre-allocated array. To improve CPU cache
|
|
714
|
+
// locality, the chain head pointers are interleaved with the entries;
|
|
715
|
+
// specifically, a Handle contains
|
|
716
|
+
// * A head pointer for a chain of entries with this "home" location.
|
|
717
|
+
// * A ClockHandle, for an entry that may or may not be in the chain
|
|
718
|
+
// starting from that head (but for performance ideally is on that
|
|
719
|
+
// chain).
|
|
720
|
+
// * A next pointer for the continuation of the chain containing this
|
|
721
|
+
// entry.
|
|
722
|
+
//
|
|
723
|
+
// The pointers are not raw pointers, but are indices into the array,
|
|
724
|
+
// and are decorated in two ways to help detect and recover from
|
|
725
|
+
// relevant concurrent modifications during Lookup, so that Lookup is
|
|
726
|
+
// fully wait-free:
|
|
727
|
+
// * Each "with_shift" pointer contains a shift count that indicates
|
|
728
|
+
// how many hash bits were used in choosing the home address for the
|
|
729
|
+
// chain--specifically the next entry in the chain.
|
|
730
|
+
// * The end of a chain is given a special "end" marker and refers back
|
|
731
|
+
// to the head of the chain.
|
|
732
|
+
//
|
|
733
|
+
// Why do we need shift on each pointer? To make Lookup wait-free, we need
|
|
734
|
+
// to be able to query a chain without missing anything, and preferably
|
|
735
|
+
// avoid synchronously double-checking the length_info. Without the shifts,
|
|
736
|
+
// there is a risk that we start down a chain and while paused on an entry
|
|
737
|
+
// that goes to a new home, we then follow the rest of the
|
|
738
|
+
// partially-migrated chain to see the shared ending with the old home, but
|
|
739
|
+
// for a time were following the chain for the new home, missing some
|
|
740
|
+
// entries for the old home.
|
|
741
|
+
//
|
|
742
|
+
// Why do we need the end of the chain to loop back? If Lookup pauses
|
|
743
|
+
// at an "under construction" entry, and sees that "next" is null after
|
|
744
|
+
// waking up, we need something to tell whether the "under construction"
|
|
745
|
+
// entry was freed and reused for another chain. Otherwise, we could
|
|
746
|
+
// miss entries still on the original chain in the presence of a
|
|
747
|
+
// concurrent modification. Until an entry is fully erased from a chain,
|
|
748
|
+
// it is normal to see "under construction" entries on the chain, and it
|
|
749
|
+
// is not safe to read their hashed key without either a read reference
|
|
750
|
+
// on the entry or a rewrite lock on the chain.
|
|
751
|
+
|
|
752
|
+
// Marker in a "with_shift" head pointer for some thread owning writes
|
|
753
|
+
// to the chain structure (except for inserts), but only if not an
|
|
754
|
+
// "end" pointer. Also called the "rewrite lock."
|
|
755
|
+
static constexpr uint64_t kHeadLocked = uint64_t{1} << 7;
|
|
756
|
+
|
|
757
|
+
// Marker in a "with_shift" pointer for the end of a chain. Must also
|
|
758
|
+
// point back to the head of the chain (with end marker removed).
|
|
759
|
+
// Also includes the "locked" bit so that attempting to lock an empty
|
|
760
|
+
// chain has no effect (not needed, as the lock is only needed for
|
|
761
|
+
// removals).
|
|
762
|
+
static constexpr uint64_t kNextEndFlags = (uint64_t{1} << 6) | kHeadLocked;
|
|
763
|
+
|
|
764
|
+
static inline bool IsEnd(uint64_t next_with_shift) {
|
|
765
|
+
// Assuming certain values never used, suffices to check this one bit
|
|
766
|
+
constexpr auto kCheckBit = kNextEndFlags ^ kHeadLocked;
|
|
767
|
+
return next_with_shift & kCheckBit;
|
|
768
|
+
}
|
|
769
|
+
|
|
770
|
+
// Bottom bits to right shift away to get an array index from a
|
|
771
|
+
// "with_shift" pointer.
|
|
772
|
+
static constexpr int kNextShift = 8;
|
|
773
|
+
|
|
774
|
+
// A bit mask for the "shift" associated with each "with_shift" pointer.
|
|
775
|
+
// Always bottommost bits.
|
|
776
|
+
static constexpr int kShiftMask = 63;
|
|
777
|
+
|
|
778
|
+
// A marker for head_next_with_shift that indicates this HandleImpl is
|
|
779
|
+
// heap allocated (standalone) rather than in the table.
|
|
780
|
+
static constexpr uint64_t kStandaloneMarker = UINT64_MAX;
|
|
781
|
+
|
|
782
|
+
// A marker for head_next_with_shift indicating the head is not yet part
|
|
783
|
+
// of the usable table, or for chain_next_with_shift indicating that the
|
|
784
|
+
// entry is not present or is not yet part of a chain (must not be
|
|
785
|
+
// "shareable" state).
|
|
786
|
+
static constexpr uint64_t kUnusedMarker = 0;
|
|
787
|
+
|
|
788
|
+
// See above. The head pointer is logically independent of the rest of
|
|
789
|
+
// the entry, including the chain next pointer.
|
|
790
|
+
std::atomic<uint64_t> head_next_with_shift{kUnusedMarker};
|
|
791
|
+
std::atomic<uint64_t> chain_next_with_shift{kUnusedMarker};
|
|
792
|
+
|
|
793
|
+
// For supporting CreateStandalone and some fallback cases.
|
|
794
|
+
inline bool IsStandalone() const {
|
|
795
|
+
return head_next_with_shift.load(std::memory_order_acquire) ==
|
|
796
|
+
kStandaloneMarker;
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
inline void SetStandalone() {
|
|
800
|
+
head_next_with_shift.store(kStandaloneMarker, std::memory_order_release);
|
|
801
|
+
}
|
|
802
|
+
}; // struct HandleImpl
|
|
803
|
+
|
|
804
|
+
struct Opts {
|
|
805
|
+
explicit Opts(size_t _min_avg_value_size)
|
|
806
|
+
: min_avg_value_size(_min_avg_value_size) {}
|
|
807
|
+
|
|
808
|
+
explicit Opts(const HyperClockCacheOptions& opts) {
|
|
809
|
+
assert(opts.estimated_entry_charge == 0);
|
|
810
|
+
min_avg_value_size = opts.min_avg_entry_charge;
|
|
811
|
+
}
|
|
812
|
+
size_t min_avg_value_size;
|
|
813
|
+
};
|
|
814
|
+
|
|
815
|
+
AutoHyperClockTable(size_t capacity, bool strict_capacity_limit,
|
|
816
|
+
CacheMetadataChargePolicy metadata_charge_policy,
|
|
817
|
+
MemoryAllocator* allocator,
|
|
818
|
+
const Cache::EvictionCallback* eviction_callback,
|
|
819
|
+
const uint32_t* hash_seed, const Opts& opts);
|
|
820
|
+
~AutoHyperClockTable();
|
|
821
|
+
|
|
822
|
+
// For BaseClockTable::Insert
|
|
823
|
+
struct InsertState {
|
|
824
|
+
uint64_t saved_length_info = 0;
|
|
825
|
+
size_t likely_empty_slot = 0;
|
|
826
|
+
};
|
|
827
|
+
|
|
828
|
+
void StartInsert(InsertState& state);
|
|
829
|
+
|
|
830
|
+
// Does initial check for whether there's hash table room for another
|
|
831
|
+
// inserted entry, possibly growing if needed. Returns true iff (after
|
|
832
|
+
// the call) there is room for the proposed number of entries.
|
|
833
|
+
bool GrowIfNeeded(size_t new_occupancy, InsertState& state);
|
|
834
|
+
|
|
835
|
+
HandleImpl* DoInsert(const ClockHandleBasicData& proto,
|
|
836
|
+
uint64_t initial_countdown, bool take_ref,
|
|
837
|
+
InsertState& state);
|
|
838
|
+
|
|
839
|
+
// Runs the clock eviction algorithm trying to reclaim at least
|
|
840
|
+
// requested_charge. Reports via *data how much is evicted, which could be less
|
|
841
|
+
// if it appears impossible to evict the requested amount without blocking.
|
|
842
|
+
void Evict(size_t requested_charge, InsertState& state, EvictionData* data);
|
|
843
|
+
|
|
844
|
+
HandleImpl* Lookup(const UniqueId64x2& hashed_key);
|
|
845
|
+
|
|
846
|
+
bool Release(HandleImpl* handle, bool useful, bool erase_if_last_ref);
|
|
847
|
+
|
|
848
|
+
void Erase(const UniqueId64x2& hashed_key);
|
|
849
|
+
|
|
850
|
+
void EraseUnRefEntries();
|
|
851
|
+
|
|
852
|
+
size_t GetTableSize() const;
|
|
853
|
+
|
|
854
|
+
size_t GetOccupancyLimit() const;
|
|
855
|
+
|
|
856
|
+
const HandleImpl* HandlePtr(size_t idx) const { return &array_[idx]; }
|
|
857
|
+
|
|
858
|
+
#ifndef NDEBUG
|
|
859
|
+
size_t& TEST_MutableOccupancyLimit() {
|
|
860
|
+
return *reinterpret_cast<size_t*>(&occupancy_limit_);
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
// Release N references
|
|
864
|
+
void TEST_ReleaseN(HandleImpl* handle, size_t n);
|
|
865
|
+
#endif
|
|
866
|
+
|
|
867
|
+
// Maximum ratio of number of occupied slots to number of usable slots. The
|
|
868
|
+
// actual load factor should float pretty close to this number, which should
|
|
869
|
+
// be a nice space/time trade-off, though large swings in WriteBufferManager
|
|
870
|
+
// memory could lead to low (but very much safe) load factors (only after
|
|
871
|
+
// seeing high load factors). Linear hashing along with (modified) linear
|
|
872
|
+
// probing to find an available slot increases potential risks of high
|
|
873
|
+
// load factors, so high load factors are disallowed.
|
|
874
|
+
static constexpr double kMaxLoadFactor = 0.60;
|
|
875
|
+
|
|
876
|
+
private: // functions
|
|
877
|
+
// Returns true iff increased usable length. Due to load factor
|
|
878
|
+
// considerations, GrowIfNeeded might call this more than once to make room
|
|
879
|
+
// for one more entry.
|
|
880
|
+
bool Grow(InsertState& state);
|
|
881
|
+
|
|
882
|
+
// Operational details of splitting a chain into two for Grow().
|
|
883
|
+
void SplitForGrow(size_t grow_home, size_t old_home, int old_shift);
|
|
884
|
+
|
|
885
|
+
// Takes an "under construction" entry and ensures it is no longer connected
|
|
886
|
+
// to its home chain (in preparation for completing erasure and freeing the
|
|
887
|
+
// slot). Note that previous operations might have already noticed it being
|
|
888
|
+
// "under (de)construction" and removed it from its chain.
|
|
889
|
+
void Remove(HandleImpl* h);
|
|
890
|
+
|
|
891
|
+
// Try to take ownership of an entry and erase+remove it from the table.
|
|
892
|
+
// Returns true if successful. Could fail if
|
|
893
|
+
// * There are other references to the entry
|
|
894
|
+
// * Some other thread has exclusive ownership or has freed it.
|
|
895
|
+
bool TryEraseHandle(HandleImpl* h, bool holding_ref, bool mark_invisible);
|
|
896
|
+
|
|
897
|
+
// Calculates the appropriate maximum table size, for creating the memory
|
|
898
|
+
// mapping.
|
|
899
|
+
static size_t CalcMaxUsableLength(
|
|
900
|
+
size_t capacity, size_t min_avg_value_size,
|
|
901
|
+
CacheMetadataChargePolicy metadata_charge_policy);
|
|
902
|
+
|
|
903
|
+
// Shared helper function that implements removing entries from a chain
|
|
904
|
+
// with proper handling to ensure all existing data is seen even in the
|
|
905
|
+
// presence of concurrent insertions, etc. (See implementation.)
|
|
906
|
+
template <class OpData>
|
|
907
|
+
void PurgeImpl(OpData* op_data, size_t home = SIZE_MAX);
|
|
908
|
+
|
|
909
|
+
// An RAII wrapper for locking a chain of entries for removals. See
|
|
910
|
+
// implementation.
|
|
911
|
+
class ChainRewriteLock;
|
|
912
|
+
|
|
913
|
+
// Helper function for PurgeImpl while holding a ChainRewriteLock. See
|
|
914
|
+
// implementation.
|
|
915
|
+
template <class OpData>
|
|
916
|
+
void PurgeImplLocked(OpData* op_data, ChainRewriteLock& rewrite_lock,
|
|
917
|
+
size_t home);
|
|
918
|
+
|
|
919
|
+
// Update length_info_ as much as possible without waiting, given a known
|
|
920
|
+
// usable (ready for inserts and lookups) grow_home. (Previous grow_homes
|
|
921
|
+
// might not be usable yet, but we can check if they are by looking at
|
|
922
|
+
// the corresponding old home.)
|
|
923
|
+
void CatchUpLengthInfoNoWait(size_t known_usable_grow_home);
|
|
924
|
+
|
|
925
|
+
private: // data
|
|
926
|
+
// mmapped area holding handles
|
|
927
|
+
const TypedMemMapping<HandleImpl> array_;
|
|
928
|
+
|
|
929
|
+
// Metadata for table size under linear hashing.
|
|
930
|
+
//
|
|
931
|
+
// Lowest 8 bits are the minimum number of lowest hash bits to use
|
|
932
|
+
// ("min shift"). The upper 56 bits are a threshold. If that minimum number
|
|
933
|
+
// of bits taken from a hash value is < this threshold, then one more bit of
|
|
934
|
+
// hash value is taken and used.
|
|
935
|
+
//
|
|
936
|
+
// Other mechanisms (shift amounts on pointers) ensure complete availability
|
|
937
|
+
// of data already in the table even if a reader only sees a completely
|
|
938
|
+
// out-of-date version of this value. In the worst case, it could take
|
|
939
|
+
// log time to find the correct chain, but normally this value enables
|
|
940
|
+
// readers to find the correct chain on the first try.
|
|
941
|
+
//
|
|
942
|
+
// NOTES: length_info_ is only updated at the end of a Grow operation,
|
|
943
|
+
// so that waiting in Grow operations isn't done while entries are pinned
|
|
944
|
+
// for internal operation purposes. Thus, Lookup and Insert have to
|
|
945
|
+
// detect and support cases where length_info hasn't caught up to updated
|
|
946
|
+
// chains. Winning grow thread is the one that transitions
|
|
947
|
+
// head_next_with_shift from zeros. Grow threads can spin/yield wait for
|
|
948
|
+
// preconditions and postconditions to be met.
|
|
949
|
+
std::atomic<uint64_t> length_info_;
|
|
950
|
+
|
|
951
|
+
// An already-computed version of the usable length times the max load
|
|
952
|
+
// factor. Could be slightly out of date but GrowIfNeeded()/Grow() handle
|
|
953
|
+
// that internally.
|
|
954
|
+
std::atomic<size_t> occupancy_limit_;
|
|
955
|
+
|
|
956
|
+
// See explanation in AutoHyperClockTable::Evict
|
|
957
|
+
std::atomic<size_t> clock_pointer_mask_;
|
|
634
958
|
}; // class AutoHyperClockTable
|
|
635
959
|
|
|
636
960
|
// A single shard of sharded cache.
|
|
@@ -785,7 +1109,6 @@ class FixedHyperClockCache
|
|
|
785
1109
|
const std::shared_ptr<Logger>& /*info_log*/) const override;
|
|
786
1110
|
}; // class FixedHyperClockCache
|
|
787
1111
|
|
|
788
|
-
// Placeholder for future automatic HCC variant
|
|
789
1112
|
class AutoHyperClockCache
|
|
790
1113
|
#ifdef NDEBUG
|
|
791
1114
|
final
|
|
@@ -795,6 +1118,9 @@ class AutoHyperClockCache
|
|
|
795
1118
|
using BaseHyperClockCache::BaseHyperClockCache;
|
|
796
1119
|
|
|
797
1120
|
const char* Name() const override { return "AutoHyperClockCache"; }
|
|
1121
|
+
|
|
1122
|
+
void ReportProblems(
|
|
1123
|
+
const std::shared_ptr<Logger>& /*info_log*/) const override;
|
|
798
1124
|
}; // class AutoHyperClockCache
|
|
799
1125
|
|
|
800
1126
|
} // namespace clock_cache
|