@nxtedition/rocksdb 13.5.13 → 14.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +33 -2
- package/binding.gyp +2 -2
- package/chained-batch.js +9 -16
- package/deps/rocksdb/rocksdb/BUCK +18 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
- package/deps/rocksdb/rocksdb/Makefile +20 -9
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
- package/deps/rocksdb/rocksdb/db/c.cc +207 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
- package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
- package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
- package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
- package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
- package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
- package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
- package/deps/rocksdb/rocksdb/env/env.cc +12 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
- package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/options.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
- package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
- package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
- package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
- package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
- package/deps/rocksdb/rocksdb/table/format.cc +6 -12
- package/deps/rocksdb/rocksdb/table/format.h +10 -0
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
- package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
- package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
- package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
- package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
- package/deps/rocksdb/rocksdb/util/coding.h +3 -3
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
- package/deps/rocksdb/rocksdb/util/compression.h +5 -0
- package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
- package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
- package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
- package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
- package/index.js +3 -3
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/util.h +38 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
|
@@ -498,6 +498,7 @@ struct CompactionServiceJobInfo {
|
|
|
498
498
|
// the output level of the compaction.
|
|
499
499
|
int output_level;
|
|
500
500
|
|
|
501
|
+
CompactionServiceJobInfo() {}
|
|
501
502
|
CompactionServiceJobInfo(std::string db_name_, std::string db_id_,
|
|
502
503
|
std::string db_session_id_, uint32_t cf_id_,
|
|
503
504
|
std::string cf_name_, uint64_t job_id_,
|
|
@@ -622,6 +623,7 @@ struct DBOptions {
|
|
|
622
623
|
// checking for corruption, including
|
|
623
624
|
// * paranoid_file_checks
|
|
624
625
|
// * paranoid_memory_checks
|
|
626
|
+
// * memtable_veirfy_per_key_checksum_on_seek
|
|
625
627
|
// * DB::VerifyChecksum()
|
|
626
628
|
//
|
|
627
629
|
// Default: true
|
|
@@ -1360,16 +1362,11 @@ struct DBOptions {
|
|
|
1360
1362
|
// Dynamically changeable through SetDBOptions() API.
|
|
1361
1363
|
bool avoid_flush_during_shutdown = false;
|
|
1362
1364
|
|
|
1363
|
-
//
|
|
1364
|
-
//
|
|
1365
|
-
// that already exist, rather than overwriting matching keys).
|
|
1366
|
-
// Setting this option to true has the following effects:
|
|
1367
|
-
// 1) Disable some internal optimizations around SST file compression.
|
|
1368
|
-
// 2) Reserve the last level for ingested files only.
|
|
1369
|
-
// 3) Compaction will not include any file from the last level.
|
|
1370
|
-
// Note that only Universal Compaction supports allow_ingest_behind.
|
|
1371
|
-
// `num_levels` should be >= 3 if this option is turned on.
|
|
1365
|
+
// DEPRECATED: use ColumnFamilyOptions::cf_allow_ingest_behind instead.
|
|
1366
|
+
// This option might be removed in a future release.
|
|
1372
1367
|
//
|
|
1368
|
+
// See comment for `ColumnFamilyOptions::cf_allow_ingest_behind` for
|
|
1369
|
+
// detail about the option's functionality and use cases.
|
|
1373
1370
|
//
|
|
1374
1371
|
// DEFAULT: false
|
|
1375
1372
|
// Immutable.
|
|
@@ -1780,6 +1777,119 @@ struct ScanOptions {
|
|
|
1780
1777
|
: range(_start, _upper_bound) {}
|
|
1781
1778
|
};
|
|
1782
1779
|
|
|
1780
|
+
// Container for multiple scan ranges that can be used with MultiScan.
|
|
1781
|
+
// This replaces std::vector<ScanOptions> with a more efficient implementation
|
|
1782
|
+
// that can merge overlapping ranges.
|
|
1783
|
+
class MultiScanArgs {
|
|
1784
|
+
public:
|
|
1785
|
+
// Constructor that takes a comparator
|
|
1786
|
+
explicit MultiScanArgs(const Comparator* comparator) : comp_(comparator) {}
|
|
1787
|
+
|
|
1788
|
+
// Copy Constructor
|
|
1789
|
+
MultiScanArgs(const MultiScanArgs& other) {
|
|
1790
|
+
comp_ = other.comp_;
|
|
1791
|
+
original_ranges_ = other.original_ranges_;
|
|
1792
|
+
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1793
|
+
max_prefetch_size = other.max_prefetch_size;
|
|
1794
|
+
use_async_io = other.use_async_io;
|
|
1795
|
+
}
|
|
1796
|
+
MultiScanArgs(MultiScanArgs&& other) noexcept
|
|
1797
|
+
: io_coalesce_threshold(other.io_coalesce_threshold),
|
|
1798
|
+
max_prefetch_size(other.max_prefetch_size),
|
|
1799
|
+
use_async_io(other.use_async_io),
|
|
1800
|
+
comp_(other.comp_),
|
|
1801
|
+
original_ranges_(std::move(other.original_ranges_)) {}
|
|
1802
|
+
|
|
1803
|
+
MultiScanArgs& operator=(const MultiScanArgs& other) {
|
|
1804
|
+
comp_ = other.comp_;
|
|
1805
|
+
original_ranges_ = other.original_ranges_;
|
|
1806
|
+
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1807
|
+
max_prefetch_size = other.max_prefetch_size;
|
|
1808
|
+
use_async_io = other.use_async_io;
|
|
1809
|
+
return *this;
|
|
1810
|
+
}
|
|
1811
|
+
|
|
1812
|
+
MultiScanArgs& operator=(MultiScanArgs&& other) noexcept {
|
|
1813
|
+
if (this != &other) {
|
|
1814
|
+
comp_ = other.comp_;
|
|
1815
|
+
original_ranges_ = std::move(other.original_ranges_);
|
|
1816
|
+
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1817
|
+
max_prefetch_size = other.max_prefetch_size;
|
|
1818
|
+
use_async_io = other.use_async_io;
|
|
1819
|
+
}
|
|
1820
|
+
return *this;
|
|
1821
|
+
}
|
|
1822
|
+
|
|
1823
|
+
void insert(const Slice& s, const Slice& b) {
|
|
1824
|
+
original_ranges_.emplace_back(s, b);
|
|
1825
|
+
}
|
|
1826
|
+
|
|
1827
|
+
void insert(const Slice& s, const Slice& b,
|
|
1828
|
+
const std::optional<std::unordered_map<std::string, std::string>>&
|
|
1829
|
+
property_bag) {
|
|
1830
|
+
original_ranges_.emplace_back(s, b);
|
|
1831
|
+
original_ranges_.back().property_bag = property_bag;
|
|
1832
|
+
}
|
|
1833
|
+
|
|
1834
|
+
void insert(const Slice& s) { original_ranges_.emplace_back(s); }
|
|
1835
|
+
|
|
1836
|
+
void insert(const Slice& s,
|
|
1837
|
+
const std::optional<std::unordered_map<std::string, std::string>>&
|
|
1838
|
+
property_bag) {
|
|
1839
|
+
original_ranges_.emplace_back(s);
|
|
1840
|
+
original_ranges_.back().property_bag = property_bag;
|
|
1841
|
+
}
|
|
1842
|
+
|
|
1843
|
+
size_t size() const { return original_ranges_.size(); }
|
|
1844
|
+
bool empty() const { return original_ranges_.empty(); }
|
|
1845
|
+
|
|
1846
|
+
void reserve(size_t size) { original_ranges_.reserve(size); }
|
|
1847
|
+
|
|
1848
|
+
operator std::vector<ScanOptions>*() { return &original_ranges_; }
|
|
1849
|
+
|
|
1850
|
+
operator const std::vector<ScanOptions>*() const { return &original_ranges_; }
|
|
1851
|
+
|
|
1852
|
+
~MultiScanArgs() {}
|
|
1853
|
+
|
|
1854
|
+
const std::vector<ScanOptions>& GetScanRanges() const {
|
|
1855
|
+
return original_ranges_;
|
|
1856
|
+
}
|
|
1857
|
+
|
|
1858
|
+
const Comparator* GetComparator() const { return comp_; }
|
|
1859
|
+
|
|
1860
|
+
// Copies the configurations (excluding actual scan ranges) from another
|
|
1861
|
+
// MultiScanArgs.
|
|
1862
|
+
void CopyConfigFrom(const MultiScanArgs& other) {
|
|
1863
|
+
io_coalesce_threshold = other.io_coalesce_threshold;
|
|
1864
|
+
max_prefetch_size = other.max_prefetch_size;
|
|
1865
|
+
use_async_io = other.use_async_io;
|
|
1866
|
+
}
|
|
1867
|
+
|
|
1868
|
+
uint64_t io_coalesce_threshold = 16 << 10; // 16KB by default
|
|
1869
|
+
|
|
1870
|
+
// Maximum size (in bytes) for the data blocks loaded by a MultiScan.
|
|
1871
|
+
// This limits the amount of I/O and memory usage by pinned data blocks.
|
|
1872
|
+
//
|
|
1873
|
+
// When set to 0 (the default), there is no limit. When the limit is reached,
|
|
1874
|
+
// the iterator will start returning Status::PrefetchLimitReached().
|
|
1875
|
+
//
|
|
1876
|
+
// Note that prefetching happens only once in Prepare(), which is different
|
|
1877
|
+
// from ReadOptions::readahead_size, which applies any time the iterator does
|
|
1878
|
+
// I/O.
|
|
1879
|
+
// Note that this limit is per file and applies to compressed block size.
|
|
1880
|
+
uint64_t max_prefetch_size = 0;
|
|
1881
|
+
|
|
1882
|
+
// Enable async I/O for multi-scan operations
|
|
1883
|
+
// When true, BlockBasedTableIterator will use ReadAsync() for reading blocks
|
|
1884
|
+
// When false, it will use synchronous MultiRead().
|
|
1885
|
+
bool use_async_io = false;
|
|
1886
|
+
|
|
1887
|
+
private:
|
|
1888
|
+
// The comparator used for ordering ranges
|
|
1889
|
+
const Comparator* comp_;
|
|
1890
|
+
std::vector<ScanOptions> original_ranges_;
|
|
1891
|
+
};
|
|
1892
|
+
|
|
1783
1893
|
// Options that control read operations
|
|
1784
1894
|
struct ReadOptions {
|
|
1785
1895
|
// *** BEGIN options relevant to point lookups as well as scans ***
|
|
@@ -2344,7 +2454,47 @@ struct CompactRangeOptions {
|
|
|
2344
2454
|
double blob_garbage_collection_age_cutoff = -1;
|
|
2345
2455
|
};
|
|
2346
2456
|
|
|
2347
|
-
// IngestExternalFileOptions
|
|
2457
|
+
// IngestExternalFileOptions setting guide:
|
|
2458
|
+
//
|
|
2459
|
+
// The options in IngestExternalFileOptions interact in complex ways depending
|
|
2460
|
+
// on the source and overlap of SST files. Below is a summary of recommended
|
|
2461
|
+
// non-default settings for common use cases:
|
|
2462
|
+
//
|
|
2463
|
+
// 1. Ingesting only SST writer generated non-overlapping SSTs that are not
|
|
2464
|
+
// expected to overlap with existing data:
|
|
2465
|
+
// - Optionally set fail_if_not_bottommost_level = true to enforce placement
|
|
2466
|
+
// in the last level. This is better paired with SST partitioner to guarantee
|
|
2467
|
+
// that there are no existing files with keys across the ingesting key range.
|
|
2468
|
+
// - Set allow_blocking_flush to false: Not expecting to overlap with
|
|
2469
|
+
// memtable and cause a flush.
|
|
2470
|
+
// - If snapshot consistency is not expected, set snapshot_consistency to
|
|
2471
|
+
// false and allow_global_seqno to false. allow_global_seqno = false will
|
|
2472
|
+
// fail ingestion if any input files overlap with each other.
|
|
2473
|
+
//
|
|
2474
|
+
// 2. Ingesting SST writer generated overlapping SSTs:
|
|
2475
|
+
// - order files with older updates first, newer overwrites later.
|
|
2476
|
+
// - Set allow_global_seqno = true since newer files need to be assigned
|
|
2477
|
+
// larger sequence numbers.
|
|
2478
|
+
//
|
|
2479
|
+
// 3. Ingesting DB generated SSTs: overlapping with target CF data is not
|
|
2480
|
+
// allowed. Input files are allowed to contain both DB generated files and SST
|
|
2481
|
+
// file writer generated files. They will all be treated as DB generated.
|
|
2482
|
+
// - Set allow_db_generated_files = true.
|
|
2483
|
+
// - Set snapshot_consistency = false: snapshot consistency requires
|
|
2484
|
+
// assigning higher sequence number to ingested files. DB generated files
|
|
2485
|
+
// don't support global seqno assignment yet.
|
|
2486
|
+
// - Set allow_blocking_flush to false: Not expecting to overlap with
|
|
2487
|
+
// memtable and cause a flush.
|
|
2488
|
+
// - If the source live DB is running, set link_files = true instead of
|
|
2489
|
+
// move_files.
|
|
2490
|
+
// 3a) SST files are non-overlapping and all keys have seqno 0: e.g., a
|
|
2491
|
+
// temporary RocksDB instance used to sort some data, and compacts all
|
|
2492
|
+
// data into the last level before ingestion.
|
|
2493
|
+
// - Optionally set fail_if_not_bottommost_level = true to enforce placement
|
|
2494
|
+
// in the last level.
|
|
2495
|
+
// 3b) SST files are overlapping, e.g. ingesting files from one CF to another.
|
|
2496
|
+
// - Ensure older updates are ordered first and newer updates are ordered
|
|
2497
|
+
// later. See more in option comment for allow_db_generated_files.
|
|
2348
2498
|
struct IngestExternalFileOptions {
|
|
2349
2499
|
// Can be set to true to move the files instead of copying them.
|
|
2350
2500
|
// The input files will be unlinked after successful ingestion.
|
|
@@ -2361,10 +2511,20 @@ struct IngestExternalFileOptions {
|
|
|
2361
2511
|
// If set to false, an ingested file's keys could appear in existing snapshots
|
|
2362
2512
|
// that were created before the file was ingested.
|
|
2363
2513
|
bool snapshot_consistency = true;
|
|
2364
|
-
//
|
|
2514
|
+
// Enables assigning a global sequence number to each ingested file, i.e.,
|
|
2515
|
+
// all keys in the ingested file will be treated as having this seqno.
|
|
2516
|
+
// If set to false, we will use the sequence numbers in the ingested file
|
|
2517
|
+
// as is, and IngestExternalFile() will fail if the ingested key range
|
|
2365
2518
|
// overlaps with existing keys or tombstones or output of ongoing compaction
|
|
2366
|
-
//
|
|
2367
|
-
//
|
|
2519
|
+
// in the CF (the conditions under which a global seqno must be assigned to
|
|
2520
|
+
// the ingested file).
|
|
2521
|
+
// If the ingested files overlap with each other, we need to assign global
|
|
2522
|
+
// sequence to the ingested files and this option needs to be enabled. One
|
|
2523
|
+
// exception to this is when ingesting DB generated SST files (see option
|
|
2524
|
+
// allow_db_generated_files below). DB generated files do not support
|
|
2525
|
+
// global seqno assignment and can be ingested even if they overlap with
|
|
2526
|
+
// each other. This option has no effect when allow_db_generated_files is
|
|
2527
|
+
// enabled.
|
|
2368
2528
|
bool allow_global_seqno = true;
|
|
2369
2529
|
// Normally (true), IngestExternalFile() will trigger and block for flushing
|
|
2370
2530
|
// memtable(s) if there is overlap between ingested files and memtable(s). If
|
|
@@ -2376,8 +2536,8 @@ struct IngestExternalFileOptions {
|
|
|
2376
2536
|
// to be skipped rather than overwriting existing data under that key.
|
|
2377
2537
|
// Use case: back-fill of some historical data in the database without
|
|
2378
2538
|
// over-writing existing newer version of data.
|
|
2379
|
-
// This option could only be used if the
|
|
2380
|
-
// with
|
|
2539
|
+
// This option could only be used if the CF has been running
|
|
2540
|
+
// with cf_allow_ingest_behind=true since CF creation (or before any write).
|
|
2381
2541
|
// All files will be ingested at the bottommost level with seqno=0.
|
|
2382
2542
|
bool ingest_behind = false;
|
|
2383
2543
|
// DEPRECATED - Set to true if you would like to write global_seqno to
|
|
@@ -2430,18 +2590,53 @@ struct IngestExternalFileOptions {
|
|
|
2430
2590
|
//
|
|
2431
2591
|
// XXX: "bottommost" is obsolete/confusing terminology to refer to last level
|
|
2432
2592
|
bool fail_if_not_bottommost_level = false;
|
|
2433
|
-
// EXPERIMENTAL
|
|
2434
|
-
//
|
|
2593
|
+
// EXPERIMENTAL, SUBJECT TO CHANGE
|
|
2594
|
+
//
|
|
2595
|
+
// Enables special mode of ingestion that allows files generated by a live DB,
|
|
2596
|
+
// instead of SstFileWriter. When true:
|
|
2435
2597
|
// - Allows files to be ingested when their cf_id doesn't match the CF they
|
|
2436
2598
|
// are being ingested into.
|
|
2599
|
+
// - Allows files with any sequence numbers to be ingested.
|
|
2600
|
+
// - Original sequence numbers are preserved (no reassignment).
|
|
2601
|
+
//
|
|
2437
2602
|
// REQUIREMENTS:
|
|
2438
|
-
// - Ingested files must
|
|
2439
|
-
//
|
|
2440
|
-
//
|
|
2441
|
-
//
|
|
2442
|
-
//
|
|
2443
|
-
//
|
|
2444
|
-
//
|
|
2603
|
+
// - Ingested files must NOT overlap with any existing data in the DB. Since
|
|
2604
|
+
// no sequence number reassignment is performed on db generated files,
|
|
2605
|
+
// ingestion will fail if any overlap is detected. However, input files
|
|
2606
|
+
// are allowed to overlap with each other when this option is enabled. This
|
|
2607
|
+
// is useful when ingesting multiple levels of files from a CF, where
|
|
2608
|
+
// levels naturally overlap with each other.
|
|
2609
|
+
// - CAUTION: If input files overlap with each other, then for any given user
|
|
2610
|
+
// key appearing in multiple files, earlier files MUST have smaller sequence
|
|
2611
|
+
// numbers than later files. Later files will be placed at a higher level
|
|
2612
|
+
// (smaller level number). This is to ensure the LSM invariant where for
|
|
2613
|
+
// the same key, recent updates are in higher levels. This means that
|
|
2614
|
+
// if you are ingesting files from multiple levels of a CF, you should
|
|
2615
|
+
// put files from lower levels first, and files from higher levels later.
|
|
2616
|
+
// Example for getting files from a CF for ingestion:
|
|
2617
|
+
//
|
|
2618
|
+
// ColumnFamilyMetaData cf_meta;
|
|
2619
|
+
// from_db->GetColumnFamilyMetaData(from_cf, &cf_meta);
|
|
2620
|
+
// // iterate in reverse to start from lowest level
|
|
2621
|
+
// for (auto level_meta = cf_meta.levels.rbegin();
|
|
2622
|
+
// level_meta != cf_meta.levels.rend(); ++level_meta) {
|
|
2623
|
+
// // L0 files need to be added in reverse order so we iterate in reverse
|
|
2624
|
+
// // within a level too
|
|
2625
|
+
// for (auto file_meta = level_meta->files.rbegin();
|
|
2626
|
+
// file_meta != level_meta->files.rend(); ++file_meta) {
|
|
2627
|
+
// // Add file for ingestion
|
|
2628
|
+
// }
|
|
2629
|
+
// }
|
|
2630
|
+
//
|
|
2631
|
+
// WARNING: Violating the sequence number ordering requirement will cause
|
|
2632
|
+
// LSM invariant violations and may lead to incorrect reads or data
|
|
2633
|
+
// corruption.
|
|
2634
|
+
// - If you would like to enforce that the ingested files do not overlap
|
|
2635
|
+
// with each other, you can set `fail_if_not_bottommost_level` to true.
|
|
2636
|
+
// If ingested files overlap with each other, some file will be placed
|
|
2637
|
+
// above Lmax, failing the ingestion if the option is set.
|
|
2638
|
+
// - `write_global_seqno` must be false (sequence numbers cannot be
|
|
2639
|
+
// reassigned).
|
|
2445
2640
|
bool allow_db_generated_files = false;
|
|
2446
2641
|
|
|
2447
2642
|
// Controls whether data and metadata blocks (e.g. index, filter) read during
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
2
|
+
//
|
|
3
|
+
// This source code is licensed under both the GPLv2 (found in the
|
|
4
|
+
// COPYING file in the root directory) and Apache 2.0 License
|
|
5
|
+
// (found in the LICENSE.Apache file in the root directory).
|
|
6
|
+
|
|
7
|
+
#pragma once
|
|
8
|
+
|
|
9
|
+
#include "rocksdb/rocksdb_namespace.h"
|
|
10
|
+
|
|
11
|
+
namespace ROCKSDB_NAMESPACE {
|
|
12
|
+
|
|
13
|
+
int point_lock_bench_tool(int argc, char** argv);
|
|
14
|
+
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -33,8 +33,8 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
33
33
|
// Wait() or SecondaryCache::WaitAll() may be skipped if IsReady() happens to
|
|
34
34
|
// return true, but (depending on the implementation) IsReady() might never
|
|
35
35
|
// return true without Wait() or SecondaryCache::WaitAll(). After the handle
|
|
36
|
-
// is known ready, calling Value() is required to avoid
|
|
37
|
-
// of a cache hit.
|
|
36
|
+
// is known ready, calling Value() and taking ownership is required to avoid
|
|
37
|
+
// a memory leak in case of a cache hit.
|
|
38
38
|
class SecondaryCacheResultHandle {
|
|
39
39
|
public:
|
|
40
40
|
virtual ~SecondaryCacheResultHandle() = default;
|
|
@@ -443,10 +443,14 @@ enum Tickers : uint32_t {
|
|
|
443
443
|
// Tiered storage related statistics
|
|
444
444
|
HOT_FILE_READ_BYTES,
|
|
445
445
|
WARM_FILE_READ_BYTES,
|
|
446
|
+
COOL_FILE_READ_BYTES,
|
|
446
447
|
COLD_FILE_READ_BYTES,
|
|
448
|
+
ICE_FILE_READ_BYTES,
|
|
447
449
|
HOT_FILE_READ_COUNT,
|
|
448
450
|
WARM_FILE_READ_COUNT,
|
|
451
|
+
COOL_FILE_READ_COUNT,
|
|
449
452
|
COLD_FILE_READ_COUNT,
|
|
453
|
+
ICE_FILE_READ_COUNT,
|
|
450
454
|
|
|
451
455
|
// Last level and non-last level read statistics
|
|
452
456
|
LAST_LEVEL_READ_BYTES,
|
|
@@ -542,6 +546,9 @@ enum Tickers : uint32_t {
|
|
|
542
546
|
// TransactionOptions::large_txn_commit_optimize_threshold.
|
|
543
547
|
NUMBER_WBWI_INGEST,
|
|
544
548
|
|
|
549
|
+
// Failure to load the UDI during SST table open
|
|
550
|
+
SST_USER_DEFINED_INDEX_LOAD_FAIL_COUNT,
|
|
551
|
+
|
|
545
552
|
TICKER_ENUM_MAX
|
|
546
553
|
};
|
|
547
554
|
|
|
@@ -115,6 +115,8 @@ class Status {
|
|
|
115
115
|
kIOFenced = 14,
|
|
116
116
|
kMergeOperatorFailed = 15,
|
|
117
117
|
kMergeOperandThresholdExceeded = 16,
|
|
118
|
+
kPrefetchLimitReached = 17,
|
|
119
|
+
kNotExpectedCodePath = 18,
|
|
118
120
|
kMaxSubCode
|
|
119
121
|
};
|
|
120
122
|
|
|
@@ -318,12 +320,19 @@ class Status {
|
|
|
318
320
|
|
|
319
321
|
static Status LockLimit() { return Status(kAborted, kLockLimit); }
|
|
320
322
|
|
|
323
|
+
static Status PrefetchLimitReached() {
|
|
324
|
+
return Status(kIncomplete, kPrefetchLimitReached);
|
|
325
|
+
}
|
|
326
|
+
|
|
321
327
|
// Returns true iff the status indicates success.
|
|
322
328
|
bool ok() const {
|
|
323
329
|
MarkChecked();
|
|
324
330
|
return code() == kOk;
|
|
325
331
|
}
|
|
326
332
|
|
|
333
|
+
// Assert the status is OK in debug mode
|
|
334
|
+
void AssertOK() const { assert(ok()); }
|
|
335
|
+
|
|
327
336
|
// Returns true iff the status indicates success *with* something
|
|
328
337
|
// overwritten
|
|
329
338
|
bool IsOkOverwritten() const {
|
|
@@ -486,6 +495,13 @@ class Status {
|
|
|
486
495
|
return (code() == kIOError) && (subcode() == kIOFenced);
|
|
487
496
|
}
|
|
488
497
|
|
|
498
|
+
// Returns true iff the status indicates prefetch limit reached during
|
|
499
|
+
// MultiScan.
|
|
500
|
+
bool IsPrefetchLimitReached() const {
|
|
501
|
+
MarkChecked();
|
|
502
|
+
return (code() == kIncomplete) && (subcode() == kPrefetchLimitReached);
|
|
503
|
+
}
|
|
504
|
+
|
|
489
505
|
// Return a string representation of this status suitable for printing.
|
|
490
506
|
// Returns the string "OK" for success.
|
|
491
507
|
std::string ToString() const;
|
|
@@ -440,10 +440,13 @@ struct BlockBasedTableOptions {
|
|
|
440
440
|
// versions of RocksDB able to read partitioned filters are able to read
|
|
441
441
|
// decoupled partitioned filters.)
|
|
442
442
|
//
|
|
443
|
-
// decouple_partitioned_filters =
|
|
444
|
-
//
|
|
445
|
-
//
|
|
446
|
-
|
|
443
|
+
// decouple_partitioned_filters = true is the new default. This option is now
|
|
444
|
+
// DEPRECATED and might be ignored and/or removed in a future release.
|
|
445
|
+
//
|
|
446
|
+
// NOTE: decouple_partitioned_filters = false with partition_filters = true
|
|
447
|
+
// disables parallel compression (CompressionOptions::parallel_threads
|
|
448
|
+
// sanitized to 1).
|
|
449
|
+
bool decouple_partitioned_filters = true;
|
|
447
450
|
|
|
448
451
|
// Option to generate Bloom/Ribbon filters that minimize memory
|
|
449
452
|
// internal fragmentation.
|
|
@@ -501,8 +504,17 @@ struct BlockBasedTableOptions {
|
|
|
501
504
|
// If non-nullptr, use the specified factory to build user-defined index.
|
|
502
505
|
// This allows users to define their own index format and build the index
|
|
503
506
|
// during table building.
|
|
507
|
+
//
|
|
508
|
+
// NOTE: UserDefinedIndexFactory currently disables parallel compression
|
|
509
|
+
// (CompressionOptions::parallel_threads sanitized to 1).
|
|
504
510
|
std::shared_ptr<UserDefinedIndexFactory> user_defined_index_factory = nullptr;
|
|
505
511
|
|
|
512
|
+
// EXPERIMENTAL
|
|
513
|
+
//
|
|
514
|
+
// Return an error Status if a user_defined_index_factory is configured,
|
|
515
|
+
// but there's no corresponding UDI block in the SST file being opened.
|
|
516
|
+
bool fail_if_no_udi_on_open = false;
|
|
517
|
+
|
|
506
518
|
// If true, place whole keys in the filter (not just prefixes).
|
|
507
519
|
// This must generally be true for gets to be efficient.
|
|
508
520
|
bool whole_key_filtering = true;
|
|
@@ -76,6 +76,7 @@ struct TablePropertiesNames {
|
|
|
76
76
|
static const std::string kTailStartOffset;
|
|
77
77
|
static const std::string kUserDefinedTimestampsPersisted;
|
|
78
78
|
static const std::string kKeyLargestSeqno;
|
|
79
|
+
static const std::string kKeySmallestSeqno;
|
|
79
80
|
};
|
|
80
81
|
|
|
81
82
|
// `TablePropertiesCollector` provides the mechanism for users to collect
|
|
@@ -220,6 +221,8 @@ struct TableProperties {
|
|
|
220
221
|
uint64_t orig_file_number = 0;
|
|
221
222
|
// the total size of all data blocks.
|
|
222
223
|
uint64_t data_size = 0;
|
|
224
|
+
// the total uncompressed size of all data blocks (since RocksDB 10.7)
|
|
225
|
+
uint64_t uncompressed_data_size = 0;
|
|
223
226
|
// the size of index block.
|
|
224
227
|
uint64_t index_size = 0;
|
|
225
228
|
// Total number of index partitions if kTwoLevelIndexSearch is used
|
|
@@ -307,6 +310,16 @@ struct TableProperties {
|
|
|
307
310
|
// table is empty).
|
|
308
311
|
uint64_t key_largest_seqno = UINT64_MAX;
|
|
309
312
|
|
|
313
|
+
bool HasKeyLargestSeqno() const { return key_largest_seqno != UINT64_MAX; }
|
|
314
|
+
|
|
315
|
+
// The smallest sequence number of keys in this file.
|
|
316
|
+
// UINT64_MAX means unknown.
|
|
317
|
+
// Only written to properties block if known (should be known unless the
|
|
318
|
+
// table is empty).
|
|
319
|
+
uint64_t key_smallest_seqno = UINT64_MAX;
|
|
320
|
+
|
|
321
|
+
bool HasKeySmallestSeqno() const { return key_smallest_seqno != UINT64_MAX; }
|
|
322
|
+
|
|
310
323
|
// DB identity
|
|
311
324
|
// db_id is an identifier generated the first time the DB is created
|
|
312
325
|
// If DB identity is unset or unassigned, `db_id` will be an empty string.
|
|
@@ -118,7 +118,11 @@ enum class Temperature : uint8_t {
|
|
|
118
118
|
kUnknown = 0,
|
|
119
119
|
kHot = 0x04,
|
|
120
120
|
kWarm = 0x08,
|
|
121
|
+
kCool = 0x0A,
|
|
121
122
|
kCold = 0x0C,
|
|
123
|
+
kIce = 0x10,
|
|
124
|
+
// XXX: this is mis-named. It is instead an invalid temperature beyond the
|
|
125
|
+
// rest
|
|
122
126
|
kLastTemperature,
|
|
123
127
|
};
|
|
124
128
|
|
|
@@ -144,9 +144,7 @@ class CompactionOptionsUniversal {
|
|
|
144
144
|
incremental(false),
|
|
145
145
|
reduce_file_locking(false) {}
|
|
146
146
|
|
|
147
|
-
#if __cplusplus >= 202002L
|
|
148
147
|
bool operator==(const CompactionOptionsUniversal& rhs) const = default;
|
|
149
|
-
#endif
|
|
150
148
|
};
|
|
151
149
|
|
|
152
150
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -27,6 +27,10 @@ inline const std::string kUserDefinedIndexPrefix =
|
|
|
27
27
|
// It allows users to define their own index format and build custom
|
|
28
28
|
// indexes during table building. Currently, only a monolithic index
|
|
29
29
|
// block is supported (no partitioned index).
|
|
30
|
+
//
|
|
31
|
+
// This is currently supported only for a restricted set of use cases. The
|
|
32
|
+
// CF must be ingest only, and only files containing Puts generated by
|
|
33
|
+
// SstFileWriter are supported.
|
|
30
34
|
|
|
31
35
|
// The interface for building user-defined index.
|
|
32
36
|
class UserDefinedIndexBuilder {
|
|
@@ -51,6 +55,10 @@ class UserDefinedIndexBuilder {
|
|
|
51
55
|
// The previous index entry key and the new index entry key cover
|
|
52
56
|
// all the keys in the data block associated with the new index entry.
|
|
53
57
|
//
|
|
58
|
+
// The last_key_in_current_block and first_key_in_next_block will be user
|
|
59
|
+
// keys, i.e. the user key string, and optionally the user timestamp if one
|
|
60
|
+
// is configured, without a sequence number suffix.
|
|
61
|
+
//
|
|
54
62
|
// Called before the OnKeyAdded() call for first_key_in_next_block.
|
|
55
63
|
// @last_key_in_current_block: The last key in the current data block
|
|
56
64
|
// @first_key_in_next_block: it will be nullptr if the entry being added is
|
|
@@ -72,6 +80,9 @@ class UserDefinedIndexBuilder {
|
|
|
72
80
|
// override OnKeyAdded() if they need to collect additional information.
|
|
73
81
|
// The type argument indicates whether the value is a full value or partial.
|
|
74
82
|
// At the moment, only full values are supported.
|
|
83
|
+
//
|
|
84
|
+
// The key will be a user key. RocksDB guarantees that there will only be
|
|
85
|
+
// one entry for each key in the file/index.
|
|
75
86
|
virtual void OnKeyAdded(const Slice& /*key*/, ValueType /*type*/,
|
|
76
87
|
const Slice& /*value*/) {}
|
|
77
88
|
|
|
@@ -100,6 +111,14 @@ class UserDefinedIndexIterator {
|
|
|
100
111
|
// termination criteria, kInbound if the data block is definitely fully
|
|
101
112
|
// within bounds, or kUnknown if the data block could be partially
|
|
102
113
|
// within bounds.
|
|
114
|
+
// The UDI implementation needs to be careful about returning kOutOfBound.
|
|
115
|
+
// If a limit key is specified in ScanOptions, an implementation that
|
|
116
|
+
// does not store the first key in the block for the corresponding index
|
|
117
|
+
// entry cannot reliably determine if the block is out of bounds. It must
|
|
118
|
+
// compare against the previous index key to determine if the current block
|
|
119
|
+
// is out of bounds w.r.t the limit. Other termination criteria (specified
|
|
120
|
+
// in property_bag) may cause the scan to terminate earlier, in which case
|
|
121
|
+
// kOutOfBound can be returned earlier.
|
|
103
122
|
virtual Status SeekAndGetResult(const Slice& target,
|
|
104
123
|
IterateResult* result) = 0;
|
|
105
124
|
|
|
@@ -125,11 +144,22 @@ class UserDefinedIndexReader {
|
|
|
125
144
|
virtual size_t ApproximateMemoryUsage() const = 0;
|
|
126
145
|
};
|
|
127
146
|
|
|
147
|
+
// Options for user defined index
|
|
148
|
+
struct UserDefinedIndexOption {
|
|
149
|
+
const Comparator* comparator = BytewiseComparator();
|
|
150
|
+
};
|
|
151
|
+
|
|
128
152
|
// Factory for creating user-defined index builders.
|
|
129
153
|
class UserDefinedIndexFactory : public Customizable {
|
|
130
154
|
public:
|
|
131
155
|
virtual ~UserDefinedIndexFactory() = default;
|
|
132
156
|
|
|
157
|
+
static const char* Type() { return "UserDefinedIndexFactory"; }
|
|
158
|
+
|
|
159
|
+
static Status CreateFromString(
|
|
160
|
+
const ConfigOptions& config_options, const std::string& value,
|
|
161
|
+
std::shared_ptr<UserDefinedIndexFactory>* factory);
|
|
162
|
+
|
|
133
163
|
// Create a new builder for user-defined index.
|
|
134
164
|
virtual UserDefinedIndexBuilder* NewBuilder() const = 0;
|
|
135
165
|
|
|
@@ -137,6 +167,21 @@ class UserDefinedIndexFactory : public Customizable {
|
|
|
137
167
|
// block
|
|
138
168
|
virtual std::unique_ptr<UserDefinedIndexReader> NewReader(
|
|
139
169
|
Slice& index_block) const = 0;
|
|
170
|
+
|
|
171
|
+
// New API for allowing customized comparator
|
|
172
|
+
virtual Status NewBuilder(
|
|
173
|
+
const UserDefinedIndexOption& /*option*/,
|
|
174
|
+
std::unique_ptr<UserDefinedIndexBuilder>& builder) const {
|
|
175
|
+
builder.reset(NewBuilder());
|
|
176
|
+
return Status::OK();
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
virtual Status NewReader(
|
|
180
|
+
const UserDefinedIndexOption& /*option*/, Slice& index_block,
|
|
181
|
+
std::unique_ptr<UserDefinedIndexReader>& reader) const {
|
|
182
|
+
reader = NewReader(index_block);
|
|
183
|
+
return Status::OK();
|
|
184
|
+
};
|
|
140
185
|
};
|
|
141
186
|
|
|
142
187
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -90,7 +90,7 @@ class CacheDumper {
|
|
|
90
90
|
public:
|
|
91
91
|
virtual ~CacheDumper() = default;
|
|
92
92
|
// Only dump the blocks in the block cache that belong to the DBs in this list
|
|
93
|
-
virtual Status SetDumpFilter(std::vector<DB
|
|
93
|
+
virtual Status SetDumpFilter(const std::vector<DB*>& db_list) {
|
|
94
94
|
(void)db_list;
|
|
95
95
|
return Status::NotSupported("SetDumpFilter is not supported");
|
|
96
96
|
}
|
|
@@ -292,7 +292,7 @@ class StackableDB : public DB {
|
|
|
292
292
|
using DB::NewMultiScan;
|
|
293
293
|
std::unique_ptr<MultiScan> NewMultiScan(
|
|
294
294
|
const ReadOptions& opts, ColumnFamilyHandle* column_family,
|
|
295
|
-
const
|
|
295
|
+
const MultiScanArgs& scan_opts) override {
|
|
296
296
|
return db_->NewMultiScan(opts, column_family, scan_opts);
|
|
297
297
|
}
|
|
298
298
|
|
|
@@ -653,7 +653,12 @@ class Transaction {
|
|
|
653
653
|
// Change the value of TransactionOptions.lock_timeout (in milliseconds) for
|
|
654
654
|
// this transaction.
|
|
655
655
|
// Has no effect on OptimisticTransactions.
|
|
656
|
-
virtual void SetLockTimeout(int64_t
|
|
656
|
+
virtual void SetLockTimeout(int64_t timeout_ms) = 0;
|
|
657
|
+
|
|
658
|
+
// Change the value of deadlock_timeout (in milliseconds) for this
|
|
659
|
+
// transaction.
|
|
660
|
+
// Has no effect on OptimisticTransactions.
|
|
661
|
+
virtual void SetDeadlockTimeout(int64_t timeout_ms) = 0;
|
|
657
662
|
|
|
658
663
|
// Return the WriteOptions that will be used during Commit()
|
|
659
664
|
virtual WriteOptions* GetWriteOptions() = 0;
|
|
@@ -217,6 +217,11 @@ struct TransactionDBOptions {
|
|
|
217
217
|
// Other value means the user provides a custom lock manager.
|
|
218
218
|
std::shared_ptr<LockManagerHandle> lock_mgr_handle;
|
|
219
219
|
|
|
220
|
+
// EXPERIMENTAL
|
|
221
|
+
//
|
|
222
|
+
// Flag to enable/disable the per key point lock manager.
|
|
223
|
+
bool use_per_key_point_lock_mgr = false;
|
|
224
|
+
|
|
220
225
|
// If true, the TransactionDB implementation might skip concurrency control
|
|
221
226
|
// unless it is overridden by TransactionOptions or
|
|
222
227
|
// TransactionDBWriteOptimizations. This can be used in conjunction with
|
|
@@ -319,6 +324,22 @@ struct TransactionOptions {
|
|
|
319
324
|
// If negative, TransactionDBOptions::transaction_lock_timeout will be used.
|
|
320
325
|
int64_t lock_timeout = -1;
|
|
321
326
|
|
|
327
|
+
// Timeout in microseconds before performing deadlock detection.
|
|
328
|
+
// If 0, deadlock detection will be performed immediately.
|
|
329
|
+
//
|
|
330
|
+
// To optimize performance, this parameter could be tuned.
|
|
331
|
+
//
|
|
332
|
+
// When deadlock happens very frequently, deadlock timeout should be set to 0,
|
|
333
|
+
// so deadlock will be detected immediately.
|
|
334
|
+
//
|
|
335
|
+
// When deadlocks happen very rarely, this timeout could be tuned to be
|
|
336
|
+
// slightly longer than the typical transaction execution time, so that
|
|
337
|
+
// transaction will be woken up to take the lock before this timeout, which
|
|
338
|
+
// will allow the transaction to save the CPU time on deadlock detection.
|
|
339
|
+
//
|
|
340
|
+
// Deadlock timeout is always smaller than lock_timeout.
|
|
341
|
+
int64_t deadlock_timeout_us = 500;
|
|
342
|
+
|
|
322
343
|
// Expiration duration in milliseconds. If non-negative, transactions that
|
|
323
344
|
// last longer than this many milliseconds will fail to commit. If not set,
|
|
324
345
|
// a forgotten transaction that is never committed, rolled back, or deleted
|