@nxtedition/rocksdb 8.1.17 → 8.2.0-alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +32 -2
- package/binding.gyp +8 -0
- package/deps/liburing/liburing.gyp +20 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +4 -0
- package/deps/rocksdb/rocksdb/TARGETS +7 -0
- package/deps/rocksdb/rocksdb/cache/cache.cc +43 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +8 -5
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +1 -1
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +12 -48
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +26 -18
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +5 -62
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +119 -44
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +34 -29
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +3 -3
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +148 -209
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +118 -284
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +23 -71
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +351 -392
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +5 -2
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +296 -0
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.h +52 -0
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +22 -19
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +56 -20
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +3 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +3 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +19 -25
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +216 -0
- package/deps/rocksdb/rocksdb/db/c.cc +90 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +8 -7
- package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +24 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +17 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +18 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +3 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +245 -302
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +13 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +5 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +75 -15
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +2 -3
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -5
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +91 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +5 -12
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +16 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +47 -24
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +4 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -3
- package/deps/rocksdb/rocksdb/db/db_iter.cc +28 -29
- package/deps/rocksdb/rocksdb/db/db_iter.h +0 -3
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +176 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +391 -2
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +26 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +13 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +3 -1
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +0 -6
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +3 -0
- package/deps/rocksdb/rocksdb/db/forward_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +4 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +68 -40
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +3 -3
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +115 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +169 -72
- package/deps/rocksdb/rocksdb/db/internal_stats.h +36 -7
- package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +4 -0
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +151 -0
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.cc +47 -16
- package/deps/rocksdb/rocksdb/db/range_del_aggregator.h +10 -8
- package/deps/rocksdb/rocksdb/db/range_del_aggregator_test.cc +91 -93
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_set.cc +30 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +1 -0
- package/deps/rocksdb/rocksdb/db/write_stall_stats.cc +179 -0
- package/deps/rocksdb/rocksdb/db/write_stall_stats.h +47 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +109 -7
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +147 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +31 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +22 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +4 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +42 -59
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +7 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/expected_state.cc +6 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +6 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +127 -36
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +8 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +35 -0
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +29 -8
- package/deps/rocksdb/rocksdb/file/file_util.cc +14 -10
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +183 -63
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_cache.h +159 -66
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +52 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +134 -73
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +46 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +0 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/perf_context.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +3 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +18 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +28 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +39 -0
- package/deps/rocksdb/rocksdb/monitoring/perf_context.cc +5 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +9 -1
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +2 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +17 -7
- package/deps/rocksdb/rocksdb/port/win/env_win.h +1 -0
- package/deps/rocksdb/rocksdb/src.mk +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +38 -34
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +11 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +5 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +126 -132
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +16 -16
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +0 -16
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +3 -4
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +370 -0
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +44 -0
- package/deps/rocksdb/rocksdb/table/get_context.cc +4 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +555 -267
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +10 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +113 -70
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.cc +96 -0
- package/deps/rocksdb/rocksdb/test_util/secondary_cache_test_util.h +117 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc +5 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +3 -3
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +9 -2
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +5 -1
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +11 -0
- package/deps/rocksdb/rocksdb.gyp +7 -1
- package/package.json +1 -1
- package/prebuilds/linux-x64/node.napi.node +0 -0
|
@@ -76,6 +76,8 @@ typedef struct rocksdb_backup_engine_options_t rocksdb_backup_engine_options_t;
|
|
|
76
76
|
typedef struct rocksdb_restore_options_t rocksdb_restore_options_t;
|
|
77
77
|
typedef struct rocksdb_memory_allocator_t rocksdb_memory_allocator_t;
|
|
78
78
|
typedef struct rocksdb_lru_cache_options_t rocksdb_lru_cache_options_t;
|
|
79
|
+
typedef struct rocksdb_hyper_clock_cache_options_t
|
|
80
|
+
rocksdb_hyper_clock_cache_options_t;
|
|
79
81
|
typedef struct rocksdb_cache_t rocksdb_cache_t;
|
|
80
82
|
typedef struct rocksdb_compactionfilter_t rocksdb_compactionfilter_t;
|
|
81
83
|
typedef struct rocksdb_compactionfiltercontext_t
|
|
@@ -597,13 +599,14 @@ extern ROCKSDB_LIBRARY_API void rocksdb_release_snapshot(
|
|
|
597
599
|
extern ROCKSDB_LIBRARY_API char* rocksdb_property_value(rocksdb_t* db,
|
|
598
600
|
const char* propname);
|
|
599
601
|
/* returns 0 on success, -1 otherwise */
|
|
600
|
-
int rocksdb_property_int(rocksdb_t* db,
|
|
601
|
-
|
|
602
|
+
extern ROCKSDB_LIBRARY_API int rocksdb_property_int(rocksdb_t* db,
|
|
603
|
+
const char* propname,
|
|
604
|
+
uint64_t* out_val);
|
|
602
605
|
|
|
603
606
|
/* returns 0 on success, -1 otherwise */
|
|
604
|
-
int rocksdb_property_int_cf(
|
|
605
|
-
|
|
606
|
-
|
|
607
|
+
extern ROCKSDB_LIBRARY_API int rocksdb_property_int_cf(
|
|
608
|
+
rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
|
|
609
|
+
const char* propname, uint64_t* out_val);
|
|
607
610
|
|
|
608
611
|
extern ROCKSDB_LIBRARY_API char* rocksdb_property_value_cf(
|
|
609
612
|
rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
|
|
@@ -662,6 +665,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_flush_cf(
|
|
|
662
665
|
rocksdb_t* db, const rocksdb_flushoptions_t* options,
|
|
663
666
|
rocksdb_column_family_handle_t* column_family, char** errptr);
|
|
664
667
|
|
|
668
|
+
extern ROCKSDB_LIBRARY_API void rocksdb_flush_cfs(
|
|
669
|
+
rocksdb_t* db, const rocksdb_flushoptions_t* options,
|
|
670
|
+
rocksdb_column_family_handle_t** column_family, int num_column_families,
|
|
671
|
+
char** errptr);
|
|
672
|
+
|
|
665
673
|
extern ROCKSDB_LIBRARY_API void rocksdb_flush_wal(rocksdb_t* db,
|
|
666
674
|
unsigned char sync,
|
|
667
675
|
char** errptr);
|
|
@@ -2012,6 +2020,29 @@ rocksdb_cache_get_usage(rocksdb_cache_t* cache);
|
|
|
2012
2020
|
extern ROCKSDB_LIBRARY_API size_t
|
|
2013
2021
|
rocksdb_cache_get_pinned_usage(rocksdb_cache_t* cache);
|
|
2014
2022
|
|
|
2023
|
+
/* HyperClockCache */
|
|
2024
|
+
extern ROCKSDB_LIBRARY_API rocksdb_hyper_clock_cache_options_t*
|
|
2025
|
+
rocksdb_hyper_clock_cache_options_create(size_t capacity,
|
|
2026
|
+
size_t estimated_entry_charge);
|
|
2027
|
+
extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_destroy(
|
|
2028
|
+
rocksdb_hyper_clock_cache_options_t*);
|
|
2029
|
+
extern ROCKSDB_LIBRARY_API void rocksdb_hyper_clock_cache_options_set_capacity(
|
|
2030
|
+
rocksdb_hyper_clock_cache_options_t*, size_t);
|
|
2031
|
+
extern ROCKSDB_LIBRARY_API void
|
|
2032
|
+
rocksdb_hyper_clock_cache_options_set_estimated_entry_charge(
|
|
2033
|
+
rocksdb_hyper_clock_cache_options_t*, size_t);
|
|
2034
|
+
extern ROCKSDB_LIBRARY_API void
|
|
2035
|
+
rocksdb_hyper_clock_cache_options_set_num_shard_bits(
|
|
2036
|
+
rocksdb_hyper_clock_cache_options_t*, int);
|
|
2037
|
+
extern ROCKSDB_LIBRARY_API void
|
|
2038
|
+
rocksdb_hyper_clock_cache_options_set_memory_allocator(
|
|
2039
|
+
rocksdb_hyper_clock_cache_options_t*, rocksdb_memory_allocator_t*);
|
|
2040
|
+
|
|
2041
|
+
extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_hyper_clock(
|
|
2042
|
+
size_t capacity, size_t estimated_entry_charge);
|
|
2043
|
+
extern ROCKSDB_LIBRARY_API rocksdb_cache_t*
|
|
2044
|
+
rocksdb_cache_create_hyper_clock_opts(rocksdb_hyper_clock_cache_options_t*);
|
|
2045
|
+
|
|
2015
2046
|
/* DBPath */
|
|
2016
2047
|
|
|
2017
2048
|
extern ROCKSDB_LIBRARY_API rocksdb_dbpath_t* rocksdb_dbpath_create(
|
|
@@ -2116,6 +2147,11 @@ rocksdb_ingestexternalfileoptions_set_allow_blocking_flush(
|
|
|
2116
2147
|
extern ROCKSDB_LIBRARY_API void
|
|
2117
2148
|
rocksdb_ingestexternalfileoptions_set_ingest_behind(
|
|
2118
2149
|
rocksdb_ingestexternalfileoptions_t* opt, unsigned char ingest_behind);
|
|
2150
|
+
extern ROCKSDB_LIBRARY_API void
|
|
2151
|
+
rocksdb_ingestexternalfileoptions_set_fail_if_not_bottommost_level(
|
|
2152
|
+
rocksdb_ingestexternalfileoptions_t* opt,
|
|
2153
|
+
unsigned char fail_if_not_bottommost_level);
|
|
2154
|
+
|
|
2119
2155
|
extern ROCKSDB_LIBRARY_API void rocksdb_ingestexternalfileoptions_destroy(
|
|
2120
2156
|
rocksdb_ingestexternalfileoptions_t* opt);
|
|
2121
2157
|
|
|
@@ -2198,6 +2234,12 @@ extern ROCKSDB_LIBRARY_API void rocksdb_universal_compaction_options_destroy(
|
|
|
2198
2234
|
extern ROCKSDB_LIBRARY_API rocksdb_fifo_compaction_options_t*
|
|
2199
2235
|
rocksdb_fifo_compaction_options_create(void);
|
|
2200
2236
|
extern ROCKSDB_LIBRARY_API void
|
|
2237
|
+
rocksdb_fifo_compaction_options_set_allow_compaction(
|
|
2238
|
+
rocksdb_fifo_compaction_options_t* fifo_opts, unsigned char allow_compaction);
|
|
2239
|
+
extern ROCKSDB_LIBRARY_API unsigned char
|
|
2240
|
+
rocksdb_fifo_compaction_options_get_allow_compaction(
|
|
2241
|
+
rocksdb_fifo_compaction_options_t* fifo_opts);
|
|
2242
|
+
extern ROCKSDB_LIBRARY_API void
|
|
2201
2243
|
rocksdb_fifo_compaction_options_set_max_table_files_size(
|
|
2202
2244
|
rocksdb_fifo_compaction_options_t* fifo_opts, uint64_t size);
|
|
2203
2245
|
extern ROCKSDB_LIBRARY_API uint64_t
|
|
@@ -2622,6 +2664,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cf(
|
|
|
2622
2664
|
rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options,
|
|
2623
2665
|
rocksdb_column_family_handle_t* column_family, char** errptr);
|
|
2624
2666
|
|
|
2667
|
+
extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_cfs(
|
|
2668
|
+
rocksdb_transactiondb_t* txn_db, const rocksdb_flushoptions_t* options,
|
|
2669
|
+
rocksdb_column_family_handle_t** column_families, int num_column_families,
|
|
2670
|
+
char** errptr);
|
|
2671
|
+
|
|
2625
2672
|
extern ROCKSDB_LIBRARY_API void rocksdb_transactiondb_flush_wal(
|
|
2626
2673
|
rocksdb_transactiondb_t* txn_db, unsigned char sync, char** errptr);
|
|
2627
2674
|
|
|
@@ -135,6 +135,9 @@ struct ShardedCacheOptions {
|
|
|
135
135
|
CacheMetadataChargePolicy metadata_charge_policy =
|
|
136
136
|
kDefaultCacheMetadataChargePolicy;
|
|
137
137
|
|
|
138
|
+
// A SecondaryCache instance to use the non-volatile tier.
|
|
139
|
+
std::shared_ptr<SecondaryCache> secondary_cache;
|
|
140
|
+
|
|
138
141
|
ShardedCacheOptions() {}
|
|
139
142
|
ShardedCacheOptions(
|
|
140
143
|
size_t _capacity, int _num_shard_bits, bool _strict_capacity_limit,
|
|
@@ -182,9 +185,6 @@ struct LRUCacheOptions : public ShardedCacheOptions {
|
|
|
182
185
|
// -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX, false otherwise.
|
|
183
186
|
bool use_adaptive_mutex = kDefaultToAdaptiveMutex;
|
|
184
187
|
|
|
185
|
-
// A SecondaryCache instance to use a the non-volatile tier.
|
|
186
|
-
std::shared_ptr<SecondaryCache> secondary_cache;
|
|
187
|
-
|
|
188
188
|
LRUCacheOptions() {}
|
|
189
189
|
LRUCacheOptions(size_t _capacity, int _num_shard_bits,
|
|
190
190
|
bool _strict_capacity_limit, double _high_pri_pool_ratio,
|
|
@@ -27,30 +27,129 @@ class SliceTransform;
|
|
|
27
27
|
// CompactionFilter allows an application to modify/delete a key-value during
|
|
28
28
|
// table file creation.
|
|
29
29
|
//
|
|
30
|
-
//
|
|
30
|
+
// Some general notes:
|
|
31
|
+
//
|
|
32
|
+
// * RocksDB snapshots do not guarantee to preserve the state of the DB in the
|
|
33
|
+
// presence of CompactionFilter. Data seen from a snapshot might disappear after
|
|
34
|
+
// a table file created with a `CompactionFilter` is installed. If you use
|
|
35
|
+
// snapshots, think twice about whether you want to use `CompactionFilter` and
|
|
36
|
+
// whether you are using it in a safe way.
|
|
37
|
+
//
|
|
38
|
+
// * If multithreaded compaction is being used *and* a single CompactionFilter
|
|
39
|
+
// instance was supplied via Options::compaction_filter, CompactionFilter
|
|
40
|
+
// methods may be called from different threads concurrently. The application
|
|
41
|
+
// must ensure that such calls are thread-safe. If the CompactionFilter was
|
|
42
|
+
// created by a factory, then it will only ever be used by a single thread that
|
|
43
|
+
// is doing the table file creation, and this call does not need to be
|
|
44
|
+
// thread-safe. However, multiple filters may be in existence and operating
|
|
45
|
+
// concurrently.
|
|
46
|
+
//
|
|
47
|
+
// * The key passed to the filtering methods includes the timestamp if
|
|
48
|
+
// user-defined timestamps are enabled.
|
|
49
|
+
//
|
|
50
|
+
// * Exceptions MUST NOT propagate out of overridden functions into RocksDB,
|
|
31
51
|
// because RocksDB is not exception-safe. This could cause undefined behavior
|
|
32
52
|
// including data loss, unreported corruption, deadlocks, and more.
|
|
33
53
|
class CompactionFilter : public Customizable {
|
|
34
54
|
public:
|
|
55
|
+
// Value type of the key-value passed to the compaction filter's FilterV2/V3
|
|
56
|
+
// methods.
|
|
35
57
|
enum ValueType {
|
|
58
|
+
// Plain key-value
|
|
36
59
|
kValue,
|
|
60
|
+
// Merge operand
|
|
37
61
|
kMergeOperand,
|
|
38
|
-
|
|
62
|
+
// Used internally by the old stacked BlobDB implementation; this value type
|
|
63
|
+
// is never passed to application code. Note that when using the new
|
|
64
|
+
// integrated BlobDB, values stored separately as blobs are retrieved and
|
|
65
|
+
// presented to FilterV2/V3 with the type kValue above.
|
|
66
|
+
kBlobIndex,
|
|
67
|
+
// Wide-column entity
|
|
39
68
|
kWideColumnEntity,
|
|
40
69
|
};
|
|
41
70
|
|
|
71
|
+
// Potential decisions that can be returned by the compaction filter's
|
|
72
|
+
// FilterV2/V3 and FilterBlobByKey methods. See decision-specific caveats and
|
|
73
|
+
// constraints below.
|
|
42
74
|
enum class Decision {
|
|
75
|
+
// Keep the current key-value as-is.
|
|
43
76
|
kKeep,
|
|
77
|
+
|
|
78
|
+
// Remove the current key-value. Note that the semantics of removal are
|
|
79
|
+
// dependent on the value type. If the current key-value is a plain
|
|
80
|
+
// key-value or a wide-column entity, it is converted to a tombstone
|
|
81
|
+
// (Delete), resulting in the deletion of any earlier versions of the key.
|
|
82
|
+
// If it is a merge operand, it is simply dropped. Note: if you are using
|
|
83
|
+
// a TransactionDB, it is not recommended to filter out merge operands.
|
|
84
|
+
// If a Merge operation is filtered out, TransactionDB may not realize there
|
|
85
|
+
// is a write conflict and may allow a Transaction that should have failed
|
|
86
|
+
// to Commit. Instead, it is better to implement any Merge filtering inside
|
|
87
|
+
// the MergeOperator.
|
|
44
88
|
kRemove,
|
|
89
|
+
|
|
90
|
+
// Change the value of the current key-value. If the current key-value is a
|
|
91
|
+
// plain key-value or a merge operand, its value is updated but its value
|
|
92
|
+
// type remains the same. If the current key-value is a wide-column entity,
|
|
93
|
+
// it is converted to a plain key-value with the new value specified.
|
|
45
94
|
kChangeValue,
|
|
95
|
+
|
|
96
|
+
// Remove all key-values with key in [key, *skip_until). This range of keys
|
|
97
|
+
// will be skipped in a way that potentially avoids some IO operations
|
|
98
|
+
// compared to removing the keys one by one. Note that removal in this case
|
|
99
|
+
// means dropping the key-value regardless of value type; in other words, in
|
|
100
|
+
// contrast with kRemove, plain values and entities are not converted to
|
|
101
|
+
// tombstones.
|
|
102
|
+
//
|
|
103
|
+
// *skip_until <= key is treated the same as Decision::kKeep (since the
|
|
104
|
+
// range [key, *skip_until) is empty).
|
|
105
|
+
//
|
|
106
|
+
// Caveats:
|
|
107
|
+
// * The keys are skipped even if there are snapshots containing them,
|
|
108
|
+
// i.e. values removed by kRemoveAndSkipUntil can disappear from a
|
|
109
|
+
// snapshot - beware if you're using TransactionDB or DB::GetSnapshot().
|
|
110
|
+
// * If value for a key was overwritten or merged into (multiple Put()s
|
|
111
|
+
// or Merge()s), and `CompactionFilter` skips this key with
|
|
112
|
+
// kRemoveAndSkipUntil, it's possible that it will remove only
|
|
113
|
+
// the new value, exposing the old value that was supposed to be
|
|
114
|
+
// overwritten.
|
|
115
|
+
// * Doesn't work with PlainTableFactory in prefix mode.
|
|
116
|
+
// * If you use kRemoveAndSkipUntil for table files created by compaction,
|
|
117
|
+
// consider also reducing compaction_readahead_size option.
|
|
46
118
|
kRemoveAndSkipUntil,
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
119
|
+
|
|
120
|
+
// Used internally by the old stacked BlobDB implementation. Returning this
|
|
121
|
+
// decision from application code is not supported.
|
|
122
|
+
kChangeBlobIndex,
|
|
123
|
+
|
|
124
|
+
// Used internally by the old stacked BlobDB implementation. Returning this
|
|
125
|
+
// decision from application code is not supported.
|
|
126
|
+
kIOError,
|
|
127
|
+
|
|
128
|
+
// Remove the current key-value by converting it to a SingleDelete-type
|
|
129
|
+
// tombstone. Only supported for plain key-values and wide-column entities;
|
|
130
|
+
// not supported for merge operands. All the caveats related to
|
|
131
|
+
// SingleDeletes apply.
|
|
132
|
+
kPurge,
|
|
133
|
+
|
|
134
|
+
// Change the current key-value to the wide-column entity specified. If the
|
|
135
|
+
// current key-value is already a wide-column entity, only its columns are
|
|
136
|
+
// updated; if it is a plain key-value, it is converted to a wide-column
|
|
137
|
+
// entity with the specified columns. Not supported for merge operands.
|
|
138
|
+
// Only applicable to FilterV3.
|
|
50
139
|
kChangeWideColumnEntity,
|
|
140
|
+
|
|
141
|
+
// When using the integrated BlobDB implementation, it may be possible for
|
|
142
|
+
// applications to make a filtering decision for a given blob based on
|
|
143
|
+
// the key only without actually reading the blob value, which saves some
|
|
144
|
+
// I/O; see the FilterBlobByKey method below. Returning kUndetermined from
|
|
145
|
+
// FilterBlobByKey signals that making a decision solely based on the
|
|
146
|
+
// key is not possible; in this case, RocksDB reads the blob value and
|
|
147
|
+
// passes the key-value to the regular filtering method. Only applicable to
|
|
148
|
+
// FilterBlobByKey; returning this value from FilterV2/V3 is not supported.
|
|
51
149
|
kUndetermined,
|
|
52
150
|
};
|
|
53
151
|
|
|
152
|
+
// Used internally by the old stacked BlobDB implementation.
|
|
54
153
|
enum class BlobDecision { kKeep, kChangeValue, kCorruption, kIOError };
|
|
55
154
|
|
|
56
155
|
// Context information for a table file creation.
|
|
@@ -76,8 +175,8 @@ class CompactionFilter : public Customizable {
|
|
|
76
175
|
// The table file creation process invokes this method before adding a kv to
|
|
77
176
|
// the table file. A return value of false indicates that the kv should be
|
|
78
177
|
// preserved in the new table file and a return value of true indicates
|
|
79
|
-
// that this key-value should be removed
|
|
80
|
-
// application can inspect the existing value of the key and make decision
|
|
178
|
+
// that this key-value should be removed (that is, converted to a tombstone).
|
|
179
|
+
// The application can inspect the existing value of the key and make a decision
|
|
81
180
|
// based on it.
|
|
82
181
|
//
|
|
83
182
|
// Key-Values that are results of merge operation during table file creation
|
|
@@ -88,23 +187,6 @@ class CompactionFilter : public Customizable {
|
|
|
88
187
|
// When the value is to be preserved, the application has the option
|
|
89
188
|
// to modify the existing_value and pass it back through new_value.
|
|
90
189
|
// value_changed needs to be set to true in this case.
|
|
91
|
-
//
|
|
92
|
-
// Note that RocksDB snapshots (i.e. call GetSnapshot() API on a
|
|
93
|
-
// DB* object) will not guarantee to preserve the state of the DB with
|
|
94
|
-
// CompactionFilter. Data seen from a snapshot might disappear after a
|
|
95
|
-
// table file created with a `CompactionFilter` is installed. If you use
|
|
96
|
-
// snapshots, think twice about whether you want to use `CompactionFilter` and
|
|
97
|
-
// whether you are using it in a safe way.
|
|
98
|
-
//
|
|
99
|
-
// If multithreaded compaction is being used *and* a single CompactionFilter
|
|
100
|
-
// instance was supplied via Options::compaction_filter, this method may be
|
|
101
|
-
// called from different threads concurrently. The application must ensure
|
|
102
|
-
// that the call is thread-safe.
|
|
103
|
-
//
|
|
104
|
-
// If the CompactionFilter was created by a factory, then it will only ever
|
|
105
|
-
// be used by a single thread that is doing the table file creation, and this
|
|
106
|
-
// call does not need to be thread-safe. However, multiple filters may be
|
|
107
|
-
// in existence and operating concurrently.
|
|
108
190
|
virtual bool Filter(int /*level*/, const Slice& /*key*/,
|
|
109
191
|
const Slice& /*existing_value*/,
|
|
110
192
|
std::string* /*new_value*/,
|
|
@@ -126,48 +208,18 @@ class CompactionFilter : public Customizable {
|
|
|
126
208
|
return false;
|
|
127
209
|
}
|
|
128
210
|
|
|
129
|
-
//
|
|
130
|
-
//
|
|
211
|
+
// A unified API for plain values and merge operands that may
|
|
212
|
+
// return a variety of decisions (see Decision above). The `value_type`
|
|
213
|
+
// parameter indicates the type of the key-value and the `existing_value`
|
|
214
|
+
// contains the current value or merge operand. The `new_value` output
|
|
215
|
+
// parameter can be used to set the updated value or merge operand when the
|
|
216
|
+
// kChangeValue decision is made by the filter. See the description of
|
|
217
|
+
// kRemoveAndSkipUntil above for the semantics of the `skip_until` output
|
|
218
|
+
// parameter, and see Decision above for more information on the semantics of
|
|
219
|
+
// the potential return values.
|
|
220
|
+
//
|
|
131
221
|
// The default implementation uses Filter() and FilterMergeOperand().
|
|
132
222
|
// If you're overriding this method, no need to override the other two.
|
|
133
|
-
// `value_type` indicates whether this key-value corresponds to a normal
|
|
134
|
-
// value (e.g. written with Put()) or a merge operand (written with Merge()).
|
|
135
|
-
//
|
|
136
|
-
// Possible return values:
|
|
137
|
-
// * kKeep - keep the key-value pair.
|
|
138
|
-
// * kRemove - remove the key-value pair or merge operand.
|
|
139
|
-
// * kChangeValue - keep the key and change the value/operand to *new_value.
|
|
140
|
-
// * kRemoveAndSkipUntil - remove this key-value pair, and also remove
|
|
141
|
-
// all key-value pairs with key in [key, *skip_until). This range
|
|
142
|
-
// of keys will be skipped without reading, potentially saving some
|
|
143
|
-
// IO operations compared to removing the keys one by one.
|
|
144
|
-
//
|
|
145
|
-
// *skip_until <= key is treated the same as Decision::kKeep
|
|
146
|
-
// (since the range [key, *skip_until) is empty).
|
|
147
|
-
//
|
|
148
|
-
// Caveats:
|
|
149
|
-
// - The keys are skipped even if there are snapshots containing them,
|
|
150
|
-
// i.e. values removed by kRemoveAndSkipUntil can disappear from a
|
|
151
|
-
// snapshot - beware if you're using TransactionDB or
|
|
152
|
-
// DB::GetSnapshot().
|
|
153
|
-
// - If value for a key was overwritten or merged into (multiple Put()s
|
|
154
|
-
// or Merge()s), and `CompactionFilter` skips this key with
|
|
155
|
-
// kRemoveAndSkipUntil, it's possible that it will remove only
|
|
156
|
-
// the new value, exposing the old value that was supposed to be
|
|
157
|
-
// overwritten.
|
|
158
|
-
// - Doesn't work with PlainTableFactory in prefix mode.
|
|
159
|
-
// - If you use kRemoveAndSkipUntil for table files created by
|
|
160
|
-
// compaction, consider also reducing compaction_readahead_size
|
|
161
|
-
// option.
|
|
162
|
-
//
|
|
163
|
-
// Should never return kUndetermined.
|
|
164
|
-
// Note: If you are using a TransactionDB, it is not recommended to filter
|
|
165
|
-
// out or modify merge operands (ValueType::kMergeOperand).
|
|
166
|
-
// If a merge operation is filtered out, TransactionDB may not realize there
|
|
167
|
-
// is a write conflict and may allow a Transaction to Commit that should have
|
|
168
|
-
// failed. Instead, it is better to implement any Merge filtering inside the
|
|
169
|
-
// MergeOperator.
|
|
170
|
-
// key includes timestamp if user-defined timestamp is enabled.
|
|
171
223
|
virtual Decision FilterV2(int level, const Slice& key, ValueType value_type,
|
|
172
224
|
const Slice& existing_value, std::string* new_value,
|
|
173
225
|
std::string* /*skip_until*/) const {
|
|
@@ -195,17 +247,21 @@ class CompactionFilter : public Customizable {
|
|
|
195
247
|
}
|
|
196
248
|
}
|
|
197
249
|
|
|
198
|
-
// Wide column aware API. Called for plain values, merge operands, and
|
|
250
|
+
// Wide column aware unified API. Called for plain values, merge operands, and
|
|
199
251
|
// wide-column entities; the `value_type` parameter indicates the type of the
|
|
200
252
|
// key-value. When the key-value is a plain value or a merge operand, the
|
|
201
253
|
// `existing_value` parameter contains the existing value and the
|
|
202
254
|
// `existing_columns` parameter is invalid (nullptr). When the key-value is a
|
|
203
255
|
// wide-column entity, the `existing_columns` parameter contains the wide
|
|
204
256
|
// columns of the existing entity and the `existing_value` parameter is
|
|
205
|
-
// invalid (nullptr). The
|
|
206
|
-
//
|
|
207
|
-
//
|
|
208
|
-
//
|
|
257
|
+
// invalid (nullptr). The `new_value` output parameter can be used to set the
|
|
258
|
+
// updated value or merge operand when the kChangeValue decision is made by
|
|
259
|
+
// the filter. The `new_columns` output parameter can be used to specify
|
|
260
|
+
// the pairs of column names and column values when the
|
|
261
|
+
// kChangeWideColumnEntity decision is returned. See the description of
|
|
262
|
+
// kRemoveAndSkipUntil above for the semantics of the `skip_until` output
|
|
263
|
+
// parameter, and see Decision above for more information on the semantics of
|
|
264
|
+
// the potential return values.
|
|
209
265
|
//
|
|
210
266
|
// For compatibility, the default implementation keeps all wide-column
|
|
211
267
|
// entities, and falls back to FilterV2 for plain values and merge operands.
|
|
@@ -255,10 +311,15 @@ class CompactionFilter : public Customizable {
|
|
|
255
311
|
virtual bool IsStackedBlobDbInternalCompactionFilter() const { return false; }
|
|
256
312
|
|
|
257
313
|
// In the case of BlobDB, it may be possible to reach a decision with only
|
|
258
|
-
// the key without reading the actual value
|
|
259
|
-
//
|
|
260
|
-
//
|
|
261
|
-
//
|
|
314
|
+
// the key without reading the actual value, saving some I/O operations.
|
|
315
|
+
// Keys where the value is stored separately in a blob file will be
|
|
316
|
+
// passed to this method. If the method returns a supported decision other
|
|
317
|
+
// than kUndetermined, it will be considered final and performed without
|
|
318
|
+
// reading the existing value. Returning kUndetermined will cause FilterV3()
|
|
319
|
+
// to be called to make a decision as usual. The output parameters
|
|
320
|
+
// `new_value` and `skip_until` are applicable to the decisions kChangeValue
|
|
321
|
+
// and kRemoveAndSkipUntil respectively, and have the same semantics as
|
|
322
|
+
// the corresponding parameters of FilterV2/V3.
|
|
262
323
|
virtual Decision FilterBlobByKey(int /*level*/, const Slice& /*key*/,
|
|
263
324
|
std::string* /*new_value*/,
|
|
264
325
|
std::string* /*skip_until*/) const {
|
|
@@ -301,6 +301,18 @@ class DB {
|
|
|
301
301
|
std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
|
|
302
302
|
std::string trim_ts);
|
|
303
303
|
|
|
304
|
+
// Manually, synchronously attempt to resume DB writes after a write failure
|
|
305
|
+
// to the underlying filesystem. See
|
|
306
|
+
// https://github.com/facebook/rocksdb/wiki/Background-Error-Handling
|
|
307
|
+
//
|
|
308
|
+
// Returns OK if writes are successfully resumed, or there was no
|
|
309
|
+
// outstanding error to recover from. Returns underlying write error if
|
|
310
|
+
// it is not recoverable.
|
|
311
|
+
//
|
|
312
|
+
// WART: Does not mix well with auto-resume. Will return Busy if an
|
|
313
|
+
// auto-resume is in progress, without waiting for it to complete.
|
|
314
|
+
// See DBOptions::max_bgerror_resume_count and
|
|
315
|
+
// EventListener::OnErrorRecoveryBegin
|
|
304
316
|
virtual Status Resume() { return Status::NotSupported(); }
|
|
305
317
|
|
|
306
318
|
// Close the DB by releasing resources, closing files etc. This should be
|
|
@@ -941,6 +953,18 @@ class DB {
|
|
|
941
953
|
// level, as well as the histogram of latency of single requests.
|
|
942
954
|
static const std::string kCFFileHistogram;
|
|
943
955
|
|
|
956
|
+
// "rocksdb.cf-write-stall-stats" - returns a multi-line string or
|
|
957
|
+
// map with statistics on CF-scope write stalls for a given CF
|
|
958
|
+
// See `WriteStallStatsMapKeys` for structured representation of keys
|
|
959
|
+
// available in the map form.
|
|
960
|
+
static const std::string kCFWriteStallStats;
|
|
961
|
+
|
|
962
|
+
// "rocksdb.db-write-stall-stats" - returns a multi-line string or
|
|
963
|
+
// map with statistics on DB-scope write stalls
|
|
964
|
+
// See `WriteStallStatsMapKeys` for structured representation of keys
|
|
965
|
+
// available in the map form.
|
|
966
|
+
static const std::string kDBWriteStallStats;
|
|
967
|
+
|
|
944
968
|
// "rocksdb.dbstats" - As a string property, returns a multi-line string
|
|
945
969
|
// with general database stats, both cumulative (over the db's
|
|
946
970
|
// lifetime) and interval (since the last retrieval of kDBStats).
|
|
@@ -1717,11 +1741,12 @@ class DB {
|
|
|
1717
1741
|
const std::vector<IngestExternalFileArg>& args) = 0;
|
|
1718
1742
|
|
|
1719
1743
|
// CreateColumnFamilyWithImport() will create a new column family with
|
|
1720
|
-
// column_family_name and import external SST files specified in metadata
|
|
1721
|
-
// this column family.
|
|
1744
|
+
// column_family_name and import external SST files specified in `metadata`
|
|
1745
|
+
// into this column family.
|
|
1722
1746
|
// (1) External SST files can be created using SstFileWriter.
|
|
1723
1747
|
// (2) External SST files can be exported from a particular column family in
|
|
1724
|
-
// an existing DB using Checkpoint::ExportColumnFamily.
|
|
1748
|
+
// an existing DB using Checkpoint::ExportColumnFamily. `metadata` should
|
|
1749
|
+
// be the output from Checkpoint::ExportColumnFamily.
|
|
1725
1750
|
// Option in import_options specifies whether the external files are copied or
|
|
1726
1751
|
// moved (default is copy). When option specifies copy, managing files at
|
|
1727
1752
|
// external_file_path is caller's responsibility. When option specifies a
|
|
@@ -1860,6 +1885,24 @@ class DB {
|
|
|
1860
1885
|
}
|
|
1861
1886
|
};
|
|
1862
1887
|
|
|
1888
|
+
struct WriteStallStatsMapKeys {
|
|
1889
|
+
static const std::string& TotalStops();
|
|
1890
|
+
static const std::string& TotalDelays();
|
|
1891
|
+
|
|
1892
|
+
static const std::string& CFL0FileCountLimitDelaysWithOngoingCompaction();
|
|
1893
|
+
static const std::string& CFL0FileCountLimitStopsWithOngoingCompaction();
|
|
1894
|
+
|
|
1895
|
+
// REQUIRES:
|
|
1896
|
+
// `cause` isn't any of these: `WriteStallCause::kNone`,
|
|
1897
|
+
// `WriteStallCause::kCFScopeWriteStallCauseEnumMax`,
|
|
1898
|
+
// `WriteStallCause::kDBScopeWriteStallCauseEnumMax`
|
|
1899
|
+
//
|
|
1900
|
+
// REQUIRES:
|
|
1901
|
+
// `condition` isn't any of these: `WriteStallCondition::kNormal`
|
|
1902
|
+
static std::string CauseConditionCount(WriteStallCause cause,
|
|
1903
|
+
WriteStallCondition condition);
|
|
1904
|
+
};
|
|
1905
|
+
|
|
1863
1906
|
// Overloaded operators for enum class SizeApproximationFlags.
|
|
1864
1907
|
inline DB::SizeApproximationFlags operator&(DB::SizeApproximationFlags lhs,
|
|
1865
1908
|
DB::SizeApproximationFlags rhs) {
|
|
@@ -682,6 +682,10 @@ class FileSystem : public Customizable {
|
|
|
682
682
|
return IOStatus::OK();
|
|
683
683
|
}
|
|
684
684
|
|
|
685
|
+
// Indicates to upper layers whether the FileSystem supports/uses async IO
|
|
686
|
+
// or not
|
|
687
|
+
virtual bool use_async_io() { return true; }
|
|
688
|
+
|
|
685
689
|
// If you're adding methods here, remember to add them to EnvWrapper too.
|
|
686
690
|
|
|
687
691
|
private:
|
|
@@ -1522,6 +1526,8 @@ class FileSystemWrapper : public FileSystem {
|
|
|
1522
1526
|
return target_->AbortIO(io_handles);
|
|
1523
1527
|
}
|
|
1524
1528
|
|
|
1529
|
+
virtual bool use_async_io() override { return target_->use_async_io(); }
|
|
1530
|
+
|
|
1525
1531
|
protected:
|
|
1526
1532
|
std::shared_ptr<FileSystem> target_;
|
|
1527
1533
|
};
|
|
@@ -194,12 +194,6 @@ enum class BackgroundErrorReason {
|
|
|
194
194
|
kManifestWriteNoWAL,
|
|
195
195
|
};
|
|
196
196
|
|
|
197
|
-
enum class WriteStallCondition {
|
|
198
|
-
kNormal,
|
|
199
|
-
kDelayed,
|
|
200
|
-
kStopped,
|
|
201
|
-
};
|
|
202
|
-
|
|
203
197
|
struct WriteStallInfo {
|
|
204
198
|
// the name of the column family
|
|
205
199
|
std::string cf_name;
|
|
@@ -148,6 +148,13 @@ struct SstFileMetaData : public FileStorageInfo {
|
|
|
148
148
|
// For L0, larger `epoch_number` indicates newer L0 file.
|
|
149
149
|
// 0 if the information is not available.
|
|
150
150
|
uint64_t epoch_number = 0;
|
|
151
|
+
|
|
152
|
+
// These bounds define the effective key range for range tombstones
|
|
153
|
+
// in this file.
|
|
154
|
+
// Currently only used by CreateColumnFamilyWithImport().
|
|
155
|
+
std::string smallest{}; // Smallest internal key served by table
|
|
156
|
+
std::string largest{}; // Largest internal key served by table
|
|
157
|
+
|
|
151
158
|
// DEPRECATED: The name of the file within its directory with a
|
|
152
159
|
// leading slash (e.g. "/123456.sst"). Use relative_filename from base struct
|
|
153
160
|
// instead.
|
|
@@ -1311,12 +1311,12 @@ struct DBOptions {
|
|
|
1311
1311
|
// Default: false
|
|
1312
1312
|
bool best_efforts_recovery = false;
|
|
1313
1313
|
|
|
1314
|
-
// It defines how many times
|
|
1314
|
+
// It defines how many times DB::Resume() is called by a separate thread when
|
|
1315
1315
|
// background retryable IO Error happens. When background retryable IO
|
|
1316
1316
|
// Error happens, SetBGError is called to deal with the error. If the error
|
|
1317
1317
|
// can be auto-recovered (e.g., retryable IO Error during Flush or WAL write),
|
|
1318
1318
|
// then db resume is called in background to recover from the error. If this
|
|
1319
|
-
// value is 0 or negative,
|
|
1319
|
+
// value is 0 or negative, DB::Resume() will not be called automatically.
|
|
1320
1320
|
//
|
|
1321
1321
|
// Default: INT_MAX
|
|
1322
1322
|
int max_bgerror_resume_count = INT_MAX;
|
|
@@ -135,9 +135,14 @@ struct PerfContext {
|
|
|
135
135
|
// than the snapshot that iterator is using.
|
|
136
136
|
//
|
|
137
137
|
uint64_t internal_recent_skipped_count;
|
|
138
|
-
// How many
|
|
138
|
+
// How many merge operands were fed into the merge operator by iterators.
|
|
139
|
+
// Note: base values are not included in the count.
|
|
139
140
|
//
|
|
140
141
|
uint64_t internal_merge_count;
|
|
142
|
+
// How many merge operands were fed into the merge operator by point lookups.
|
|
143
|
+
// Note: base values are not included in the count.
|
|
144
|
+
//
|
|
145
|
+
uint64_t internal_merge_point_lookup_count;
|
|
141
146
|
// Number of times we reseeked inside a merging iterator, specifically to skip
|
|
142
147
|
// after or before a range of keys covered by a range deletion in a newer LSM
|
|
143
148
|
// component.
|
|
@@ -99,12 +99,12 @@ class SecondaryCache : public Customizable {
|
|
|
99
99
|
// needs to return true.
|
|
100
100
|
// This hint can also be safely ignored.
|
|
101
101
|
//
|
|
102
|
-
//
|
|
103
|
-
//
|
|
102
|
+
// kept_in_sec_cache is to indicate whether the entry will be kept in the
|
|
103
|
+
// secondary cache after the Lookup (rather than erased because of Lookup)
|
|
104
104
|
virtual std::unique_ptr<SecondaryCacheResultHandle> Lookup(
|
|
105
105
|
const Slice& key, const Cache::CacheItemHelper* helper,
|
|
106
106
|
Cache::CreateContext* create_context, bool wait, bool advise_erase,
|
|
107
|
-
bool&
|
|
107
|
+
bool& kept_in_sec_cache) = 0;
|
|
108
108
|
|
|
109
109
|
// Indicate whether a handle can be erased in this secondary cache.
|
|
110
110
|
[[nodiscard]] virtual bool SupportForceErase() const = 0;
|
|
@@ -415,6 +415,20 @@ enum Tickers : uint32_t {
|
|
|
415
415
|
// Number of errors returned to the async read callback
|
|
416
416
|
ASYNC_READ_ERROR_COUNT,
|
|
417
417
|
|
|
418
|
+
// Fine grained secondary cache stats
|
|
419
|
+
SECONDARY_CACHE_FILTER_HITS,
|
|
420
|
+
SECONDARY_CACHE_INDEX_HITS,
|
|
421
|
+
SECONDARY_CACHE_DATA_HITS,
|
|
422
|
+
|
|
423
|
+
// Number of lookups into the prefetched tail (see
|
|
424
|
+
// `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
|
|
425
|
+
// that can't find their data for table open
|
|
426
|
+
TABLE_OPEN_PREFETCH_TAIL_MISS,
|
|
427
|
+
// Number of lookups into the prefetched tail (see
|
|
428
|
+
// `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
|
|
429
|
+
// that find their data for table open
|
|
430
|
+
TABLE_OPEN_PREFETCH_TAIL_HIT,
|
|
431
|
+
|
|
418
432
|
TICKER_ENUM_MAX
|
|
419
433
|
};
|
|
420
434
|
|
|
@@ -528,6 +542,10 @@ enum Histograms : uint32_t {
|
|
|
528
542
|
// Wait time for aborting async read in FilePrefetchBuffer destructor
|
|
529
543
|
ASYNC_PREFETCH_ABORT_MICROS,
|
|
530
544
|
|
|
545
|
+
// Number of bytes read for RocksDB's prefetching contents (as opposed to file
|
|
546
|
+
// system's prefetch) from the end of SST table during block based table open
|
|
547
|
+
TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
|
|
548
|
+
|
|
531
549
|
HISTOGRAM_ENUM_MAX
|
|
532
550
|
};
|
|
533
551
|
|
|
@@ -63,4 +63,32 @@ enum EntryType {
|
|
|
63
63
|
kEntryOther,
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
+
enum class WriteStallCause {
|
|
67
|
+
// Beginning of CF-scope write stall causes
|
|
68
|
+
//
|
|
69
|
+
// Always keep `kMemtableLimit` as the first stat in this section
|
|
70
|
+
kMemtableLimit,
|
|
71
|
+
kL0FileCountLimit,
|
|
72
|
+
kPendingCompactionBytes,
|
|
73
|
+
kCFScopeWriteStallCauseEnumMax,
|
|
74
|
+
// End of CF-scope write stall causes
|
|
75
|
+
|
|
76
|
+
// Beginning of DB-scope write stall causes
|
|
77
|
+
//
|
|
78
|
+
// Always keep `kWriteBufferManagerLimit` as the first stat in this section
|
|
79
|
+
kWriteBufferManagerLimit,
|
|
80
|
+
kDBScopeWriteStallCauseEnumMax,
|
|
81
|
+
// End of DB-scope write stall causes
|
|
82
|
+
|
|
83
|
+
// Always add new WriteStallCause before `kNone`
|
|
84
|
+
kNone,
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
enum class WriteStallCondition {
|
|
88
|
+
kDelayed,
|
|
89
|
+
kStopped,
|
|
90
|
+
// Always add new WriteStallCondition before `kNormal`
|
|
91
|
+
kNormal,
|
|
92
|
+
};
|
|
93
|
+
|
|
66
94
|
} // namespace ROCKSDB_NAMESPACE
|