@nxtedition/rocksdb 10.1.5 → 10.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +19 -11
- package/deps/rocksdb/rocksdb/CMakeLists.txt +16 -5
- package/deps/rocksdb/rocksdb/Makefile +38 -15
- package/deps/rocksdb/rocksdb/TARGETS +10 -0
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +58 -0
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +4 -4
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +4 -2
- package/deps/rocksdb/rocksdb/db/builder.cc +2 -2
- package/deps/rocksdb/rocksdb/db/builder.h +1 -1
- package/deps/rocksdb/rocksdb/db/c.cc +205 -6
- package/deps/rocksdb/rocksdb/db/c_test.c +189 -1
- package/deps/rocksdb/rocksdb/db/column_family.cc +28 -0
- package/deps/rocksdb/rocksdb/db/column_family.h +17 -0
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +234 -60
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +11 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +22 -25
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +112 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +72 -21
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +77 -0
- package/deps/rocksdb/rocksdb/db/convenience.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +269 -112
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +107 -43
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +93 -24
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +157 -68
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +56 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -105
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +39 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +21 -14
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +107 -63
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +43 -2
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/db_rate_limiter_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +10 -2
- package/deps/rocksdb/rocksdb/db/db_test2.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +5 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -6
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +92 -2
- package/deps/rocksdb/rocksdb/db/error_handler.cc +34 -39
- package/deps/rocksdb/rocksdb/db/error_handler.h +3 -4
- package/deps/rocksdb/rocksdb/db/error_handler_fs_test.cc +8 -4
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +6 -3
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +71 -15
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +11 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +383 -4
- package/deps/rocksdb/rocksdb/db/fault_injection_test.cc +88 -72
- package/deps/rocksdb/rocksdb/db/flush_job.cc +30 -3
- package/deps/rocksdb/rocksdb/db/flush_job.h +14 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +60 -1
- package/deps/rocksdb/rocksdb/db/internal_stats.h +20 -1
- package/deps/rocksdb/rocksdb/db/log_writer.cc +24 -0
- package/deps/rocksdb/rocksdb/db/log_writer.h +5 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +6 -4
- package/deps/rocksdb/rocksdb/db/memtable.h +10 -10
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +4 -4
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +10 -3
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +8 -10
- package/deps/rocksdb/rocksdb/db/repair.cc +4 -3
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.cc +30 -0
- package/deps/rocksdb/rocksdb/db/seqno_to_time_mapping.h +9 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +17 -2
- package/deps/rocksdb/rocksdb/db/table_cache.h +9 -1
- package/deps/rocksdb/rocksdb/db/table_properties_collector.h +9 -2
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +3 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +3 -3
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_edit.cc +0 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +7 -6
- package/deps/rocksdb/rocksdb/db/version_set.cc +54 -31
- package/deps/rocksdb/rocksdb/db/version_set.h +14 -7
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +37 -29
- package/deps/rocksdb/rocksdb/db/wal_manager.h +6 -5
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +6 -0
- package/deps/rocksdb/rocksdb/db/write_batch.cc +54 -23
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +46 -5
- package/deps/rocksdb/rocksdb/db/write_thread.cc +53 -5
- package/deps/rocksdb/rocksdb/db/write_thread.h +36 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +57 -17
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +11 -3
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +8 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +10 -25
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h +25 -88
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.cc +93 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_filters.h +16 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +43 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h +109 -21
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +8 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +666 -205
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +55 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +18 -16
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +19 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +782 -494
- package/deps/rocksdb/rocksdb/env/composite_env_wrapper.h +21 -0
- package/deps/rocksdb/rocksdb/env/env.cc +6 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +0 -1
- package/deps/rocksdb/rocksdb/file/file_util.cc +8 -2
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +34 -19
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +29 -32
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +41 -15
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +63 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +16 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +0 -16
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/listener.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +76 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +17 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/transaction_log.h +12 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +31 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/user_write_callback.h +29 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h +4 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/customizable_util.h +0 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h +17 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +46 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -2
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/db_options.cc +8 -0
- package/deps/rocksdb/rocksdb/options/db_options.h +9 -5
- package/deps/rocksdb/rocksdb/options/options.cc +3 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/port/jemalloc_helper.h +2 -2
- package/deps/rocksdb/rocksdb/port/stack_trace.cc +1 -0
- package/deps/rocksdb/rocksdb/port/win/port_win.cc +3 -2
- package/deps/rocksdb/rocksdb/src.mk +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/binary_search_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +4 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +15 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -41
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +15 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +1 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +31 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +10 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +11 -15
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +17 -11
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -21
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +9 -11
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +16 -16
- package/deps/rocksdb/rocksdb/table/block_based/hash_index_reader.cc +1 -2
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +14 -9
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +82 -41
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +13 -14
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +18 -22
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +51 -13
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +3 -11
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h +2 -3
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +9 -10
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +3 -2
- package/deps/rocksdb/rocksdb/table/format.cc +1 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +18 -13
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +5 -3
- package/deps/rocksdb/rocksdb/table/plain/plain_table_builder.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_reader.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer_collectors.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +8 -7
- package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +1 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +19 -0
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +434 -110
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_impl.h +3 -1
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +3 -0
- package/deps/rocksdb/rocksdb/util/aligned_storage.h +24 -0
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +1 -1
- package/deps/rocksdb/rocksdb/util/random.cc +2 -1
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +1 -1
- package/deps/rocksdb/rocksdb/util/udt_util.cc +33 -0
- package/deps/rocksdb/rocksdb/util/udt_util.h +7 -0
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +33 -0
- package/deps/rocksdb/rocksdb/util/write_batch_util.h +5 -0
- package/deps/rocksdb/rocksdb/util/xxhash.h +10 -3
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +13 -13
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +104 -48
- package/deps/rocksdb/rocksdb/utilities/debug.cc +16 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +647 -235
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +274 -157
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +144 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +45 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector_test.cc +139 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc +12 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +105 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +64 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +43 -5
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +154 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +158 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +16 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc +4 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc +9 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +43 -7
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +1 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/.tap/test-results/node_modules/abstract-level/test/chained-batch-test.js.tap +0 -0
- package/.tap/test-results/node_modules/abstract-level/test/get-test.js.tap +0 -0
- package/.tap/test-results/test/abstract-level-test.js.tap +0 -1077
- package/.tap/test-results/test/batch-test.js.tap +0 -12
- package/.tap/test-results/test/chained-batch-gc-test.js.tap +0 -11
- package/.tap/test-results/test/cleanup-hanging-iterators-test.js.tap +0 -135
- package/.tap/test-results/test/clear-gc-test.js.tap +0 -13
- package/.tap/test-results/test/column-test.js.tap +0 -55
- package/.tap/test-results/test/common.js.tap +0 -0
- package/.tap/test-results/test/compression-test.js.tap +0 -30
- package/.tap/test-results/test/db-identity.js.tap +0 -12
- package/.tap/test-results/test/electron.js.tap +0 -0
- package/.tap/test-results/test/env-cleanup-hook-test.js.tap +0 -40
- package/.tap/test-results/test/env-cleanup-hook.js.tap +0 -0
- package/.tap/test-results/test/gc.js.tap +0 -0
- package/.tap/test-results/test/getproperty-test.js.tap +0 -29
- package/.tap/test-results/test/iterator-gc-test.js.tap +0 -15
- package/.tap/test-results/test/iterator-hwm-test.js.tap +0 -131
- package/.tap/test-results/test/iterator-recursion-test.js.tap +0 -12
- package/.tap/test-results/test/iterator-starvation-test.js.tap +0 -73
- package/.tap/test-results/test/iterator-test.js.tap +0 -6
- package/.tap/test-results/test/leak-tester-batch.js.tap +0 -0
- package/.tap/test-results/test/leak-tester-iterator.js.tap +0 -0
- package/.tap/test-results/test/leak-tester.js.tap +0 -0
- package/.tap/test-results/test/lock-test.js.tap +0 -18
- package/.tap/test-results/test/lock.js.tap +0 -0
- package/.tap/test-results/test/make.js.tap +0 -0
- package/.tap/test-results/test/max-rev-merge.js.tap +0 -0
- package/.tap/test-results/test/merge-operator-test.js.tap +0 -12
- package/.tap/test-results/test/mkdir-test.js.tap +0 -15
- package/.tap/test-results/test/segfault-test.js.tap +0 -76
- package/.tap/test-results/test/stack-blower.js.tap +0 -0
- package/deps/rocksdb/rocksdb/README.md +0 -29
- package/deps/rocksdb/rocksdb/microbench/README.md +0 -60
- package/deps/rocksdb/rocksdb/plugin/README.md +0 -43
- package/deps/rocksdb/rocksdb/port/README +0 -10
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/README +0 -13
|
@@ -143,6 +143,13 @@ class FlushJob {
|
|
|
143
143
|
// `MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT` for details.
|
|
144
144
|
void GetEffectiveCutoffUDTForPickedMemTables();
|
|
145
145
|
|
|
146
|
+
// If this column family enables tiering feature, it will find the current
|
|
147
|
+
// `preclude_last_level_min_seqno_`, and the smaller one between this and
|
|
148
|
+
// the `earliset_snapshot_` will later be announced to user property
|
|
149
|
+
// collectors. It indicates to tiering use cases which data are old enough to
|
|
150
|
+
// be placed on the last level.
|
|
151
|
+
void GetPrecludeLastLevelMinSeqno();
|
|
152
|
+
|
|
146
153
|
Status MaybeIncreaseFullHistoryTsLowToAboveCutoffUDT();
|
|
147
154
|
|
|
148
155
|
const std::string& dbname_;
|
|
@@ -161,6 +168,7 @@ class FlushJob {
|
|
|
161
168
|
InstrumentedMutex* db_mutex_;
|
|
162
169
|
std::atomic<bool>* shutting_down_;
|
|
163
170
|
std::vector<SequenceNumber> existing_snapshots_;
|
|
171
|
+
SequenceNumber earliest_snapshot_;
|
|
164
172
|
SequenceNumber earliest_write_conflict_snapshot_;
|
|
165
173
|
SnapshotChecker* snapshot_checker_;
|
|
166
174
|
JobContext* job_context_;
|
|
@@ -221,6 +229,12 @@ class FlushJob {
|
|
|
221
229
|
// Keeps track of the newest user-defined timestamp for this flush job if
|
|
222
230
|
// `persist_user_defined_timestamps` flag is false.
|
|
223
231
|
std::string cutoff_udt_;
|
|
232
|
+
|
|
233
|
+
// The current minimum seqno that compaction jobs will preclude the data from
|
|
234
|
+
// the last level. Data with seqnos larger than this or larger than
|
|
235
|
+
// `earliest_snapshot_` will be output to the penultimate level had it gone
|
|
236
|
+
// through a compaction to the last level.
|
|
237
|
+
SequenceNumber preclude_last_level_min_seqno_ = kMaxSequenceNumber;
|
|
224
238
|
};
|
|
225
239
|
|
|
226
240
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
#include <limits>
|
|
17
17
|
#include <sstream>
|
|
18
18
|
#include <string>
|
|
19
|
+
#include <unordered_map>
|
|
19
20
|
#include <utility>
|
|
20
21
|
#include <vector>
|
|
21
22
|
|
|
@@ -33,7 +34,6 @@
|
|
|
33
34
|
|
|
34
35
|
namespace ROCKSDB_NAMESPACE {
|
|
35
36
|
|
|
36
|
-
|
|
37
37
|
const std::map<LevelStatType, LevelStat> InternalStats::compaction_level_stats =
|
|
38
38
|
{
|
|
39
39
|
{LevelStatType::NUM_FILES, LevelStat{"NumFiles", "Files"}},
|
|
@@ -2135,5 +2135,64 @@ void InternalStats::DumpCFFileHistogram(std::string* value) {
|
|
|
2135
2135
|
value->append(oss.str());
|
|
2136
2136
|
}
|
|
2137
2137
|
|
|
2138
|
+
namespace {
|
|
2139
|
+
|
|
2140
|
+
class SumPropertyAggregator : public IntPropertyAggregator {
|
|
2141
|
+
public:
|
|
2142
|
+
SumPropertyAggregator() : aggregated_value_(0) {}
|
|
2143
|
+
virtual ~SumPropertyAggregator() override = default;
|
|
2144
|
+
|
|
2145
|
+
void Add(ColumnFamilyData* cfd, uint64_t value) override {
|
|
2146
|
+
(void)cfd;
|
|
2147
|
+
aggregated_value_ += value;
|
|
2148
|
+
}
|
|
2149
|
+
|
|
2150
|
+
uint64_t Aggregate() const override { return aggregated_value_; }
|
|
2151
|
+
|
|
2152
|
+
private:
|
|
2153
|
+
uint64_t aggregated_value_;
|
|
2154
|
+
};
|
|
2155
|
+
|
|
2156
|
+
// A block cache may be shared by multiple column families.
|
|
2157
|
+
// BlockCachePropertyAggregator ensures that the same cache is only added once.
|
|
2158
|
+
class BlockCachePropertyAggregator : public IntPropertyAggregator {
|
|
2159
|
+
public:
|
|
2160
|
+
BlockCachePropertyAggregator() = default;
|
|
2161
|
+
virtual ~BlockCachePropertyAggregator() override = default;
|
|
2162
|
+
|
|
2163
|
+
void Add(ColumnFamilyData* cfd, uint64_t value) override {
|
|
2164
|
+
auto* table_factory = cfd->ioptions()->table_factory.get();
|
|
2165
|
+
assert(table_factory != nullptr);
|
|
2166
|
+
Cache* cache =
|
|
2167
|
+
table_factory->GetOptions<Cache>(TableFactory::kBlockCacheOpts());
|
|
2168
|
+
if (cache != nullptr) {
|
|
2169
|
+
block_cache_properties_.emplace(cache, value);
|
|
2170
|
+
}
|
|
2171
|
+
}
|
|
2172
|
+
|
|
2173
|
+
uint64_t Aggregate() const override {
|
|
2174
|
+
uint64_t sum = 0;
|
|
2175
|
+
for (const auto& p : block_cache_properties_) {
|
|
2176
|
+
sum += p.second;
|
|
2177
|
+
}
|
|
2178
|
+
return sum;
|
|
2179
|
+
}
|
|
2180
|
+
|
|
2181
|
+
private:
|
|
2182
|
+
std::unordered_map<Cache*, uint64_t> block_cache_properties_;
|
|
2183
|
+
};
|
|
2184
|
+
|
|
2185
|
+
} // anonymous namespace
|
|
2186
|
+
|
|
2187
|
+
std::unique_ptr<IntPropertyAggregator> CreateIntPropertyAggregator(
|
|
2188
|
+
const Slice& property) {
|
|
2189
|
+
if (property == DB::Properties::kBlockCacheCapacity ||
|
|
2190
|
+
property == DB::Properties::kBlockCacheUsage ||
|
|
2191
|
+
property == DB::Properties::kBlockCachePinnedUsage) {
|
|
2192
|
+
return std::make_unique<BlockCachePropertyAggregator>();
|
|
2193
|
+
} else {
|
|
2194
|
+
return std::make_unique<SumPropertyAggregator>();
|
|
2195
|
+
}
|
|
2196
|
+
}
|
|
2138
2197
|
|
|
2139
2198
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -432,7 +432,7 @@ class InternalStats {
|
|
|
432
432
|
explicit CompactionStatsFull() : stats(), penultimate_level_stats() {}
|
|
433
433
|
|
|
434
434
|
explicit CompactionStatsFull(CompactionReason reason, int c)
|
|
435
|
-
: stats(reason, c), penultimate_level_stats(reason, c){}
|
|
435
|
+
: stats(reason, c), penultimate_level_stats(reason, c) {}
|
|
436
436
|
|
|
437
437
|
uint64_t TotalBytesWritten() const {
|
|
438
438
|
uint64_t bytes_written = stats.bytes_written + stats.bytes_written_blob;
|
|
@@ -873,5 +873,24 @@ class InternalStats {
|
|
|
873
873
|
uint64_t started_at_;
|
|
874
874
|
};
|
|
875
875
|
|
|
876
|
+
// IntPropertyAggregator aggregates an integer property across all column
|
|
877
|
+
// families.
|
|
878
|
+
class IntPropertyAggregator {
|
|
879
|
+
public:
|
|
880
|
+
IntPropertyAggregator() {}
|
|
881
|
+
virtual ~IntPropertyAggregator() {}
|
|
882
|
+
|
|
883
|
+
IntPropertyAggregator(const IntPropertyAggregator&) = delete;
|
|
884
|
+
void operator=(const IntPropertyAggregator&) = delete;
|
|
885
|
+
|
|
886
|
+
// Add a column family's property value to the aggregator.
|
|
887
|
+
virtual void Add(ColumnFamilyData* cfd, uint64_t value) = 0;
|
|
888
|
+
|
|
889
|
+
// Get the aggregated value.
|
|
890
|
+
virtual uint64_t Aggregate() const = 0;
|
|
891
|
+
};
|
|
892
|
+
|
|
893
|
+
std::unique_ptr<IntPropertyAggregator> CreateIntPropertyAggregator(
|
|
894
|
+
const Slice& property);
|
|
876
895
|
|
|
877
896
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -53,6 +53,11 @@ Writer::~Writer() {
|
|
|
53
53
|
|
|
54
54
|
IOStatus Writer::WriteBuffer(const WriteOptions& write_options) {
|
|
55
55
|
if (dest_->seen_error()) {
|
|
56
|
+
#ifndef NDEBUG
|
|
57
|
+
if (dest_->seen_injected_error()) {
|
|
58
|
+
return IOStatus::IOError("Seen injected error. Skip writing buffer.");
|
|
59
|
+
}
|
|
60
|
+
#endif // NDEBUG
|
|
56
61
|
return IOStatus::IOError("Seen error. Skip writing buffer.");
|
|
57
62
|
}
|
|
58
63
|
IOOptions opts;
|
|
@@ -74,9 +79,23 @@ IOStatus Writer::Close(const WriteOptions& write_options) {
|
|
|
74
79
|
return s;
|
|
75
80
|
}
|
|
76
81
|
|
|
82
|
+
bool Writer::PublishIfClosed() {
|
|
83
|
+
if (dest_->IsClosed()) {
|
|
84
|
+
dest_.reset();
|
|
85
|
+
return true;
|
|
86
|
+
} else {
|
|
87
|
+
return false;
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
77
91
|
IOStatus Writer::AddRecord(const WriteOptions& write_options,
|
|
78
92
|
const Slice& slice) {
|
|
79
93
|
if (dest_->seen_error()) {
|
|
94
|
+
#ifndef NDEBUG
|
|
95
|
+
if (dest_->seen_injected_error()) {
|
|
96
|
+
return IOStatus::IOError("Seen injected error. Skip writing buffer.");
|
|
97
|
+
}
|
|
98
|
+
#endif // NDEBUG
|
|
80
99
|
return IOStatus::IOError("Seen error. Skip writing buffer.");
|
|
81
100
|
}
|
|
82
101
|
const char* ptr = slice.data();
|
|
@@ -184,6 +203,11 @@ IOStatus Writer::AddCompressionTypeRecord(const WriteOptions& write_options) {
|
|
|
184
203
|
}
|
|
185
204
|
|
|
186
205
|
if (dest_->seen_error()) {
|
|
206
|
+
#ifndef NDEBUG
|
|
207
|
+
if (dest_->seen_injected_error()) {
|
|
208
|
+
return IOStatus::IOError("Seen injected error. Skip writing buffer.");
|
|
209
|
+
}
|
|
210
|
+
#endif // NDEBUG
|
|
187
211
|
return IOStatus::IOError("Seen error. Skip writing buffer.");
|
|
188
212
|
}
|
|
189
213
|
|
|
@@ -107,6 +107,11 @@ class Writer {
|
|
|
107
107
|
|
|
108
108
|
IOStatus Close(const WriteOptions& write_options);
|
|
109
109
|
|
|
110
|
+
// If closing the writer through file(), call this afterwards to modify
|
|
111
|
+
// this object's state to reflect that. Returns true if the destination file
|
|
112
|
+
// has been closed. If it hasn't been closed, returns false with no change.
|
|
113
|
+
bool PublishIfClosed();
|
|
114
|
+
|
|
110
115
|
bool BufferIsEmpty();
|
|
111
116
|
|
|
112
117
|
size_t TEST_block_offset() const { return block_offset_; }
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
#include <array>
|
|
14
14
|
#include <limits>
|
|
15
15
|
#include <memory>
|
|
16
|
+
#include <optional>
|
|
16
17
|
|
|
17
18
|
#include "db/dbformat.h"
|
|
18
19
|
#include "db/kv_checksum.h"
|
|
@@ -617,8 +618,9 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIteratorInternal(
|
|
|
617
618
|
}
|
|
618
619
|
|
|
619
620
|
void MemTable::ConstructFragmentedRangeTombstones() {
|
|
620
|
-
|
|
621
|
-
//
|
|
621
|
+
// There should be no concurrent Construction.
|
|
622
|
+
// We could also check fragmented_range_tombstone_list_ to avoid repeate
|
|
623
|
+
// constructions. We just construct them here again to be safe.
|
|
622
624
|
if (!is_range_del_table_empty_.load(std::memory_order_relaxed)) {
|
|
623
625
|
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
624
626
|
auto* unfragmented_iter = new MemTableIterator(
|
|
@@ -955,9 +957,9 @@ static bool SaveValue(void* arg, const char* entry) {
|
|
|
955
957
|
s->key->user_key())) {
|
|
956
958
|
// Correct user key
|
|
957
959
|
TEST_SYNC_POINT_CALLBACK("Memtable::SaveValue:Found:entry", &entry);
|
|
958
|
-
std::
|
|
960
|
+
std::optional<ReadLock> read_lock;
|
|
959
961
|
if (s->inplace_update_support) {
|
|
960
|
-
read_lock.
|
|
962
|
+
read_lock.emplace(s->mem->GetLock(s->key->user_key()));
|
|
961
963
|
}
|
|
962
964
|
|
|
963
965
|
if (s->protection_bytes_per_key > 0) {
|
|
@@ -534,21 +534,21 @@ class MemTable {
|
|
|
534
534
|
// Returns a heuristic flush decision
|
|
535
535
|
bool ShouldFlushNow();
|
|
536
536
|
|
|
537
|
+
// Updates `fragmented_range_tombstone_list_` that will be used to serve reads
|
|
538
|
+
// when this memtable becomes an immutable memtable (in some
|
|
539
|
+
// MemtableListVersion::memlist_). Should be called when this memtable is
|
|
540
|
+
// about to become immutable. May be called multiple times since
|
|
541
|
+
// SwitchMemtable() may fail.
|
|
537
542
|
void ConstructFragmentedRangeTombstones();
|
|
538
543
|
|
|
539
544
|
// Returns whether a fragmented range tombstone list is already constructed
|
|
540
545
|
// for this memtable. It should be constructed right before a memtable is
|
|
541
546
|
// added to an immutable memtable list. Note that if a memtable does not have
|
|
542
|
-
// any range tombstone, then no range tombstone list will ever be constructed
|
|
543
|
-
//
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
return fragmented_range_tombstone_list_.get() != nullptr ||
|
|
548
|
-
is_range_del_table_empty_;
|
|
549
|
-
} else {
|
|
550
|
-
return fragmented_range_tombstone_list_.get() != nullptr;
|
|
551
|
-
}
|
|
547
|
+
// any range tombstone, then no range tombstone list will ever be constructed
|
|
548
|
+
// and true is returned in that case.
|
|
549
|
+
bool IsFragmentedRangeTombstonesConstructed() const {
|
|
550
|
+
return fragmented_range_tombstone_list_.get() != nullptr ||
|
|
551
|
+
is_range_del_table_empty_;
|
|
552
552
|
}
|
|
553
553
|
|
|
554
554
|
// Get the newest user-defined timestamp contained in this MemTable. Check
|
|
@@ -235,19 +235,19 @@ void MemTableListVersion::AddIterators(
|
|
|
235
235
|
SequenceNumber read_seq = options.snapshot != nullptr
|
|
236
236
|
? options.snapshot->GetSequenceNumber()
|
|
237
237
|
: kMaxSequenceNumber;
|
|
238
|
-
TruncatedRangeDelIterator
|
|
238
|
+
std::unique_ptr<TruncatedRangeDelIterator> mem_tombstone_iter;
|
|
239
239
|
auto range_del_iter = m->NewRangeTombstoneIterator(
|
|
240
240
|
options, read_seq, true /* immutale_memtable */);
|
|
241
241
|
if (range_del_iter == nullptr || range_del_iter->empty()) {
|
|
242
242
|
delete range_del_iter;
|
|
243
243
|
} else {
|
|
244
|
-
mem_tombstone_iter =
|
|
244
|
+
mem_tombstone_iter = std::make_unique<TruncatedRangeDelIterator>(
|
|
245
245
|
std::unique_ptr<FragmentedRangeTombstoneIterator>(range_del_iter),
|
|
246
246
|
&m->GetInternalKeyComparator(), nullptr /* smallest */,
|
|
247
247
|
nullptr /* largest */);
|
|
248
248
|
}
|
|
249
|
-
merge_iter_builder->AddPointAndTombstoneIterator(
|
|
250
|
-
|
|
249
|
+
merge_iter_builder->AddPointAndTombstoneIterator(
|
|
250
|
+
mem_iter, std::move(mem_tombstone_iter));
|
|
251
251
|
}
|
|
252
252
|
}
|
|
253
253
|
}
|
|
@@ -85,7 +85,7 @@ class MultiCfIteratorImpl {
|
|
|
85
85
|
void Next() {
|
|
86
86
|
assert(Valid());
|
|
87
87
|
auto& min_heap = GetHeap<MultiCfMinHeap>([this]() {
|
|
88
|
-
|
|
88
|
+
std::string target(key().data(), key().size());
|
|
89
89
|
InitMinHeap();
|
|
90
90
|
Seek(target);
|
|
91
91
|
});
|
|
@@ -94,7 +94,7 @@ class MultiCfIteratorImpl {
|
|
|
94
94
|
void Prev() {
|
|
95
95
|
assert(Valid());
|
|
96
96
|
auto& max_heap = GetHeap<MultiCfMaxHeap>([this]() {
|
|
97
|
-
|
|
97
|
+
std::string target(key().data(), key().size());
|
|
98
98
|
InitMaxHeap();
|
|
99
99
|
SeekForPrev(target);
|
|
100
100
|
});
|
|
@@ -198,8 +198,15 @@ class MultiCfIteratorImpl {
|
|
|
198
198
|
|
|
199
199
|
template <typename BinaryHeap, typename AdvanceFuncType>
|
|
200
200
|
void AdvanceIterator(BinaryHeap& heap, AdvanceFuncType advance_func) {
|
|
201
|
-
assert(!heap.empty());
|
|
202
201
|
reset_func_();
|
|
202
|
+
// It is possible for one or more child iters are at invalid keys due to
|
|
203
|
+
// manual prefix iteration. For such cases, we consider the result of the
|
|
204
|
+
// multi-cf-iter is also undefined.
|
|
205
|
+
// https://github.com/facebook/rocksdb/wiki/Prefix-Seek#manual-prefix-iterating
|
|
206
|
+
// for details about manual prefix iteration
|
|
207
|
+
if (heap.empty()) {
|
|
208
|
+
return;
|
|
209
|
+
}
|
|
203
210
|
|
|
204
211
|
// 1. Keep the top iterator (by popping it from the heap)
|
|
205
212
|
// 2. Make sure all others have iterated past the top iterator key slice
|
|
@@ -17,16 +17,6 @@
|
|
|
17
17
|
#include "table/internal_iterator.h"
|
|
18
18
|
|
|
19
19
|
namespace ROCKSDB_NAMESPACE {
|
|
20
|
-
struct FragmentedRangeTombstoneList;
|
|
21
|
-
|
|
22
|
-
struct FragmentedRangeTombstoneListCache {
|
|
23
|
-
// ensure only the first reader needs to initialize l
|
|
24
|
-
std::mutex reader_mutex;
|
|
25
|
-
std::unique_ptr<FragmentedRangeTombstoneList> tombstones = nullptr;
|
|
26
|
-
// readers will first check this bool to avoid
|
|
27
|
-
std::atomic<bool> initialized = false;
|
|
28
|
-
};
|
|
29
|
-
|
|
30
20
|
struct FragmentedRangeTombstoneList {
|
|
31
21
|
public:
|
|
32
22
|
// A compact representation of a "stack" of range tombstone fragments, which
|
|
@@ -124,6 +114,14 @@ struct FragmentedRangeTombstoneList {
|
|
|
124
114
|
uint64_t total_tombstone_payload_bytes_;
|
|
125
115
|
};
|
|
126
116
|
|
|
117
|
+
struct FragmentedRangeTombstoneListCache {
|
|
118
|
+
// ensure only the first reader needs to initialize l
|
|
119
|
+
std::mutex reader_mutex;
|
|
120
|
+
std::unique_ptr<FragmentedRangeTombstoneList> tombstones = nullptr;
|
|
121
|
+
// readers will first check this bool to avoid
|
|
122
|
+
std::atomic<bool> initialized = false;
|
|
123
|
+
};
|
|
124
|
+
|
|
127
125
|
// FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del
|
|
128
126
|
// meta block into an iterator over non-overlapping tombstone fragments. The
|
|
129
127
|
// tombstone fragmentation process should be more efficient than the range
|
|
@@ -480,9 +480,10 @@ class Repairer {
|
|
|
480
480
|
dbname_, /* versions */ nullptr, immutable_db_options_, tboptions,
|
|
481
481
|
file_options_, table_cache_.get(), iter.get(),
|
|
482
482
|
std::move(range_del_iters), &meta, nullptr /* blob_file_additions */,
|
|
483
|
-
{}, kMaxSequenceNumber, kMaxSequenceNumber,
|
|
484
|
-
false /* paranoid_file_checks*/,
|
|
485
|
-
nullptr /*
|
|
483
|
+
{}, kMaxSequenceNumber, kMaxSequenceNumber, kMaxSequenceNumber,
|
|
484
|
+
snapshot_checker, false /* paranoid_file_checks*/,
|
|
485
|
+
nullptr /* internal_stats */, &io_s, nullptr /*IOTracer*/,
|
|
486
|
+
BlobFileCreationReason::kRecovery,
|
|
486
487
|
nullptr /* seqno_to_time_mapping */, nullptr /* event_logger */,
|
|
487
488
|
0 /* job_id */, nullptr /* table_properties */, write_hint);
|
|
488
489
|
ROCKS_LOG_INFO(db_options_.info_log,
|
|
@@ -69,6 +69,36 @@ SequenceNumber SeqnoToTimeMapping::GetProximalSeqnoBeforeTime(
|
|
|
69
69
|
return it->seqno;
|
|
70
70
|
}
|
|
71
71
|
|
|
72
|
+
void SeqnoToTimeMapping::GetCurrentTieringCutoffSeqnos(
|
|
73
|
+
uint64_t current_time, uint64_t preserve_internal_time_seconds,
|
|
74
|
+
uint64_t preclude_last_level_data_seconds,
|
|
75
|
+
SequenceNumber* preserve_time_min_seqno,
|
|
76
|
+
SequenceNumber* preclude_last_level_min_seqno) const {
|
|
77
|
+
uint64_t preserve_time_duration = std::max(preserve_internal_time_seconds,
|
|
78
|
+
preclude_last_level_data_seconds);
|
|
79
|
+
if (preserve_time_duration <= 0) {
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
uint64_t preserve_time = current_time > preserve_time_duration
|
|
83
|
+
? current_time - preserve_time_duration
|
|
84
|
+
: 0;
|
|
85
|
+
// GetProximalSeqnoBeforeTime tells us the last seqno known to have been
|
|
86
|
+
// written at or before the given time. + 1 to get the minimum we should
|
|
87
|
+
// preserve without excluding anything that might have been written on or
|
|
88
|
+
// after the given time.
|
|
89
|
+
if (preserve_time_min_seqno) {
|
|
90
|
+
*preserve_time_min_seqno = GetProximalSeqnoBeforeTime(preserve_time) + 1;
|
|
91
|
+
}
|
|
92
|
+
if (preclude_last_level_data_seconds > 0 && preclude_last_level_min_seqno) {
|
|
93
|
+
uint64_t preclude_last_level_time =
|
|
94
|
+
current_time > preclude_last_level_data_seconds
|
|
95
|
+
? current_time - preclude_last_level_data_seconds
|
|
96
|
+
: 0;
|
|
97
|
+
*preclude_last_level_min_seqno =
|
|
98
|
+
GetProximalSeqnoBeforeTime(preclude_last_level_time) + 1;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
72
102
|
void SeqnoToTimeMapping::EnforceMaxTimeSpan(uint64_t now) {
|
|
73
103
|
assert(enforced_); // at least sorted
|
|
74
104
|
uint64_t cutoff_time;
|
|
@@ -213,6 +213,15 @@ class SeqnoToTimeMapping {
|
|
|
213
213
|
// must be in enforced state as a precondition.
|
|
214
214
|
SequenceNumber GetProximalSeqnoBeforeTime(uint64_t time) const;
|
|
215
215
|
|
|
216
|
+
// Given current time, the configured `preserve_internal_time_seconds`, and
|
|
217
|
+
// `preclude_last_level_data_seconds`, find the relevant cutoff sequence
|
|
218
|
+
// numbers for tiering.
|
|
219
|
+
void GetCurrentTieringCutoffSeqnos(
|
|
220
|
+
uint64_t current_time, uint64_t preserve_internal_time_seconds,
|
|
221
|
+
uint64_t preclude_last_level_data_seconds,
|
|
222
|
+
SequenceNumber* preserve_time_min_seqno,
|
|
223
|
+
SequenceNumber* preclude_last_level_min_seqno) const;
|
|
224
|
+
|
|
216
225
|
// Encode to a binary string by appending to `dest`.
|
|
217
226
|
// Because this is a const operation depending on sortedness, the structure
|
|
218
227
|
// must be in enforced state as a precondition.
|
|
@@ -163,6 +163,11 @@ Status TableCache::GetTableReader(
|
|
|
163
163
|
return s;
|
|
164
164
|
}
|
|
165
165
|
|
|
166
|
+
Cache::Handle* TableCache::Lookup(Cache* cache, uint64_t file_number) {
|
|
167
|
+
Slice key = GetSliceForFileNumber(&file_number);
|
|
168
|
+
return cache->Lookup(key);
|
|
169
|
+
}
|
|
170
|
+
|
|
166
171
|
Status TableCache::FindTable(
|
|
167
172
|
const ReadOptions& ro, const FileOptions& file_options,
|
|
168
173
|
const InternalKeyComparator& internal_comparator,
|
|
@@ -225,7 +230,7 @@ InternalIterator* TableCache::NewIterator(
|
|
|
225
230
|
const InternalKey* smallest_compaction_key,
|
|
226
231
|
const InternalKey* largest_compaction_key, bool allow_unprepared_value,
|
|
227
232
|
uint8_t block_protection_bytes_per_key, const SequenceNumber* read_seqno,
|
|
228
|
-
TruncatedRangeDelIterator
|
|
233
|
+
std::unique_ptr<TruncatedRangeDelIterator>* range_del_iter) {
|
|
229
234
|
PERF_TIMER_GUARD(new_table_iterator_nanos);
|
|
230
235
|
|
|
231
236
|
Status s;
|
|
@@ -280,7 +285,7 @@ InternalIterator* TableCache::NewIterator(
|
|
|
280
285
|
delete new_range_del_iter;
|
|
281
286
|
*range_del_iter = nullptr;
|
|
282
287
|
} else {
|
|
283
|
-
*range_del_iter =
|
|
288
|
+
*range_del_iter = std::make_unique<TruncatedRangeDelIterator>(
|
|
284
289
|
std::unique_ptr<FragmentedRangeTombstoneIterator>(
|
|
285
290
|
new_range_del_iter),
|
|
286
291
|
&icomparator, &file_meta.smallest, &file_meta.largest);
|
|
@@ -727,4 +732,14 @@ uint64_t TableCache::ApproximateSize(
|
|
|
727
732
|
|
|
728
733
|
return result;
|
|
729
734
|
}
|
|
735
|
+
|
|
736
|
+
void TableCache::ReleaseObsolete(Cache* cache, Cache::Handle* h,
|
|
737
|
+
uint32_t uncache_aggressiveness) {
|
|
738
|
+
CacheInterface typed_cache(cache);
|
|
739
|
+
TypedHandle* table_handle = reinterpret_cast<TypedHandle*>(h);
|
|
740
|
+
TableReader* table_reader = typed_cache.Value(table_handle);
|
|
741
|
+
table_reader->MarkObsolete(uncache_aggressiveness);
|
|
742
|
+
typed_cache.ReleaseAndEraseIfLastRef(table_handle);
|
|
743
|
+
}
|
|
744
|
+
|
|
730
745
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -100,7 +100,7 @@ class TableCache {
|
|
|
100
100
|
const InternalKey* largest_compaction_key, bool allow_unprepared_value,
|
|
101
101
|
uint8_t protection_bytes_per_key,
|
|
102
102
|
const SequenceNumber* range_del_read_seqno = nullptr,
|
|
103
|
-
TruncatedRangeDelIterator
|
|
103
|
+
std::unique_ptr<TruncatedRangeDelIterator>* range_del_iter = nullptr);
|
|
104
104
|
|
|
105
105
|
// If a seek to internal key "k" in specified file finds an entry,
|
|
106
106
|
// call get_context->SaveValue() repeatedly until
|
|
@@ -165,6 +165,14 @@ class TableCache {
|
|
|
165
165
|
// Evict any entry for the specified file number
|
|
166
166
|
static void Evict(Cache* cache, uint64_t file_number);
|
|
167
167
|
|
|
168
|
+
// Handles releasing, erasing, etc. of what should be the last reference
|
|
169
|
+
// to an obsolete file.
|
|
170
|
+
static void ReleaseObsolete(Cache* cache, Cache::Handle* handle,
|
|
171
|
+
uint32_t uncache_aggressiveness);
|
|
172
|
+
|
|
173
|
+
// Return handle to an existing cache entry if there is one
|
|
174
|
+
static Cache::Handle* Lookup(Cache* cache, uint64_t file_number);
|
|
175
|
+
|
|
168
176
|
// Find table reader
|
|
169
177
|
// @param skip_filters Disables loading/accessing the filter block
|
|
170
178
|
// @param level == -1 means not specified
|
|
@@ -44,7 +44,9 @@ class InternalTblPropCollFactory {
|
|
|
44
44
|
virtual ~InternalTblPropCollFactory() {}
|
|
45
45
|
// has to be thread-safe
|
|
46
46
|
virtual InternalTblPropColl* CreateInternalTblPropColl(
|
|
47
|
-
uint32_t column_family_id, int level_at_creation
|
|
47
|
+
uint32_t column_family_id, int level_at_creation, int num_levels,
|
|
48
|
+
SequenceNumber last_level_inclusive_max_seqno_threshold =
|
|
49
|
+
kMaxSequenceNumber) = 0;
|
|
48
50
|
|
|
49
51
|
// The name of the properties collector can be used for debugging purpose.
|
|
50
52
|
virtual const char* Name() const = 0;
|
|
@@ -92,10 +94,15 @@ class UserKeyTablePropertiesCollectorFactory
|
|
|
92
94
|
std::shared_ptr<TablePropertiesCollectorFactory> user_collector_factory)
|
|
93
95
|
: user_collector_factory_(user_collector_factory) {}
|
|
94
96
|
InternalTblPropColl* CreateInternalTblPropColl(
|
|
95
|
-
uint32_t column_family_id, int level_at_creation
|
|
97
|
+
uint32_t column_family_id, int level_at_creation, int num_levels,
|
|
98
|
+
SequenceNumber last_level_inclusive_max_seqno_threshold =
|
|
99
|
+
kMaxSequenceNumber) override {
|
|
96
100
|
TablePropertiesCollectorFactory::Context context;
|
|
97
101
|
context.column_family_id = column_family_id;
|
|
98
102
|
context.level_at_creation = level_at_creation;
|
|
103
|
+
context.num_levels = num_levels;
|
|
104
|
+
context.last_level_inclusive_max_seqno_threshold =
|
|
105
|
+
last_level_inclusive_max_seqno_threshold;
|
|
99
106
|
TablePropertiesCollector* collector =
|
|
100
107
|
user_collector_factory_->CreateTablePropertiesCollector(context);
|
|
101
108
|
if (collector) {
|
|
@@ -209,7 +209,9 @@ class RegularKeysStartWithAFactory : public InternalTblPropCollFactory,
|
|
|
209
209
|
}
|
|
210
210
|
}
|
|
211
211
|
InternalTblPropColl* CreateInternalTblPropColl(
|
|
212
|
-
uint32_t /*column_family_id*/, int /* level_at_creation
|
|
212
|
+
uint32_t /*column_family_id*/, int /* level_at_creation */,
|
|
213
|
+
int /* num_levels */,
|
|
214
|
+
SequenceNumber /* last_level_inclusive_max_seqno_threshold */) override {
|
|
213
215
|
return new RegularKeysStartWithAInternal();
|
|
214
216
|
}
|
|
215
217
|
const char* Name() const override { return "RegularKeysStartWithA"; }
|
|
@@ -18,7 +18,7 @@ TransactionLogIteratorImpl::TransactionLogIteratorImpl(
|
|
|
18
18
|
const std::string& dir, const ImmutableDBOptions* options,
|
|
19
19
|
const TransactionLogIterator::ReadOptions& read_options,
|
|
20
20
|
const EnvOptions& soptions, const SequenceNumber seq,
|
|
21
|
-
std::unique_ptr<
|
|
21
|
+
std::unique_ptr<VectorWalPtr> files, VersionSet const* const versions,
|
|
22
22
|
const bool seq_per_batch, const std::shared_ptr<IOTracer>& io_tracer)
|
|
23
23
|
: dir_(dir),
|
|
24
24
|
options_(options),
|
|
@@ -44,7 +44,7 @@ TransactionLogIteratorImpl::TransactionLogIteratorImpl(
|
|
|
44
44
|
}
|
|
45
45
|
|
|
46
46
|
Status TransactionLogIteratorImpl::OpenLogFile(
|
|
47
|
-
const
|
|
47
|
+
const WalFile* log_file,
|
|
48
48
|
std::unique_ptr<SequentialFileReader>* file_reader) {
|
|
49
49
|
FileSystemPtr fs(options_->fs, io_tracer_);
|
|
50
50
|
std::unique_ptr<FSSequentialFile> file;
|
|
@@ -281,7 +281,7 @@ void TransactionLogIteratorImpl::UpdateCurrentWriteBatch(const Slice& record) {
|
|
|
281
281
|
current_status_ = Status::OK();
|
|
282
282
|
}
|
|
283
283
|
|
|
284
|
-
Status TransactionLogIteratorImpl::OpenLogReader(const
|
|
284
|
+
Status TransactionLogIteratorImpl::OpenLogReader(const WalFile* log_file) {
|
|
285
285
|
std::unique_ptr<SequentialFileReader> file;
|
|
286
286
|
Status s = OpenLogFile(log_file, &file);
|
|
287
287
|
if (!s.ok()) {
|
|
@@ -19,9 +19,9 @@
|
|
|
19
19
|
|
|
20
20
|
namespace ROCKSDB_NAMESPACE {
|
|
21
21
|
|
|
22
|
-
class
|
|
22
|
+
class WalFileImpl : public WalFile {
|
|
23
23
|
public:
|
|
24
|
-
|
|
24
|
+
WalFileImpl(uint64_t logNum, WalFileType logType, SequenceNumber startSeq,
|
|
25
25
|
uint64_t sizeBytes)
|
|
26
26
|
: logNumber_(logNum),
|
|
27
27
|
type_(logType),
|
|
@@ -43,7 +43,7 @@ class LogFileImpl : public LogFile {
|
|
|
43
43
|
|
|
44
44
|
uint64_t SizeFileBytes() const override { return sizeFileBytes_; }
|
|
45
45
|
|
|
46
|
-
bool operator<(const
|
|
46
|
+
bool operator<(const WalFile& that) const {
|
|
47
47
|
return LogNumber() < that.LogNumber();
|
|
48
48
|
}
|
|
49
49
|
|
|
@@ -60,7 +60,7 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
|
|
|
60
60
|
const std::string& dir, const ImmutableDBOptions* options,
|
|
61
61
|
const TransactionLogIterator::ReadOptions& read_options,
|
|
62
62
|
const EnvOptions& soptions, const SequenceNumber seqNum,
|
|
63
|
-
std::unique_ptr<
|
|
63
|
+
std::unique_ptr<VectorWalPtr> files, VersionSet const* const versions,
|
|
64
64
|
const bool seq_per_batch, const std::shared_ptr<IOTracer>& io_tracer);
|
|
65
65
|
|
|
66
66
|
bool Valid() override;
|
|
@@ -77,7 +77,7 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
|
|
|
77
77
|
const TransactionLogIterator::ReadOptions read_options_;
|
|
78
78
|
const EnvOptions& soptions_;
|
|
79
79
|
SequenceNumber starting_sequence_number_;
|
|
80
|
-
std::unique_ptr<
|
|
80
|
+
std::unique_ptr<VectorWalPtr> files_;
|
|
81
81
|
// Used only to get latest seq. num
|
|
82
82
|
// TODO(icanadi) can this be just a callback?
|
|
83
83
|
VersionSet const* const versions_;
|
|
@@ -92,7 +92,7 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
|
|
|
92
92
|
std::unique_ptr<WriteBatch> current_batch_;
|
|
93
93
|
std::unique_ptr<log::Reader> current_log_reader_;
|
|
94
94
|
std::string scratch_;
|
|
95
|
-
Status OpenLogFile(const
|
|
95
|
+
Status OpenLogFile(const WalFile* log_file,
|
|
96
96
|
std::unique_ptr<SequentialFileReader>* file);
|
|
97
97
|
|
|
98
98
|
struct LogReporter : public log::Reader::Reporter {
|
|
@@ -123,6 +123,6 @@ class TransactionLogIteratorImpl : public TransactionLogIterator {
|
|
|
123
123
|
bool IsBatchExpected(const WriteBatch* batch, SequenceNumber expected_seq);
|
|
124
124
|
// Update current batch if a continuous batch is found.
|
|
125
125
|
void UpdateCurrentWriteBatch(const Slice& record);
|
|
126
|
-
Status OpenLogReader(const
|
|
126
|
+
Status OpenLogReader(const WalFile* file);
|
|
127
127
|
};
|
|
128
128
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -364,14 +364,15 @@ class DumpManifestHandler : public VersionEditHandler {
|
|
|
364
364
|
|
|
365
365
|
Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override {
|
|
366
366
|
// Write out each individual edit
|
|
367
|
-
if (
|
|
367
|
+
if (json_) {
|
|
368
368
|
// Print out DebugStrings. Can include non-terminating null characters.
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
369
|
+
std::string edit_dump_str = edit.DebugJSON(count_, hex_);
|
|
370
|
+
fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
|
|
371
|
+
fwrite("\n", sizeof(char), 1, stdout);
|
|
372
|
+
} else if (verbose_) {
|
|
372
373
|
// Print out DebugStrings. Can include non-terminating null characters.
|
|
373
|
-
|
|
374
|
-
|
|
374
|
+
std::string edit_dump_str = edit.DebugString(hex_);
|
|
375
|
+
fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
|
|
375
376
|
}
|
|
376
377
|
++count_;
|
|
377
378
|
return VersionEditHandler::ApplyVersionEdit(edit, cfd);
|