@nxtedition/rocksdb 13.5.13 → 14.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +33 -2
- package/binding.gyp +2 -2
- package/chained-batch.js +9 -16
- package/deps/rocksdb/rocksdb/BUCK +18 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
- package/deps/rocksdb/rocksdb/Makefile +20 -9
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
- package/deps/rocksdb/rocksdb/db/c.cc +207 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
- package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
- package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
- package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
- package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
- package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
- package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
- package/deps/rocksdb/rocksdb/env/env.cc +12 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
- package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/options.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
- package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
- package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
- package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
- package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
- package/deps/rocksdb/rocksdb/table/format.cc +6 -12
- package/deps/rocksdb/rocksdb/table/format.h +10 -0
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
- package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
- package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
- package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
- package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
- package/deps/rocksdb/rocksdb/util/coding.h +3 -3
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
- package/deps/rocksdb/rocksdb/util/compression.h +5 -0
- package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
- package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
- package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
- package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
- package/index.js +3 -3
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/util.h +38 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
|
@@ -502,8 +502,7 @@ Status FlushJob::MemPurge() {
|
|
|
502
502
|
kMaxSequenceNumber, &job_context_->snapshot_seqs, earliest_snapshot_,
|
|
503
503
|
job_context_->earliest_write_conflict_snapshot,
|
|
504
504
|
job_context_->GetJobSnapshotSequence(), job_context_->snapshot_checker,
|
|
505
|
-
env, ShouldReportDetailedTime(env, ioptions.stats),
|
|
506
|
-
true /* internal key corruption is not ok */, range_del_agg.get(),
|
|
505
|
+
env, ShouldReportDetailedTime(env, ioptions.stats), range_del_agg.get(),
|
|
507
506
|
nullptr, ioptions.allow_data_in_errors,
|
|
508
507
|
ioptions.enforce_single_del_contracts,
|
|
509
508
|
/*manual_compaction_canceled=*/kManualCompactionCanceledFalse,
|
|
@@ -1105,13 +1104,13 @@ Status FlushJob::WriteLevel0Table() {
|
|
|
1105
1104
|
const uint64_t micros = clock_->NowMicros() - start_micros;
|
|
1106
1105
|
const uint64_t cpu_micros = clock_->CPUMicros() - start_cpu_micros;
|
|
1107
1106
|
flush_stats.micros = micros;
|
|
1108
|
-
flush_stats.cpu_micros
|
|
1107
|
+
flush_stats.cpu_micros += cpu_micros;
|
|
1109
1108
|
|
|
1110
1109
|
ROCKS_LOG_INFO(db_options_.info_log,
|
|
1111
1110
|
"[%s] [JOB %d] Flush lasted %" PRIu64
|
|
1112
1111
|
" microseconds, and %" PRIu64 " cpu microseconds.\n",
|
|
1113
1112
|
cfd_->GetName().c_str(), job_context_->job_id, micros,
|
|
1114
|
-
cpu_micros);
|
|
1113
|
+
flush_stats.cpu_micros);
|
|
1115
1114
|
|
|
1116
1115
|
if (has_output) {
|
|
1117
1116
|
flush_stats.bytes_written = meta_.fd.GetFileSize();
|
|
@@ -380,8 +380,11 @@ void Reader::MaybeVerifyPredecessorWALInfo(
|
|
|
380
380
|
} else {
|
|
381
381
|
if (observed_predecessor_wal_info_.GetLogNumber() !=
|
|
382
382
|
recorded_predecessor_log_number) {
|
|
383
|
-
std::string reason =
|
|
384
|
-
|
|
383
|
+
std::string reason =
|
|
384
|
+
"Mismatched predecessor log number of WAL file " +
|
|
385
|
+
file_->file_name() + " Recorded " +
|
|
386
|
+
std::to_string(recorded_predecessor_log_number) + ". Observed " +
|
|
387
|
+
std::to_string(observed_predecessor_wal_info_.GetLogNumber());
|
|
385
388
|
ReportCorruption(fragment.size(), reason.c_str(),
|
|
386
389
|
recorded_predecessor_log_number);
|
|
387
390
|
} else if (observed_predecessor_wal_info_.GetLastSeqnoRecorded() !=
|
|
@@ -70,7 +70,9 @@ ImmutableMemTableOptions::ImmutableMemTableOptions(
|
|
|
70
70
|
protection_bytes_per_key(
|
|
71
71
|
mutable_cf_options.memtable_protection_bytes_per_key),
|
|
72
72
|
allow_data_in_errors(ioptions.allow_data_in_errors),
|
|
73
|
-
paranoid_memory_checks(mutable_cf_options.paranoid_memory_checks)
|
|
73
|
+
paranoid_memory_checks(mutable_cf_options.paranoid_memory_checks),
|
|
74
|
+
memtable_veirfy_per_key_checksum_on_seek(
|
|
75
|
+
mutable_cf_options.memtable_veirfy_per_key_checksum_on_seek) {}
|
|
74
76
|
|
|
75
77
|
MemTable::MemTable(const InternalKeyComparator& cmp,
|
|
76
78
|
const ImmutableOptions& ioptions,
|
|
@@ -115,7 +117,13 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
|
|
115
117
|
oldest_key_time_(std::numeric_limits<uint64_t>::max()),
|
|
116
118
|
approximate_memory_usage_(0),
|
|
117
119
|
memtable_max_range_deletions_(
|
|
118
|
-
mutable_cf_options.memtable_max_range_deletions)
|
|
120
|
+
mutable_cf_options.memtable_max_range_deletions),
|
|
121
|
+
key_validation_callback_(
|
|
122
|
+
(moptions_.protection_bytes_per_key != 0 &&
|
|
123
|
+
moptions_.memtable_veirfy_per_key_checksum_on_seek)
|
|
124
|
+
? std::bind(&MemTable::ValidateKey, this, std::placeholders::_1,
|
|
125
|
+
std::placeholders::_2)
|
|
126
|
+
: std::function<Status(const char*, bool)>(nullptr)) {
|
|
119
127
|
UpdateFlushState();
|
|
120
128
|
// something went wrong if we need to flush before inserting anything
|
|
121
129
|
assert(!ShouldScheduleFlush());
|
|
@@ -134,6 +142,16 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
|
|
134
142
|
auto new_cache = std::make_shared<FragmentedRangeTombstoneListCache>();
|
|
135
143
|
size_t size = cached_range_tombstone_.Size();
|
|
136
144
|
for (size_t i = 0; i < size; ++i) {
|
|
145
|
+
#if defined(__cpp_lib_atomic_shared_ptr)
|
|
146
|
+
std::atomic<std::shared_ptr<FragmentedRangeTombstoneListCache>>*
|
|
147
|
+
local_cache_ref_ptr = cached_range_tombstone_.AccessAtCore(i);
|
|
148
|
+
auto new_local_cache_ref = std::make_shared<
|
|
149
|
+
const std::shared_ptr<FragmentedRangeTombstoneListCache>>(new_cache);
|
|
150
|
+
std::shared_ptr<FragmentedRangeTombstoneListCache> aliased_ptr(
|
|
151
|
+
new_local_cache_ref, new_cache.get());
|
|
152
|
+
local_cache_ref_ptr->store(std::move(aliased_ptr),
|
|
153
|
+
std::memory_order_relaxed);
|
|
154
|
+
#else
|
|
137
155
|
std::shared_ptr<FragmentedRangeTombstoneListCache>* local_cache_ref_ptr =
|
|
138
156
|
cached_range_tombstone_.AccessAtCore(i);
|
|
139
157
|
auto new_local_cache_ref = std::make_shared<
|
|
@@ -143,6 +161,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp,
|
|
|
143
161
|
std::shared_ptr<FragmentedRangeTombstoneListCache>(new_local_cache_ref,
|
|
144
162
|
new_cache.get()),
|
|
145
163
|
std::memory_order_relaxed);
|
|
164
|
+
#endif
|
|
146
165
|
}
|
|
147
166
|
const Comparator* ucmp = cmp.user_comparator();
|
|
148
167
|
assert(ucmp);
|
|
@@ -168,7 +187,7 @@ size_t MemTable::ApproximateMemoryUsage() {
|
|
|
168
187
|
}
|
|
169
188
|
total_usage += usage;
|
|
170
189
|
}
|
|
171
|
-
approximate_memory_usage_.
|
|
190
|
+
approximate_memory_usage_.StoreRelaxed(total_usage);
|
|
172
191
|
// otherwise, return the actual usage
|
|
173
192
|
return total_usage;
|
|
174
193
|
}
|
|
@@ -182,12 +201,12 @@ bool MemTable::ShouldFlushNow() {
|
|
|
182
201
|
// This is set if memtable_max_range_deletions is > 0,
|
|
183
202
|
// and that many range deletions are done
|
|
184
203
|
if (memtable_max_range_deletions_ > 0 &&
|
|
185
|
-
num_range_deletes_.
|
|
204
|
+
num_range_deletes_.LoadRelaxed() >=
|
|
186
205
|
static_cast<uint64_t>(memtable_max_range_deletions_)) {
|
|
187
206
|
return true;
|
|
188
207
|
}
|
|
189
208
|
|
|
190
|
-
size_t write_buffer_size = write_buffer_size_.
|
|
209
|
+
size_t write_buffer_size = write_buffer_size_.LoadRelaxed();
|
|
191
210
|
// In a lot of times, we cannot allocate arena blocks that exactly matches the
|
|
192
211
|
// buffer size. Thus we have to decide if we should over-allocate or
|
|
193
212
|
// under-allocate.
|
|
@@ -200,10 +219,10 @@ bool MemTable::ShouldFlushNow() {
|
|
|
200
219
|
assert(range_del_table_->ApproximateMemoryUsage() == 0);
|
|
201
220
|
// If arena still have room for new block allocation, we can safely say it
|
|
202
221
|
// shouldn't flush.
|
|
203
|
-
auto allocated_memory =
|
|
204
|
-
|
|
222
|
+
auto allocated_memory =
|
|
223
|
+
table_->ApproximateMemoryUsage() + arena_.MemoryAllocatedBytes();
|
|
205
224
|
|
|
206
|
-
approximate_memory_usage_.
|
|
225
|
+
approximate_memory_usage_.StoreRelaxed(allocated_memory);
|
|
207
226
|
|
|
208
227
|
// if we can still allocate one more block without exceeding the
|
|
209
228
|
// over-allocation ratio, then we should not flush.
|
|
@@ -383,7 +402,11 @@ class MemTableIterator : public InternalIterator {
|
|
|
383
402
|
!mem.GetImmutableMemTableOptions()->inplace_update_support),
|
|
384
403
|
arena_mode_(arena != nullptr),
|
|
385
404
|
paranoid_memory_checks_(mem.moptions_.paranoid_memory_checks),
|
|
386
|
-
|
|
405
|
+
validate_on_seek_(
|
|
406
|
+
mem.moptions_.paranoid_memory_checks ||
|
|
407
|
+
mem.moptions_.memtable_veirfy_per_key_checksum_on_seek),
|
|
408
|
+
allow_data_in_error_(mem.moptions_.allow_data_in_errors),
|
|
409
|
+
key_validation_callback_(mem.key_validation_callback_) {
|
|
387
410
|
if (kind == kRangeDelEntries) {
|
|
388
411
|
iter_ = mem.range_del_table_->GetIterator(arena);
|
|
389
412
|
} else if (prefix_extractor_ != nullptr &&
|
|
@@ -452,8 +475,10 @@ class MemTableIterator : public InternalIterator {
|
|
|
452
475
|
}
|
|
453
476
|
}
|
|
454
477
|
}
|
|
455
|
-
if (
|
|
456
|
-
status_ = iter_->SeekAndValidate(k, nullptr,
|
|
478
|
+
if (validate_on_seek_) {
|
|
479
|
+
status_ = iter_->SeekAndValidate(k, nullptr, allow_data_in_error_,
|
|
480
|
+
paranoid_memory_checks_,
|
|
481
|
+
key_validation_callback_);
|
|
457
482
|
} else {
|
|
458
483
|
iter_->Seek(k, nullptr);
|
|
459
484
|
}
|
|
@@ -477,8 +502,10 @@ class MemTableIterator : public InternalIterator {
|
|
|
477
502
|
}
|
|
478
503
|
}
|
|
479
504
|
}
|
|
480
|
-
if (
|
|
481
|
-
status_ = iter_->SeekAndValidate(k, nullptr,
|
|
505
|
+
if (validate_on_seek_) {
|
|
506
|
+
status_ = iter_->SeekAndValidate(k, nullptr, allow_data_in_error_,
|
|
507
|
+
paranoid_memory_checks_,
|
|
508
|
+
key_validation_callback_);
|
|
482
509
|
} else {
|
|
483
510
|
iter_->Seek(k, nullptr);
|
|
484
511
|
}
|
|
@@ -507,7 +534,7 @@ class MemTableIterator : public InternalIterator {
|
|
|
507
534
|
PERF_COUNTER_ADD(next_on_memtable_count, 1);
|
|
508
535
|
assert(Valid());
|
|
509
536
|
if (paranoid_memory_checks_) {
|
|
510
|
-
status_ = iter_->NextAndValidate(
|
|
537
|
+
status_ = iter_->NextAndValidate(allow_data_in_error_);
|
|
511
538
|
} else {
|
|
512
539
|
iter_->Next();
|
|
513
540
|
TEST_SYNC_POINT_CALLBACK("MemTableIterator::Next:0", iter_);
|
|
@@ -529,7 +556,7 @@ class MemTableIterator : public InternalIterator {
|
|
|
529
556
|
PERF_COUNTER_ADD(prev_on_memtable_count, 1);
|
|
530
557
|
assert(Valid());
|
|
531
558
|
if (paranoid_memory_checks_) {
|
|
532
|
-
status_ = iter_->PrevAndValidate(
|
|
559
|
+
status_ = iter_->PrevAndValidate(allow_data_in_error_);
|
|
533
560
|
} else {
|
|
534
561
|
iter_->Prev();
|
|
535
562
|
}
|
|
@@ -588,7 +615,9 @@ class MemTableIterator : public InternalIterator {
|
|
|
588
615
|
bool value_pinned_;
|
|
589
616
|
bool arena_mode_;
|
|
590
617
|
const bool paranoid_memory_checks_;
|
|
591
|
-
const bool
|
|
618
|
+
const bool validate_on_seek_;
|
|
619
|
+
const bool allow_data_in_error_;
|
|
620
|
+
const std::function<Status(const char*, bool)> key_validation_callback_;
|
|
592
621
|
|
|
593
622
|
void VerifyEntryChecksum() {
|
|
594
623
|
if (protection_bytes_per_key_ > 0 && Valid()) {
|
|
@@ -745,7 +774,7 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator(
|
|
|
745
774
|
const ReadOptions& read_options, SequenceNumber read_seq,
|
|
746
775
|
bool immutable_memtable) {
|
|
747
776
|
if (read_options.ignore_range_deletions ||
|
|
748
|
-
is_range_del_table_empty_.
|
|
777
|
+
is_range_del_table_empty_.LoadRelaxed()) {
|
|
749
778
|
return nullptr;
|
|
750
779
|
}
|
|
751
780
|
return NewRangeTombstoneIteratorInternal(read_options, read_seq,
|
|
@@ -756,7 +785,7 @@ FragmentedRangeTombstoneIterator*
|
|
|
756
785
|
MemTable::NewTimestampStrippingRangeTombstoneIterator(
|
|
757
786
|
const ReadOptions& read_options, SequenceNumber read_seq, size_t ts_sz) {
|
|
758
787
|
if (read_options.ignore_range_deletions ||
|
|
759
|
-
is_range_del_table_empty_.
|
|
788
|
+
is_range_del_table_empty_.LoadRelaxed()) {
|
|
760
789
|
return nullptr;
|
|
761
790
|
}
|
|
762
791
|
if (!timestamp_stripping_fragmented_range_tombstone_list_) {
|
|
@@ -790,8 +819,13 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIteratorInternal(
|
|
|
790
819
|
|
|
791
820
|
// takes current cache
|
|
792
821
|
std::shared_ptr<FragmentedRangeTombstoneListCache> cache =
|
|
822
|
+
#if defined(__cpp_lib_atomic_shared_ptr)
|
|
823
|
+
cached_range_tombstone_.Access()->load(std::memory_order_relaxed)
|
|
824
|
+
#else
|
|
793
825
|
std::atomic_load_explicit(cached_range_tombstone_.Access(),
|
|
794
|
-
std::memory_order_relaxed)
|
|
826
|
+
std::memory_order_relaxed)
|
|
827
|
+
#endif
|
|
828
|
+
;
|
|
795
829
|
// construct fragmented tombstone list if necessary
|
|
796
830
|
if (!cache->initialized.load(std::memory_order_acquire)) {
|
|
797
831
|
cache->reader_mutex.lock();
|
|
@@ -815,7 +849,7 @@ void MemTable::ConstructFragmentedRangeTombstones() {
|
|
|
815
849
|
// There should be no concurrent Construction.
|
|
816
850
|
// We could also check fragmented_range_tombstone_list_ to avoid repeate
|
|
817
851
|
// constructions. We just construct them here again to be safe.
|
|
818
|
-
if (!is_range_del_table_empty_.
|
|
852
|
+
if (!is_range_del_table_empty_.LoadRelaxed()) {
|
|
819
853
|
// TODO: plumb Env::IOActivity, Env::IOPriority
|
|
820
854
|
auto* unfragmented_iter = new MemTableIterator(
|
|
821
855
|
MemTableIterator::kRangeDelEntries, *this, ReadOptions());
|
|
@@ -838,7 +872,7 @@ ReadOnlyMemTable::MemTableStats MemTable::ApproximateStats(
|
|
|
838
872
|
if (entry_count == 0) {
|
|
839
873
|
return {0, 0};
|
|
840
874
|
}
|
|
841
|
-
uint64_t n = num_entries_.
|
|
875
|
+
uint64_t n = num_entries_.LoadRelaxed();
|
|
842
876
|
if (n == 0) {
|
|
843
877
|
return {0, 0};
|
|
844
878
|
}
|
|
@@ -848,7 +882,7 @@ ReadOnlyMemTable::MemTableStats MemTable::ApproximateStats(
|
|
|
848
882
|
// the inaccuracy.
|
|
849
883
|
entry_count = n;
|
|
850
884
|
}
|
|
851
|
-
uint64_t data_size = data_size_.
|
|
885
|
+
uint64_t data_size = data_size_.LoadRelaxed();
|
|
852
886
|
return {entry_count * (data_size / n), entry_count};
|
|
853
887
|
}
|
|
854
888
|
|
|
@@ -978,17 +1012,14 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
978
1012
|
|
|
979
1013
|
// this is a bit ugly, but is the way to avoid locked instructions
|
|
980
1014
|
// when incrementing an atomic
|
|
981
|
-
num_entries_.
|
|
982
|
-
|
|
983
|
-
data_size_.store(data_size_.load(std::memory_order_relaxed) + encoded_len,
|
|
984
|
-
std::memory_order_relaxed);
|
|
1015
|
+
num_entries_.StoreRelaxed(num_entries_.LoadRelaxed() + 1);
|
|
1016
|
+
data_size_.StoreRelaxed(data_size_.LoadRelaxed() + encoded_len);
|
|
985
1017
|
if (type == kTypeDeletion || type == kTypeSingleDeletion ||
|
|
986
1018
|
type == kTypeDeletionWithTimestamp) {
|
|
987
|
-
num_deletes_.
|
|
988
|
-
std::memory_order_relaxed);
|
|
1019
|
+
num_deletes_.StoreRelaxed(num_deletes_.LoadRelaxed() + 1);
|
|
989
1020
|
} else if (type == kTypeRangeDeletion) {
|
|
990
|
-
uint64_t val = num_range_deletes_.
|
|
991
|
-
num_range_deletes_.
|
|
1021
|
+
uint64_t val = num_range_deletes_.LoadRelaxed() + 1;
|
|
1022
|
+
num_range_deletes_.StoreRelaxed(val);
|
|
992
1023
|
}
|
|
993
1024
|
|
|
994
1025
|
if (bloom_filter_ && prefix_extractor_ &&
|
|
@@ -1059,6 +1090,16 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
1059
1090
|
range_del_mutex_.lock();
|
|
1060
1091
|
}
|
|
1061
1092
|
for (size_t i = 0; i < size; ++i) {
|
|
1093
|
+
#if defined(__cpp_lib_atomic_shared_ptr)
|
|
1094
|
+
std::atomic<std::shared_ptr<FragmentedRangeTombstoneListCache>>*
|
|
1095
|
+
local_cache_ref_ptr = cached_range_tombstone_.AccessAtCore(i);
|
|
1096
|
+
auto new_local_cache_ref = std::make_shared<
|
|
1097
|
+
const std::shared_ptr<FragmentedRangeTombstoneListCache>>(new_cache);
|
|
1098
|
+
std::shared_ptr<FragmentedRangeTombstoneListCache> aliased_ptr(
|
|
1099
|
+
new_local_cache_ref, new_cache.get());
|
|
1100
|
+
local_cache_ref_ptr->store(std::move(aliased_ptr),
|
|
1101
|
+
std::memory_order_relaxed);
|
|
1102
|
+
#else
|
|
1062
1103
|
std::shared_ptr<FragmentedRangeTombstoneListCache>* local_cache_ref_ptr =
|
|
1063
1104
|
cached_range_tombstone_.AccessAtCore(i);
|
|
1064
1105
|
auto new_local_cache_ref = std::make_shared<
|
|
@@ -1073,12 +1114,13 @@ Status MemTable::Add(SequenceNumber s, ValueType type,
|
|
|
1073
1114
|
std::shared_ptr<FragmentedRangeTombstoneListCache>(
|
|
1074
1115
|
new_local_cache_ref, new_cache.get()),
|
|
1075
1116
|
std::memory_order_relaxed);
|
|
1117
|
+
#endif
|
|
1076
1118
|
}
|
|
1077
1119
|
|
|
1078
1120
|
if (allow_concurrent) {
|
|
1079
1121
|
range_del_mutex_.unlock();
|
|
1080
1122
|
}
|
|
1081
|
-
is_range_del_table_empty_.
|
|
1123
|
+
is_range_del_table_empty_.StoreRelaxed(false);
|
|
1082
1124
|
}
|
|
1083
1125
|
UpdateOldestKeyTime();
|
|
1084
1126
|
|
|
@@ -1469,11 +1511,13 @@ void MemTable::GetFromTable(const LookupKey& key,
|
|
|
1469
1511
|
saver.allow_data_in_errors = moptions_.allow_data_in_errors;
|
|
1470
1512
|
saver.protection_bytes_per_key = moptions_.protection_bytes_per_key;
|
|
1471
1513
|
|
|
1472
|
-
if (!moptions_.paranoid_memory_checks
|
|
1514
|
+
if (!moptions_.paranoid_memory_checks &&
|
|
1515
|
+
!moptions_.memtable_veirfy_per_key_checksum_on_seek) {
|
|
1473
1516
|
table_->Get(key, &saver, SaveValue);
|
|
1474
1517
|
} else {
|
|
1475
|
-
Status check_s = table_->GetAndValidate(
|
|
1476
|
-
|
|
1518
|
+
Status check_s = table_->GetAndValidate(
|
|
1519
|
+
key, &saver, SaveValue, moptions_.allow_data_in_errors,
|
|
1520
|
+
moptions_.paranoid_memory_checks, key_validation_callback_);
|
|
1477
1521
|
if (check_s.IsCorruption()) {
|
|
1478
1522
|
*(saver.status) = check_s;
|
|
1479
1523
|
// Should stop searching the LSM.
|
|
@@ -1484,6 +1528,11 @@ void MemTable::GetFromTable(const LookupKey& key,
|
|
|
1484
1528
|
*seq = saver.seq;
|
|
1485
1529
|
}
|
|
1486
1530
|
|
|
1531
|
+
Status MemTable::ValidateKey(const char* key, bool allow_data_in_errors) {
|
|
1532
|
+
return VerifyEntryChecksum(key, moptions_.protection_bytes_per_key,
|
|
1533
|
+
allow_data_in_errors);
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1487
1536
|
void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
1488
1537
|
ReadCallback* callback, bool immutable_memtable) {
|
|
1489
1538
|
// The sequence number is updated synchronously in version_set.h
|
|
@@ -1497,7 +1546,7 @@ void MemTable::MultiGet(const ReadOptions& read_options, MultiGetRange* range,
|
|
|
1497
1546
|
// range tombstones. This is the simplest way to ensure range tombstones are
|
|
1498
1547
|
// handled. TODO: allow Bloom checks where max_covering_tombstone_seq==0
|
|
1499
1548
|
bool no_range_del = read_options.ignore_range_deletions ||
|
|
1500
|
-
is_range_del_table_empty_.
|
|
1549
|
+
is_range_del_table_empty_.LoadRelaxed();
|
|
1501
1550
|
MultiGetRange temp_range(*range, range->begin(), range->end());
|
|
1502
1551
|
if (bloom_filter_ && no_range_del) {
|
|
1503
1552
|
bool whole_key =
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
9
9
|
|
|
10
10
|
#pragma once
|
|
11
|
-
#include <atomic>
|
|
12
11
|
#include <deque>
|
|
13
12
|
#include <functional>
|
|
14
13
|
#include <memory>
|
|
@@ -65,6 +64,7 @@ struct ImmutableMemTableOptions {
|
|
|
65
64
|
uint32_t protection_bytes_per_key;
|
|
66
65
|
bool allow_data_in_errors;
|
|
67
66
|
bool paranoid_memory_checks;
|
|
67
|
+
bool memtable_veirfy_per_key_checksum_on_seek;
|
|
68
68
|
};
|
|
69
69
|
|
|
70
70
|
// Batched counters to updated when inserting keys in one write batch.
|
|
@@ -568,7 +568,7 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
568
568
|
// As a cheap version of `ApproximateMemoryUsage()`, this function doesn't
|
|
569
569
|
// require external synchronization. The value may be less accurate though
|
|
570
570
|
size_t ApproximateMemoryUsageFast() const {
|
|
571
|
-
return approximate_memory_usage_.
|
|
571
|
+
return approximate_memory_usage_.LoadRelaxed();
|
|
572
572
|
}
|
|
573
573
|
|
|
574
574
|
size_t MemoryAllocatedBytes() const override {
|
|
@@ -689,16 +689,13 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
689
689
|
// Used in concurrent memtable inserts.
|
|
690
690
|
void BatchPostProcess(const MemTablePostProcessInfo& update_counters) {
|
|
691
691
|
table_->BatchPostProcess();
|
|
692
|
-
num_entries_.
|
|
693
|
-
|
|
694
|
-
data_size_.fetch_add(update_counters.data_size, std::memory_order_relaxed);
|
|
692
|
+
num_entries_.FetchAddRelaxed(update_counters.num_entries);
|
|
693
|
+
data_size_.FetchAddRelaxed(update_counters.data_size);
|
|
695
694
|
if (update_counters.num_deletes != 0) {
|
|
696
|
-
num_deletes_.
|
|
697
|
-
std::memory_order_relaxed);
|
|
695
|
+
num_deletes_.FetchAddRelaxed(update_counters.num_deletes);
|
|
698
696
|
}
|
|
699
697
|
if (update_counters.num_range_deletes > 0) {
|
|
700
|
-
num_range_deletes_.
|
|
701
|
-
std::memory_order_relaxed);
|
|
698
|
+
num_range_deletes_.FetchAddRelaxed(update_counters.num_range_deletes);
|
|
702
699
|
// noop for skip-list memtable
|
|
703
700
|
// Besides correctness test in stress test, memtable flush record count
|
|
704
701
|
// check will catch this if it were not noop.
|
|
@@ -707,35 +704,26 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
707
704
|
UpdateFlushState();
|
|
708
705
|
}
|
|
709
706
|
|
|
710
|
-
uint64_t NumEntries() const override {
|
|
711
|
-
return num_entries_.load(std::memory_order_relaxed);
|
|
712
|
-
}
|
|
707
|
+
uint64_t NumEntries() const override { return num_entries_.LoadRelaxed(); }
|
|
713
708
|
|
|
714
|
-
uint64_t NumDeletion() const override {
|
|
715
|
-
return num_deletes_.load(std::memory_order_relaxed);
|
|
716
|
-
}
|
|
709
|
+
uint64_t NumDeletion() const override { return num_deletes_.LoadRelaxed(); }
|
|
717
710
|
|
|
718
711
|
uint64_t NumRangeDeletion() const override {
|
|
719
|
-
return num_range_deletes_.
|
|
712
|
+
return num_range_deletes_.LoadRelaxed();
|
|
720
713
|
}
|
|
721
714
|
|
|
722
|
-
uint64_t GetDataSize() const override {
|
|
723
|
-
return data_size_.load(std::memory_order_relaxed);
|
|
724
|
-
}
|
|
715
|
+
uint64_t GetDataSize() const override { return data_size_.LoadRelaxed(); }
|
|
725
716
|
|
|
726
|
-
size_t write_buffer_size() const {
|
|
727
|
-
return write_buffer_size_.load(std::memory_order_relaxed);
|
|
728
|
-
}
|
|
717
|
+
size_t write_buffer_size() const { return write_buffer_size_.LoadRelaxed(); }
|
|
729
718
|
|
|
730
719
|
// Dynamically change the memtable's capacity. If set below the current usage,
|
|
731
720
|
// the next key added will trigger a flush. Can only increase size when
|
|
732
721
|
// memtable prefix bloom is disabled, since we can't easily allocate more
|
|
733
|
-
// space.
|
|
722
|
+
// space. Non-atomic update ok because this is only called with DB mutex held.
|
|
734
723
|
void UpdateWriteBufferSize(size_t new_write_buffer_size) {
|
|
735
724
|
if (bloom_filter_ == nullptr ||
|
|
736
|
-
new_write_buffer_size < write_buffer_size_) {
|
|
737
|
-
write_buffer_size_.
|
|
738
|
-
std::memory_order_relaxed);
|
|
725
|
+
new_write_buffer_size < write_buffer_size_.LoadRelaxed()) {
|
|
726
|
+
write_buffer_size_.StoreRelaxed(new_write_buffer_size);
|
|
739
727
|
}
|
|
740
728
|
}
|
|
741
729
|
|
|
@@ -827,7 +815,7 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
827
815
|
|
|
828
816
|
bool IsFragmentedRangeTombstonesConstructed() const override {
|
|
829
817
|
return fragmented_range_tombstone_list_.get() != nullptr ||
|
|
830
|
-
is_range_del_table_empty_;
|
|
818
|
+
is_range_del_table_empty_.LoadRelaxed();
|
|
831
819
|
}
|
|
832
820
|
|
|
833
821
|
// Gets the newest user defined timestamps in the memtable. This should only
|
|
@@ -839,6 +827,9 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
839
827
|
uint32_t protection_bytes_per_key,
|
|
840
828
|
bool allow_data_in_errors = false);
|
|
841
829
|
|
|
830
|
+
// Validate the checksum of the key/value pair.
|
|
831
|
+
Status ValidateKey(const char* key, bool allow_data_in_errors);
|
|
832
|
+
|
|
842
833
|
private:
|
|
843
834
|
enum FlushStateEnum { FLUSH_NOT_REQUESTED, FLUSH_REQUESTED, FLUSH_SCHEDULED };
|
|
844
835
|
|
|
@@ -853,16 +844,22 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
853
844
|
ConcurrentArena arena_;
|
|
854
845
|
std::unique_ptr<MemTableRep> table_;
|
|
855
846
|
std::unique_ptr<MemTableRep> range_del_table_;
|
|
856
|
-
|
|
847
|
+
// This is OK to be relaxed access because consistency between table_ and
|
|
848
|
+
// range_del_table_ is provided by explicit multi-versioning with sequence
|
|
849
|
+
// numbers. It's ok for stale memory to say the range_del_table_ is empty when
|
|
850
|
+
// it's actually not because if it was relevant to our read (based on sequence
|
|
851
|
+
// number), the relaxed memory read would get a sufficiently updated value
|
|
852
|
+
// because of the ordering provided by LastPublishedSequence().
|
|
853
|
+
RelaxedAtomic<bool> is_range_del_table_empty_;
|
|
857
854
|
|
|
858
855
|
// Total data size of all data inserted
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
856
|
+
RelaxedAtomic<uint64_t> data_size_;
|
|
857
|
+
RelaxedAtomic<uint64_t> num_entries_;
|
|
858
|
+
RelaxedAtomic<uint64_t> num_deletes_;
|
|
859
|
+
RelaxedAtomic<uint64_t> num_range_deletes_;
|
|
863
860
|
|
|
864
861
|
// Dynamically changeable memtable option
|
|
865
|
-
|
|
862
|
+
RelaxedAtomic<size_t> write_buffer_size_;
|
|
866
863
|
|
|
867
864
|
// The sequence number of the kv that was inserted first
|
|
868
865
|
std::atomic<SequenceNumber> first_seqno_;
|
|
@@ -898,7 +895,7 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
898
895
|
|
|
899
896
|
// keep track of memory usage in table_, arena_, and range_del_table_.
|
|
900
897
|
// Gets refreshed inside `ApproximateMemoryUsage()` or `ShouldFlushNow`
|
|
901
|
-
|
|
898
|
+
RelaxedAtomic<uint64_t> approximate_memory_usage_;
|
|
902
899
|
|
|
903
900
|
// max range deletions in a memtable, before automatic flushing, 0 for
|
|
904
901
|
// unlimited.
|
|
@@ -949,14 +946,22 @@ class MemTable final : public ReadOnlyMemTable {
|
|
|
949
946
|
|
|
950
947
|
// makes sure there is a single range tombstone writer to invalidate cache
|
|
951
948
|
std::mutex range_del_mutex_;
|
|
949
|
+
#if defined(__cpp_lib_atomic_shared_ptr)
|
|
950
|
+
CoreLocalArray<
|
|
951
|
+
std::atomic<std::shared_ptr<FragmentedRangeTombstoneListCache>>>
|
|
952
|
+
cached_range_tombstone_;
|
|
953
|
+
#else
|
|
952
954
|
CoreLocalArray<std::shared_ptr<FragmentedRangeTombstoneListCache>>
|
|
953
955
|
cached_range_tombstone_;
|
|
954
956
|
|
|
957
|
+
#endif
|
|
955
958
|
void UpdateEntryChecksum(const ProtectionInfoKVOS64* kv_prot_info,
|
|
956
959
|
const Slice& key, const Slice& value, ValueType type,
|
|
957
960
|
SequenceNumber s, char* checksum_ptr);
|
|
958
961
|
|
|
959
962
|
void MaybeUpdateNewestUDT(const Slice& user_key);
|
|
963
|
+
|
|
964
|
+
const std::function<Status(const char*, bool)> key_validation_callback_;
|
|
960
965
|
};
|
|
961
966
|
|
|
962
967
|
const char* EncodeKey(std::string* scratch, const Slice& target);
|
|
@@ -497,6 +497,7 @@ Status MergeHelper::MergeUntil(InternalIterator* iter,
|
|
|
497
497
|
ikey.sequence <= latest_snapshot_
|
|
498
498
|
? CompactionFilter::Decision::kKeep
|
|
499
499
|
: FilterMerge(orig_ikey.user_key, value_slice);
|
|
500
|
+
// FIXME: should also check for kRemove here
|
|
500
501
|
if (filter != CompactionFilter::Decision::kRemoveAndSkipUntil &&
|
|
501
502
|
range_del_agg != nullptr &&
|
|
502
503
|
range_del_agg->ShouldDelete(
|
|
@@ -32,6 +32,7 @@ bool MergeOperator::FullMergeV3(const MergeOperationInputV3& merge_in,
|
|
|
32
32
|
MergeOperationOutputV3* merge_out) const {
|
|
33
33
|
assert(merge_out);
|
|
34
34
|
|
|
35
|
+
Slice value_of_default; // avoid warning about in_v2 pointing at this
|
|
35
36
|
MergeOperationInput in_v2(merge_in.key, nullptr, merge_in.operand_list,
|
|
36
37
|
merge_in.logger);
|
|
37
38
|
|
|
@@ -66,7 +67,6 @@ bool MergeOperator::FullMergeV3(const MergeOperationInputV3& merge_in,
|
|
|
66
67
|
const bool has_default_column =
|
|
67
68
|
WideColumnsHelper::HasDefaultColumn(existing_columns);
|
|
68
69
|
|
|
69
|
-
Slice value_of_default;
|
|
70
70
|
if (has_default_column) {
|
|
71
71
|
value_of_default = existing_columns.front().value();
|
|
72
72
|
}
|
|
@@ -10,24 +10,25 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
10
10
|
using MultiScanIterator = MultiScan::MultiScanIterator;
|
|
11
11
|
|
|
12
12
|
MultiScan::MultiScan(const ReadOptions& read_options,
|
|
13
|
-
const
|
|
13
|
+
const MultiScanArgs& scan_opts, DB* db,
|
|
14
14
|
ColumnFamilyHandle* cfh)
|
|
15
15
|
: read_options_(read_options), scan_opts_(scan_opts), db_(db), cfh_(cfh) {
|
|
16
16
|
bool slow_path = false;
|
|
17
17
|
// Setup read_options with iterate_uuper_bound based on the first scan.
|
|
18
18
|
// Subsequent scans will update and allocate a new DB iterator as necessary
|
|
19
|
-
if (scan_opts[0].range.limit) {
|
|
20
|
-
upper_bound_ = *scan_opts[0].range.limit;
|
|
19
|
+
if (scan_opts.GetScanRanges()[0].range.limit) {
|
|
20
|
+
upper_bound_ = *scan_opts.GetScanRanges()[0].range.limit;
|
|
21
21
|
read_options_.iterate_upper_bound = &upper_bound_;
|
|
22
22
|
} else {
|
|
23
23
|
read_options_.iterate_upper_bound = nullptr;
|
|
24
24
|
}
|
|
25
|
-
for (auto opts : scan_opts) {
|
|
25
|
+
for (const auto& opts : scan_opts.GetScanRanges()) {
|
|
26
26
|
// Check that all the ScanOptions either specify an upper bound or not. If
|
|
27
27
|
// its mixed we take the slow path which avoids calling Prepare: we have to
|
|
28
28
|
// reallocate the Iterator with updated read_options everytime we switch
|
|
29
29
|
// between upper bound or no upper bound, which complicates Prepare.
|
|
30
|
-
if (opts.range.limit.has_value() !=
|
|
30
|
+
if (opts.range.limit.has_value() !=
|
|
31
|
+
scan_opts.GetScanRanges()[0].range.limit.has_value()) {
|
|
31
32
|
slow_path = true;
|
|
32
33
|
break;
|
|
33
34
|
}
|
|
@@ -39,6 +40,11 @@ MultiScan::MultiScan(const ReadOptions& read_options,
|
|
|
39
40
|
}
|
|
40
41
|
|
|
41
42
|
MultiScanIterator& MultiScanIterator::operator++() {
|
|
43
|
+
status_ = db_iter_->status();
|
|
44
|
+
if (!status_.ok()) {
|
|
45
|
+
throw MultiScanException(status_);
|
|
46
|
+
}
|
|
47
|
+
|
|
42
48
|
if (idx_ >= scan_opts_.size()) {
|
|
43
49
|
throw std::logic_error("Index out of range");
|
|
44
50
|
}
|
|
@@ -396,7 +396,7 @@ const char* VersionEdit::DecodeNewFile4From(Slice* input) {
|
|
|
396
396
|
return "temperature field wrong size";
|
|
397
397
|
} else {
|
|
398
398
|
Temperature casted_field = static_cast<Temperature>(field[0]);
|
|
399
|
-
if (casted_field
|
|
399
|
+
if (casted_field < Temperature::kLastTemperature) {
|
|
400
400
|
f.temperature = casted_field;
|
|
401
401
|
}
|
|
402
402
|
}
|
|
@@ -110,7 +110,7 @@ constexpr uint64_t kUnknownOldestAncesterTime = 0;
|
|
|
110
110
|
constexpr uint64_t kUnknownNewestKeyTime = 0;
|
|
111
111
|
constexpr uint64_t kUnknownFileCreationTime = 0;
|
|
112
112
|
constexpr uint64_t kUnknownEpochNumber = 0;
|
|
113
|
-
// If `Options::
|
|
113
|
+
// If `Options::cf_allow_ingest_behind` is true, this epoch number
|
|
114
114
|
// will be dedicated to files ingested behind.
|
|
115
115
|
constexpr uint64_t kReservedEpochNumberForFileIngestedBehind = 1;
|
|
116
116
|
|