@nxtedition/rocksdb 13.1.4 → 13.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +43 -16
- package/deps/rocksdb/rocksdb/{TARGETS → BUCK} +27 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +3 -1
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/cache/cache.cc +3 -1
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +2 -0
- package/deps/rocksdb/rocksdb/db/attribute_group_iterator_impl.h +34 -9
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +7 -6
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +5 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +22 -14
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
- package/deps/rocksdb/rocksdb/db/builder.cc +13 -24
- package/deps/rocksdb/rocksdb/db/coalescing_iterator.h +35 -10
- package/deps/rocksdb/rocksdb/db/column_family.cc +21 -10
- package/deps/rocksdb/rocksdb/db/column_family.h +15 -8
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +98 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +126 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +51 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +24 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +52 -22
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +9 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +36 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +6 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +30 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +26 -23
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +43 -33
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +19 -9
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.h +6 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +632 -411
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +171 -51
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +37 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +51 -11
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +10 -3
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +350 -154
- package/deps/rocksdb/rocksdb/db/convenience.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +62 -27
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +68 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +91 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +134 -70
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +71 -23
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +43 -16
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc +47 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +27 -19
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +38 -25
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +7 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +258 -42
- package/deps/rocksdb/rocksdb/db/db_io_failure_test.cc +161 -9
- package/deps/rocksdb/rocksdb/db/db_iter.cc +118 -86
- package/deps/rocksdb/rocksdb/db/db_iter.h +44 -17
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +27 -6
- package/deps/rocksdb/rocksdb/db/db_test.cc +48 -16
- package/deps/rocksdb/rocksdb/db/db_test2.cc +60 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +97 -44
- package/deps/rocksdb/rocksdb/db/db_test_util.h +7 -1
- package/deps/rocksdb/rocksdb/db/dbformat.cc +15 -5
- package/deps/rocksdb/rocksdb/db/dbformat.h +137 -55
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +54 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +663 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +152 -91
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +134 -11
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +55 -9
- package/deps/rocksdb/rocksdb/db/flush_job.cc +52 -29
- package/deps/rocksdb/rocksdb/db/flush_job.h +5 -3
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +18 -12
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +23 -29
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +3 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +2 -0
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +9 -6
- package/deps/rocksdb/rocksdb/db/internal_stats.h +54 -0
- package/deps/rocksdb/rocksdb/db/job_context.h +1 -1
- package/deps/rocksdb/rocksdb/db/log_reader.cc +6 -7
- package/deps/rocksdb/rocksdb/db/manifest_ops.cc +47 -0
- package/deps/rocksdb/rocksdb/db/manifest_ops.h +20 -0
- package/deps/rocksdb/rocksdb/db/memtable.cc +165 -64
- package/deps/rocksdb/rocksdb/db/memtable.h +422 -243
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +99 -68
- package/deps/rocksdb/rocksdb/db/memtable_list.h +63 -38
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +28 -25
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_impl.h +118 -60
- package/deps/rocksdb/rocksdb/db/multi_cf_iterator_test.cc +344 -89
- package/deps/rocksdb/rocksdb/db/range_tombstone_fragmenter.h +2 -3
- package/deps/rocksdb/rocksdb/db/repair.cc +15 -14
- package/deps/rocksdb/rocksdb/db/repair_test.cc +0 -13
- package/deps/rocksdb/rocksdb/db/snapshot_checker.h +7 -0
- package/deps/rocksdb/rocksdb/db/table_cache.cc +62 -65
- package/deps/rocksdb/rocksdb/db/table_cache.h +70 -76
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +5 -6
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +1 -1
- package/deps/rocksdb/rocksdb/db/transaction_log_impl.cc +8 -7
- package/deps/rocksdb/rocksdb/db/version_builder.cc +17 -19
- package/deps/rocksdb/rocksdb/db/version_builder.h +13 -12
- package/deps/rocksdb/rocksdb/db/version_edit.h +30 -0
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +3 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +89 -129
- package/deps/rocksdb/rocksdb/db/version_set.h +12 -4
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -2
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +12 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +0 -15
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +0 -2
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +9 -7
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.cc +0 -8
- package/deps/rocksdb/rocksdb/db/wide/wide_columns_helper.h +28 -2
- package/deps/rocksdb/rocksdb/db/write_batch.cc +32 -10
- package/deps/rocksdb/rocksdb/db/write_batch_internal.h +9 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/write_thread.cc +3 -1
- package/deps/rocksdb/rocksdb/db/write_thread.h +6 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc +15 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc +7 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +18 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +100 -22
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +15 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +34 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +223 -78
- package/deps/rocksdb/rocksdb/env/file_system.cc +6 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +53 -0
- package/deps/rocksdb/rocksdb/env/io_posix.cc +63 -17
- package/deps/rocksdb/rocksdb/env/io_posix.h +30 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +132 -48
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +92 -24
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +727 -109
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +3 -4
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/attribute_groups.h +20 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/configurable.h +9 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/convenience.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +10 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/experimental.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +34 -37
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator_base.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +56 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +36 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +11 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +1 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/options_type.h +84 -60
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/secondary_index.h +102 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h +89 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +32 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +30 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +23 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +2 -0
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +79 -21
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +41 -18
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +1 -5
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.cc +169 -0
- package/deps/rocksdb/rocksdb/memtable/wbwi_memtable.h +400 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc +2 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +137 -82
- package/deps/rocksdb/rocksdb/options/cf_options.h +18 -6
- package/deps/rocksdb/rocksdb/options/configurable.cc +31 -17
- package/deps/rocksdb/rocksdb/options/configurable_helper.h +7 -6
- package/deps/rocksdb/rocksdb/options/options_helper.cc +10 -8
- package/deps/rocksdb/rocksdb/options/options_parser.cc +74 -54
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +89 -0
- package/deps/rocksdb/rocksdb/options/options_test.cc +112 -26
- package/deps/rocksdb/rocksdb/port/port.h +5 -9
- package/deps/rocksdb/rocksdb/src.mk +8 -0
- package/deps/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/block_based/block.h +1 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +62 -80
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.h +13 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +16 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +38 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +12 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +4 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +204 -1
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +3 -3
- package/deps/rocksdb/rocksdb/table/block_fetcher_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/format.cc +3 -3
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +4 -1
- package/deps/rocksdb/rocksdb/table/mock_table.cc +0 -50
- package/deps/rocksdb/rocksdb/table/mock_table.h +53 -0
- package/deps/rocksdb/rocksdb/table/plain/plain_table_factory.h +4 -0
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +10 -5
- package/deps/rocksdb/rocksdb/table/table_builder.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_properties.cc +181 -0
- package/deps/rocksdb/rocksdb/table/table_reader_bench.cc +5 -5
- package/deps/rocksdb/rocksdb/table/table_test.cc +71 -64
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py +45 -45
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py +35 -35
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +43 -43
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +1 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +1 -1
- package/deps/rocksdb/rocksdb/unreleased_history/add.sh +13 -0
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +24 -5
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +7 -0
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.cc +0 -52
- package/deps/rocksdb/rocksdb/util/file_checksum_helper.h +1 -10
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_operation.h +1 -0
- package/deps/rocksdb/rocksdb/util/udt_util.cc +50 -4
- package/deps/rocksdb/rocksdb/util/udt_util.h +24 -11
- package/deps/rocksdb/rocksdb/util/udt_util_test.cc +26 -13
- package/deps/rocksdb/rocksdb/utilities/memory/memory_test.cc +1 -16
- package/deps/rocksdb/rocksdb/utilities/options/options_util_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.cc +214 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index.h +60 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/faiss_ivf_index_test.cc +124 -0
- package/deps/rocksdb/rocksdb/utilities/secondary_index/secondary_index_mixin.h +441 -0
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.cc +34 -3
- package/deps/rocksdb/rocksdb/utilities/table_properties_collectors/compact_for_tiering_collector.h +7 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +437 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +34 -11
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +14 -7
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc +7 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc +17 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.cc +69 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +20 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +1290 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +324 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc +18 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h +8 -1
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +57 -12
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +32 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h +33 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +721 -9
- package/deps/rocksdb/rocksdb.gyp +2 -0
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
|
@@ -178,11 +178,9 @@ std::string ParsedInternalKey::DebugString(bool log_err_key, bool hex,
|
|
|
178
178
|
result += "<redacted>";
|
|
179
179
|
}
|
|
180
180
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
static_cast<int>(type));
|
|
181
|
+
result += "' seq:" + std::to_string(sequence);
|
|
182
|
+
result += ", type:" + std::to_string(type);
|
|
184
183
|
|
|
185
|
-
result += buf;
|
|
186
184
|
return result;
|
|
187
185
|
}
|
|
188
186
|
|
|
@@ -272,11 +270,23 @@ LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
|
|
|
272
270
|
|
|
273
271
|
void IterKey::EnlargeBuffer(size_t key_size) {
|
|
274
272
|
// If size is smaller than buffer size, continue using current buffer,
|
|
275
|
-
// or the
|
|
273
|
+
// or the inline one, as default
|
|
276
274
|
assert(key_size > buf_size_);
|
|
277
275
|
// Need to enlarge the buffer.
|
|
278
276
|
ResetBuffer();
|
|
279
277
|
buf_ = new char[key_size];
|
|
280
278
|
buf_size_ = key_size;
|
|
281
279
|
}
|
|
280
|
+
|
|
281
|
+
void IterKey::EnlargeSecondaryBufferIfNeeded(size_t key_size) {
|
|
282
|
+
// If size is smaller than buffer size, continue using current buffer,
|
|
283
|
+
// or the inline one, as default
|
|
284
|
+
if (key_size <= secondary_buf_size_) {
|
|
285
|
+
return;
|
|
286
|
+
}
|
|
287
|
+
// Need to enlarge the secondary buffer.
|
|
288
|
+
ResetSecondaryBuffer();
|
|
289
|
+
secondary_buf_ = new char[key_size];
|
|
290
|
+
secondary_buf_size_ = key_size;
|
|
291
|
+
}
|
|
282
292
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
#pragma once
|
|
11
11
|
#include <stdio.h>
|
|
12
12
|
|
|
13
|
+
#include <array>
|
|
13
14
|
#include <memory>
|
|
14
15
|
#include <optional>
|
|
15
16
|
#include <string>
|
|
@@ -562,18 +563,28 @@ inline uint64_t GetInternalKeySeqno(const Slice& internal_key) {
|
|
|
562
563
|
// allocation for smaller keys.
|
|
563
564
|
// 3. It tracks user key or internal key, and allow conversion between them.
|
|
564
565
|
class IterKey {
|
|
566
|
+
static constexpr size_t kInlineBufferSize = 39;
|
|
567
|
+
// This is only used by user-defined timestamps in MemTable only feature,
|
|
568
|
+
// which only supports uint64_t timestamps.
|
|
569
|
+
static constexpr char kTsMin[] = "\x00\x00\x00\x00\x00\x00\x00\x00";
|
|
570
|
+
|
|
565
571
|
public:
|
|
566
572
|
IterKey()
|
|
567
573
|
: buf_(space_),
|
|
568
574
|
key_(buf_),
|
|
569
575
|
key_size_(0),
|
|
570
|
-
buf_size_(
|
|
571
|
-
is_user_key_(true)
|
|
576
|
+
buf_size_(kInlineBufferSize),
|
|
577
|
+
is_user_key_(true),
|
|
578
|
+
secondary_buf_(space_for_secondary_buf_),
|
|
579
|
+
secondary_buf_size_(kInlineBufferSize) {}
|
|
572
580
|
// No copying allowed
|
|
573
581
|
IterKey(const IterKey&) = delete;
|
|
574
582
|
void operator=(const IterKey&) = delete;
|
|
575
583
|
|
|
576
|
-
~IterKey() {
|
|
584
|
+
~IterKey() {
|
|
585
|
+
ResetBuffer();
|
|
586
|
+
ResetSecondaryBuffer();
|
|
587
|
+
}
|
|
577
588
|
|
|
578
589
|
// The bool will be picked up by the next calls to SetKey
|
|
579
590
|
void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }
|
|
@@ -641,13 +652,15 @@ class IterKey {
|
|
|
641
652
|
const char* non_shared_data,
|
|
642
653
|
const size_t non_shared_len,
|
|
643
654
|
const size_t ts_sz) {
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
655
|
+
// This function is only used by the UDT in memtable feature, which only
|
|
656
|
+
// support built in comparators with uint64 timestamps.
|
|
657
|
+
assert(ts_sz == sizeof(uint64_t));
|
|
658
|
+
size_t next_key_slice_index = 0;
|
|
647
659
|
if (IsUserKey()) {
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
660
|
+
key_slices_[next_key_slice_index++] = Slice(key_, shared_len);
|
|
661
|
+
key_slices_[next_key_slice_index++] =
|
|
662
|
+
Slice(non_shared_data, non_shared_len);
|
|
663
|
+
key_slices_[next_key_slice_index++] = Slice(kTsMin, ts_sz);
|
|
651
664
|
} else {
|
|
652
665
|
assert(shared_len + non_shared_len >= kNumInternalBytes);
|
|
653
666
|
// Invaraint: shared_user_key_len + shared_internal_bytes_len = shared_len
|
|
@@ -664,30 +677,46 @@ class IterKey {
|
|
|
664
677
|
|
|
665
678
|
// One Slice among the three Slices will get split into two Slices, plus
|
|
666
679
|
// a timestamp slice.
|
|
667
|
-
key_parts_with_ts.reserve(5);
|
|
668
680
|
bool ts_added = false;
|
|
669
681
|
// Add slice parts and find the right location to add the min timestamp.
|
|
670
682
|
MaybeAddKeyPartsWithTimestamp(
|
|
671
683
|
key_, shared_user_key_len,
|
|
672
684
|
shared_internal_bytes_len + non_shared_len < kNumInternalBytes,
|
|
673
|
-
shared_len + non_shared_len - kNumInternalBytes,
|
|
674
|
-
|
|
685
|
+
shared_len + non_shared_len - kNumInternalBytes, ts_sz,
|
|
686
|
+
&next_key_slice_index, &ts_added);
|
|
675
687
|
MaybeAddKeyPartsWithTimestamp(
|
|
676
688
|
key_ + user_key_len, shared_internal_bytes_len,
|
|
677
689
|
non_shared_len < kNumInternalBytes,
|
|
678
|
-
shared_internal_bytes_len + non_shared_len - kNumInternalBytes,
|
|
679
|
-
|
|
690
|
+
shared_internal_bytes_len + non_shared_len - kNumInternalBytes, ts_sz,
|
|
691
|
+
&next_key_slice_index, &ts_added);
|
|
680
692
|
MaybeAddKeyPartsWithTimestamp(non_shared_data, non_shared_len,
|
|
681
693
|
non_shared_len >= kNumInternalBytes,
|
|
682
|
-
non_shared_len - kNumInternalBytes,
|
|
683
|
-
|
|
694
|
+
non_shared_len - kNumInternalBytes, ts_sz,
|
|
695
|
+
&next_key_slice_index, &ts_added);
|
|
684
696
|
assert(ts_added);
|
|
685
697
|
}
|
|
698
|
+
SetKeyImpl(next_key_slice_index,
|
|
699
|
+
/* total_bytes= */ shared_len + non_shared_len + ts_sz);
|
|
700
|
+
}
|
|
686
701
|
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
702
|
+
Slice SetKeyWithPaddedMinTimestamp(const Slice& key, size_t ts_sz) {
|
|
703
|
+
// This function is only used by the UDT in memtable feature, which only
|
|
704
|
+
// support built in comparators with uint64 timestamps.
|
|
705
|
+
assert(ts_sz == sizeof(uint64_t));
|
|
706
|
+
size_t num_key_slices = 0;
|
|
707
|
+
if (is_user_key_) {
|
|
708
|
+
key_slices_[0] = key;
|
|
709
|
+
key_slices_[1] = Slice(kTsMin, ts_sz);
|
|
710
|
+
num_key_slices = 2;
|
|
711
|
+
} else {
|
|
712
|
+
assert(key.size() >= kNumInternalBytes);
|
|
713
|
+
size_t user_key_size = key.size() - kNumInternalBytes;
|
|
714
|
+
key_slices_[0] = Slice(key.data(), user_key_size);
|
|
715
|
+
key_slices_[1] = Slice(kTsMin, ts_sz);
|
|
716
|
+
key_slices_[2] = Slice(key.data() + user_key_size, kNumInternalBytes);
|
|
717
|
+
num_key_slices = 3;
|
|
718
|
+
}
|
|
719
|
+
return SetKeyImpl(num_key_slices, key.size() + ts_sz);
|
|
691
720
|
}
|
|
692
721
|
|
|
693
722
|
Slice SetKey(const Slice& key, bool copy = true) {
|
|
@@ -718,15 +747,6 @@ class IterKey {
|
|
|
718
747
|
return Slice(key_, key_n);
|
|
719
748
|
}
|
|
720
749
|
|
|
721
|
-
// Copy the key into IterKey own buf_
|
|
722
|
-
void OwnKey() {
|
|
723
|
-
assert(IsKeyPinned() == true);
|
|
724
|
-
|
|
725
|
-
Reserve(key_size_);
|
|
726
|
-
memcpy(buf_, key_, key_size_);
|
|
727
|
-
key_ = buf_;
|
|
728
|
-
}
|
|
729
|
-
|
|
730
750
|
// Update the sequence number in the internal key. Guarantees not to
|
|
731
751
|
// invalidate slices to the key (and the user key).
|
|
732
752
|
void UpdateInternalKey(uint64_t seq, ValueType t, const Slice* ts = nullptr) {
|
|
@@ -738,10 +758,15 @@ class IterKey {
|
|
|
738
758
|
ts->size());
|
|
739
759
|
}
|
|
740
760
|
uint64_t newval = (seq << 8) | t;
|
|
741
|
-
|
|
761
|
+
if (key_ == buf_) {
|
|
762
|
+
EncodeFixed64(&buf_[key_size_ - kNumInternalBytes], newval);
|
|
763
|
+
} else {
|
|
764
|
+
assert(key_ == secondary_buf_);
|
|
765
|
+
EncodeFixed64(&secondary_buf_[key_size_ - kNumInternalBytes], newval);
|
|
766
|
+
}
|
|
742
767
|
}
|
|
743
768
|
|
|
744
|
-
bool IsKeyPinned() const { return
|
|
769
|
+
bool IsKeyPinned() const { return key_ != buf_ && key_ != secondary_buf_; }
|
|
745
770
|
|
|
746
771
|
// If `ts` is provided, user_key should not contain timestamp,
|
|
747
772
|
// and `ts` is appended after user_key.
|
|
@@ -806,8 +831,24 @@ class IterKey {
|
|
|
806
831
|
const char* key_;
|
|
807
832
|
size_t key_size_;
|
|
808
833
|
size_t buf_size_;
|
|
809
|
-
char space_[
|
|
834
|
+
char space_[kInlineBufferSize]; // Avoid allocation for short keys
|
|
810
835
|
bool is_user_key_;
|
|
836
|
+
// Below variables are only used by user-defined timestamps in MemTable only
|
|
837
|
+
// feature for iterating keys in an index block or a data block.
|
|
838
|
+
//
|
|
839
|
+
// We will alternate between buf_ and secondary_buf_ to hold the key. key_
|
|
840
|
+
// will be modified in accordance to point to the right one. This is to avoid
|
|
841
|
+
// an extra copy when we need to copy some shared bytes from previous key
|
|
842
|
+
// (delta encoding), and we need to pad a min timestamp at the right location.
|
|
843
|
+
char space_for_secondary_buf_[kInlineBufferSize]; // Avoid allocation for
|
|
844
|
+
// short keys
|
|
845
|
+
char* secondary_buf_;
|
|
846
|
+
size_t secondary_buf_size_;
|
|
847
|
+
// Use to track the pieces that together make the whole key. We then copy
|
|
848
|
+
// these pieces in order either into buf_ or secondary_buf_ depending on where
|
|
849
|
+
// the previous key is held.
|
|
850
|
+
std::array<Slice, 5> key_slices_;
|
|
851
|
+
// End of variables used by user-defined timestamps in MemTable only feature.
|
|
811
852
|
|
|
812
853
|
Slice SetKeyImpl(const Slice& key, bool copy) {
|
|
813
854
|
size_t size = key.size();
|
|
@@ -824,18 +865,64 @@ class IterKey {
|
|
|
824
865
|
return Slice(key_, key_size_);
|
|
825
866
|
}
|
|
826
867
|
|
|
868
|
+
Slice SetKeyImpl(size_t num_key_slices, size_t total_bytes) {
|
|
869
|
+
assert(num_key_slices <= 5);
|
|
870
|
+
char* buf_start = nullptr;
|
|
871
|
+
if (key_ == buf_) {
|
|
872
|
+
// If the previous key is in buf_, we copy key_slices_ in order into
|
|
873
|
+
// secondary_buf_.
|
|
874
|
+
EnlargeSecondaryBufferIfNeeded(total_bytes);
|
|
875
|
+
buf_start = secondary_buf_;
|
|
876
|
+
key_ = secondary_buf_;
|
|
877
|
+
} else {
|
|
878
|
+
// Copy key_slices_ in order into buf_.
|
|
879
|
+
EnlargeBufferIfNeeded(total_bytes);
|
|
880
|
+
buf_start = buf_;
|
|
881
|
+
key_ = buf_;
|
|
882
|
+
}
|
|
883
|
+
#ifndef NDEBUG
|
|
884
|
+
size_t actual_total_bytes = 0;
|
|
885
|
+
#endif // NDEBUG
|
|
886
|
+
for (size_t i = 0; i < num_key_slices; i++) {
|
|
887
|
+
size_t key_slice_size = key_slices_[i].size();
|
|
888
|
+
memcpy(buf_start, key_slices_[i].data(), key_slice_size);
|
|
889
|
+
buf_start += key_slice_size;
|
|
890
|
+
#ifndef NDEBUG
|
|
891
|
+
actual_total_bytes += key_slice_size;
|
|
892
|
+
#endif // NDEBUG
|
|
893
|
+
}
|
|
894
|
+
#ifndef NDEBUG
|
|
895
|
+
assert(actual_total_bytes == total_bytes);
|
|
896
|
+
#endif // NDEBUG
|
|
897
|
+
key_size_ = total_bytes;
|
|
898
|
+
return Slice(key_, key_size_);
|
|
899
|
+
}
|
|
900
|
+
|
|
827
901
|
void ResetBuffer() {
|
|
902
|
+
if (key_ == buf_) {
|
|
903
|
+
key_size_ = 0;
|
|
904
|
+
}
|
|
828
905
|
if (buf_ != space_) {
|
|
829
906
|
delete[] buf_;
|
|
830
907
|
buf_ = space_;
|
|
831
908
|
}
|
|
832
|
-
buf_size_ =
|
|
833
|
-
|
|
909
|
+
buf_size_ = kInlineBufferSize;
|
|
910
|
+
}
|
|
911
|
+
|
|
912
|
+
void ResetSecondaryBuffer() {
|
|
913
|
+
if (key_ == secondary_buf_) {
|
|
914
|
+
key_size_ = 0;
|
|
915
|
+
}
|
|
916
|
+
if (secondary_buf_ != space_for_secondary_buf_) {
|
|
917
|
+
delete[] secondary_buf_;
|
|
918
|
+
secondary_buf_ = space_for_secondary_buf_;
|
|
919
|
+
}
|
|
920
|
+
secondary_buf_size_ = kInlineBufferSize;
|
|
834
921
|
}
|
|
835
922
|
|
|
836
923
|
// Enlarge the buffer size if needed based on key_size.
|
|
837
|
-
// By default,
|
|
838
|
-
// larger than the
|
|
924
|
+
// By default, inline buffer is used. Once there is a key
|
|
925
|
+
// larger than the inline buffer, another buffer is dynamically
|
|
839
926
|
// allocated, until a larger key buffer is requested. In that case, we
|
|
840
927
|
// reallocate buffer and delete the old one.
|
|
841
928
|
void EnlargeBufferIfNeeded(size_t key_size) {
|
|
@@ -846,23 +933,27 @@ class IterKey {
|
|
|
846
933
|
}
|
|
847
934
|
}
|
|
848
935
|
|
|
936
|
+
void EnlargeSecondaryBufferIfNeeded(size_t key_size);
|
|
937
|
+
|
|
849
938
|
void EnlargeBuffer(size_t key_size);
|
|
850
939
|
|
|
851
940
|
void MaybeAddKeyPartsWithTimestamp(const char* slice_data,
|
|
852
941
|
const size_t slice_sz, bool add_timestamp,
|
|
853
|
-
const size_t left_sz,
|
|
854
|
-
|
|
855
|
-
std::vector<Slice>& key_parts,
|
|
942
|
+
const size_t left_sz, const size_t ts_sz,
|
|
943
|
+
size_t* next_key_slice_idx,
|
|
856
944
|
bool* ts_added) {
|
|
945
|
+
assert(next_key_slice_idx);
|
|
857
946
|
if (add_timestamp && !*ts_added) {
|
|
858
947
|
assert(slice_sz >= left_sz);
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
948
|
+
key_slices_[(*next_key_slice_idx)++] = Slice(slice_data, left_sz);
|
|
949
|
+
key_slices_[(*next_key_slice_idx)++] = Slice(kTsMin, ts_sz);
|
|
950
|
+
key_slices_[(*next_key_slice_idx)++] =
|
|
951
|
+
Slice(slice_data + left_sz, slice_sz - left_sz);
|
|
862
952
|
*ts_added = true;
|
|
863
953
|
} else {
|
|
864
|
-
|
|
954
|
+
key_slices_[(*next_key_slice_idx)++] = Slice(slice_data, slice_sz);
|
|
865
955
|
}
|
|
956
|
+
assert(*next_key_slice_idx <= 5);
|
|
866
957
|
}
|
|
867
958
|
};
|
|
868
959
|
|
|
@@ -936,22 +1027,13 @@ struct RangeTombstone {
|
|
|
936
1027
|
// User-defined timestamp is enabled, `sk` and `ek` should be user key
|
|
937
1028
|
// with timestamp, `ts` will replace the timestamps in `sk` and
|
|
938
1029
|
// `ek`.
|
|
939
|
-
|
|
940
|
-
// will be replaced with min timestamp.
|
|
941
|
-
RangeTombstone(Slice sk, Slice ek, SequenceNumber sn, Slice ts,
|
|
942
|
-
bool logical_strip_timestamp)
|
|
943
|
-
: seq_(sn) {
|
|
1030
|
+
RangeTombstone(Slice sk, Slice ek, SequenceNumber sn, Slice ts) : seq_(sn) {
|
|
944
1031
|
const size_t ts_sz = ts.size();
|
|
945
1032
|
assert(ts_sz > 0);
|
|
946
1033
|
pinned_start_key_.reserve(sk.size());
|
|
947
1034
|
pinned_end_key_.reserve(ek.size());
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
AppendUserKeyWithMinTimestamp(&pinned_end_key_, ek, ts_sz);
|
|
951
|
-
} else {
|
|
952
|
-
AppendUserKeyWithDifferentTimestamp(&pinned_start_key_, sk, ts);
|
|
953
|
-
AppendUserKeyWithDifferentTimestamp(&pinned_end_key_, ek, ts);
|
|
954
|
-
}
|
|
1035
|
+
AppendUserKeyWithDifferentTimestamp(&pinned_start_key_, sk, ts);
|
|
1036
|
+
AppendUserKeyWithDifferentTimestamp(&pinned_end_key_, ek, ts);
|
|
955
1037
|
start_key_ = pinned_start_key_;
|
|
956
1038
|
end_key_ = pinned_end_key_;
|
|
957
1039
|
ts_ = Slice(pinned_start_key_.data() + sk.size() - ts_sz, ts_sz);
|
|
@@ -132,6 +132,7 @@ void EventHelpers::LogAndNotifyTableFileCreationFinished(
|
|
|
132
132
|
<< table_properties.compression_name << "compression_options"
|
|
133
133
|
<< table_properties.compression_options << "creation_time"
|
|
134
134
|
<< table_properties.creation_time << "oldest_key_time"
|
|
135
|
+
<< table_properties.newest_key_time << "newest_key_time"
|
|
135
136
|
<< table_properties.oldest_key_time << "file_creation_time"
|
|
136
137
|
<< table_properties.file_creation_time
|
|
137
138
|
<< "slow_compression_estimated_data_size"
|
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
#include <vector>
|
|
14
14
|
|
|
15
15
|
#include "db/db_impl/db_impl.h"
|
|
16
|
+
#include "db/manifest_ops.h"
|
|
17
|
+
#include "db/version_edit_handler.h"
|
|
16
18
|
#include "db/version_util.h"
|
|
17
19
|
#include "logging/logging.h"
|
|
18
20
|
#include "util/atomic.h"
|
|
@@ -40,6 +42,58 @@ Status SuggestCompactRange(DB* db, const Slice* begin, const Slice* end) {
|
|
|
40
42
|
return SuggestCompactRange(db, db->DefaultColumnFamily(), begin, end);
|
|
41
43
|
}
|
|
42
44
|
|
|
45
|
+
Status GetFileChecksumsFromCurrentManifest(FileSystem* fs,
|
|
46
|
+
const std::string& dbname,
|
|
47
|
+
FileChecksumList* checksum_list) {
|
|
48
|
+
std::string manifest_path;
|
|
49
|
+
uint64_t manifest_file_number;
|
|
50
|
+
Status s = GetCurrentManifestPath(dbname, fs, true /* is_retry */,
|
|
51
|
+
&manifest_path, &manifest_file_number);
|
|
52
|
+
if (!s.ok()) {
|
|
53
|
+
return s;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
if (checksum_list == nullptr) {
|
|
57
|
+
return Status::InvalidArgument("checksum_list is nullptr");
|
|
58
|
+
}
|
|
59
|
+
assert(checksum_list);
|
|
60
|
+
|
|
61
|
+
const ReadOptions read_options(Env::IOActivity::kReadManifest);
|
|
62
|
+
checksum_list->reset();
|
|
63
|
+
|
|
64
|
+
std::unique_ptr<SequentialFileReader> file_reader;
|
|
65
|
+
{
|
|
66
|
+
std::unique_ptr<FSSequentialFile> file;
|
|
67
|
+
s = fs->NewSequentialFile(manifest_path,
|
|
68
|
+
fs->OptimizeForManifestRead(FileOptions()), &file,
|
|
69
|
+
nullptr /* dbg */);
|
|
70
|
+
if (!s.ok()) {
|
|
71
|
+
return s;
|
|
72
|
+
}
|
|
73
|
+
file_reader.reset(new SequentialFileReader(std::move(file), manifest_path));
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
struct LogReporter : public log::Reader::Reporter {
|
|
77
|
+
Status* status_ptr;
|
|
78
|
+
void Corruption(size_t /*bytes*/, const Status& st) override {
|
|
79
|
+
if (status_ptr->ok()) {
|
|
80
|
+
*status_ptr = st;
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
} reporter;
|
|
84
|
+
reporter.status_ptr = &s;
|
|
85
|
+
log::Reader reader(nullptr, std::move(file_reader), &reporter,
|
|
86
|
+
true /* checksum */, 0 /* log_number */);
|
|
87
|
+
|
|
88
|
+
// Read all records from the manifest file...
|
|
89
|
+
uint64_t manifest_file_size = std::numeric_limits<uint64_t>::max();
|
|
90
|
+
FileChecksumRetriever retriever(read_options, manifest_file_size,
|
|
91
|
+
*checksum_list);
|
|
92
|
+
retriever.Iterate(reader, &s);
|
|
93
|
+
|
|
94
|
+
return retriever.status();
|
|
95
|
+
}
|
|
96
|
+
|
|
43
97
|
Status UpdateManifestForFilesState(
|
|
44
98
|
const DBOptions& db_opts, const std::string& db_name,
|
|
45
99
|
const std::vector<ColumnFamilyDescriptor>& column_families,
|