@nxtedition/rocksdb 13.5.9 → 13.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/BUCK +2 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
- package/deps/rocksdb/rocksdb/Makefile +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -5
- package/deps/rocksdb/rocksdb/db/c.cc +13 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +0 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +2 -3
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +5 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +11 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +10 -16
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +2 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +164 -0
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +74 -3
- package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +39 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -83
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -11
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +0 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -9
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +16 -54
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +0 -6
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +186 -0
- package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +3 -40
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -54
- package/deps/rocksdb/rocksdb/db/db_test.cc +0 -292
- package/deps/rocksdb/rocksdb/db/db_test2.cc +0 -1235
- package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -0
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +11 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +11 -11
- package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +70 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +15 -8
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +4 -0
- package/deps/rocksdb/rocksdb/env/composite_env.cc +4 -0
- package/deps/rocksdb/rocksdb/env/env.cc +4 -0
- package/deps/rocksdb/rocksdb/env/env_encryption.cc +38 -3
- package/deps/rocksdb/rocksdb/env/env_test.cc +36 -1
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -4
- package/deps/rocksdb/rocksdb/env/io_posix.cc +16 -0
- package/deps/rocksdb/rocksdb/env/io_posix.h +3 -0
- package/deps/rocksdb/rocksdb/env/mock_env.cc +5 -0
- package/deps/rocksdb/rocksdb/file/readahead_raf.cc +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +25 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +10 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +29 -28
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +26 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +9 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +142 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +2 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/options/options_helper.h +3 -0
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.cc +20 -0
- package/deps/rocksdb/rocksdb/port/win/io_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.cc +31 -34
- package/deps/rocksdb/rocksdb/table/block_based/block.h +2 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +43 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +367 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +69 -23
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +54 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +27 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +167 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +6 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +12 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_type.h +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -3
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +10 -7
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +244 -0
- package/deps/rocksdb/rocksdb/table/external_table.cc +1 -1
- package/deps/rocksdb/rocksdb/table/format.cc +51 -33
- package/deps/rocksdb/rocksdb/table/format.h +1 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +13 -8
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -3
- package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +5 -1
- package/deps/rocksdb/rocksdb/table/table_test.cc +629 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.cc +0 -1
- package/deps/rocksdb/rocksdb/test_util/testutil.h +5 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +183 -94
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +71 -0
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +37 -22
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +308 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +189 -0
- package/deps/rocksdb/rocksdb/util/cast_util.h +22 -11
- package/deps/rocksdb/rocksdb/util/coding.h +4 -3
- package/deps/rocksdb/rocksdb/util/compression.cc +2 -0
- package/deps/rocksdb/rocksdb/util/compression.h +16 -6
- package/deps/rocksdb/rocksdb/util/compression_test.cc +1679 -15
- package/deps/rocksdb/rocksdb/util/stop_watch.h +17 -7
- package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +17 -3
- package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +18 -2
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +22 -3
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +22 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +15 -4
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +61 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +3 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +3 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +9 -3
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +9 -0
- package/deps/rocksdb/rocksdb.gyp +15 -1
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +0 -131
- package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +0 -90
|
@@ -46,6 +46,7 @@
|
|
|
46
46
|
#include "rocksdb/table.h"
|
|
47
47
|
#include "rocksdb/table_properties.h"
|
|
48
48
|
#include "rocksdb/trace_record.h"
|
|
49
|
+
#include "rocksdb/user_defined_index.h"
|
|
49
50
|
#include "table/block_based/binary_search_index_reader.h"
|
|
50
51
|
#include "table/block_based/block.h"
|
|
51
52
|
#include "table/block_based/block_based_table_factory.h"
|
|
@@ -58,6 +59,7 @@
|
|
|
58
59
|
#include "table/block_based/hash_index_reader.h"
|
|
59
60
|
#include "table/block_based/partitioned_filter_block.h"
|
|
60
61
|
#include "table/block_based/partitioned_index_reader.h"
|
|
62
|
+
#include "table/block_based/user_defined_index_wrapper.h"
|
|
61
63
|
#include "table/block_fetcher.h"
|
|
62
64
|
#include "table/format.h"
|
|
63
65
|
#include "table/get_context.h"
|
|
@@ -104,7 +106,11 @@ CacheAllocationPtr CopyBufferToHeap(MemoryAllocator* allocator, Slice& buf) {
|
|
|
104
106
|
bool use_block_cache_for_lookup) const; \
|
|
105
107
|
template Status BlockBasedTable::LookupAndPinBlocksInCache<T>( \
|
|
106
108
|
const ReadOptions& ro, const BlockHandle& handle, \
|
|
107
|
-
CachableEntry<T>* out_parsed_block) const;
|
|
109
|
+
CachableEntry<T>* out_parsed_block) const; \
|
|
110
|
+
template Status BlockBasedTable::CreateAndPinBlockInCache<T>( \
|
|
111
|
+
const ReadOptions& ro, const BlockHandle& handle, \
|
|
112
|
+
BlockContents* block_contents, CachableEntry<T>* out_parsed_block) \
|
|
113
|
+
const;
|
|
108
114
|
|
|
109
115
|
INSTANTIATE_BLOCKLIKE_TEMPLATES(ParsedFullFilterBlock);
|
|
110
116
|
INSTANTIATE_BLOCKLIKE_TEMPLATES(DecompressorDict);
|
|
@@ -113,6 +119,7 @@ INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kIndex);
|
|
|
113
119
|
INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kFilterPartitionIndex);
|
|
114
120
|
INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kRangeDeletion);
|
|
115
121
|
INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kMetaIndex);
|
|
122
|
+
INSTANTIATE_BLOCKLIKE_TEMPLATES(Block_kUserDefinedIndex);
|
|
116
123
|
|
|
117
124
|
} // namespace ROCKSDB_NAMESPACE
|
|
118
125
|
|
|
@@ -1318,6 +1325,34 @@ Status BlockBasedTable::PrefetchIndexAndFilterBlocks(
|
|
|
1318
1325
|
if (!s.ok()) {
|
|
1319
1326
|
return s;
|
|
1320
1327
|
}
|
|
1328
|
+
if (table_options.user_defined_index_factory != nullptr) {
|
|
1329
|
+
std::string udi_name(table_options.user_defined_index_factory->Name());
|
|
1330
|
+
BlockHandle udi_block_handle;
|
|
1331
|
+
|
|
1332
|
+
// Should we use FindOptionalMetaBlock here?
|
|
1333
|
+
s = FindMetaBlock(meta_iter, kUserDefinedIndexPrefix + udi_name,
|
|
1334
|
+
&udi_block_handle);
|
|
1335
|
+
if (!s.ok()) {
|
|
1336
|
+
return s;
|
|
1337
|
+
}
|
|
1338
|
+
// Read the block, and allocate on heap or pin in cache. The UDI block is
|
|
1339
|
+
// not compressed. RetrieveBlock will verify the checksum.
|
|
1340
|
+
s = RetrieveBlock(prefetch_buffer, ro, udi_block_handle,
|
|
1341
|
+
rep_->decompressor.get(), &rep_->udi_block,
|
|
1342
|
+
/*get_context=*/nullptr, lookup_context,
|
|
1343
|
+
/*for_compaction=*/false, use_cache, /*async_read=*/false,
|
|
1344
|
+
/*use_block_cache_for_lookup=*/false);
|
|
1345
|
+
if (!s.ok()) {
|
|
1346
|
+
return s;
|
|
1347
|
+
}
|
|
1348
|
+
assert(!rep_->udi_block.IsEmpty());
|
|
1349
|
+
|
|
1350
|
+
std::unique_ptr<UserDefinedIndexReader> udi_reader =
|
|
1351
|
+
table_options.user_defined_index_factory->NewReader(
|
|
1352
|
+
rep_->udi_block.GetValue()->data);
|
|
1353
|
+
index_reader = std::make_unique<UserDefinedIndexReaderWrapper>(
|
|
1354
|
+
udi_name, std::move(index_reader), std::move(udi_reader));
|
|
1355
|
+
}
|
|
1321
1356
|
|
|
1322
1357
|
rep_->index_reader = std::move(index_reader);
|
|
1323
1358
|
|
|
@@ -1704,6 +1739,17 @@ Status BlockBasedTable::LookupAndPinBlocksInCache(
|
|
|
1704
1739
|
return s;
|
|
1705
1740
|
}
|
|
1706
1741
|
|
|
1742
|
+
template <typename TBlocklike>
|
|
1743
|
+
Status BlockBasedTable::CreateAndPinBlockInCache(
|
|
1744
|
+
const ReadOptions& ro, const BlockHandle& handle, BlockContents* contents,
|
|
1745
|
+
CachableEntry<TBlocklike>* out_parsed_block) const {
|
|
1746
|
+
return MaybeReadBlockAndLoadToCache(
|
|
1747
|
+
nullptr, ro, handle, rep_->decompressor.get(),
|
|
1748
|
+
/*for_compaction=*/false, out_parsed_block, nullptr, nullptr, contents,
|
|
1749
|
+
/*async_read=*/false,
|
|
1750
|
+
/*use_block_cache_for_lookup=*/true);
|
|
1751
|
+
}
|
|
1752
|
+
|
|
1707
1753
|
// If contents is nullptr, this function looks up the block caches for the
|
|
1708
1754
|
// data block referenced by handle, and read the block from disk if necessary.
|
|
1709
1755
|
// If contents is non-null, it skips the cache lookup and disk read, since
|
|
@@ -1765,8 +1811,7 @@ BlockBasedTable::MaybeReadBlockAndLoadToCache(
|
|
|
1765
1811
|
ro.fill_cache) {
|
|
1766
1812
|
Statistics* statistics = rep_->ioptions.stats;
|
|
1767
1813
|
const bool maybe_compressed =
|
|
1768
|
-
TBlocklike::kBlockType
|
|
1769
|
-
TBlocklike::kBlockType != BlockType::kCompressionDictionary &&
|
|
1814
|
+
BlockTypeMaybeCompressed(TBlocklike::kBlockType) &&
|
|
1770
1815
|
rep_->decompressor;
|
|
1771
1816
|
// This flag, if true, tells BlockFetcher to return the uncompressed
|
|
1772
1817
|
// block when ReadBlockContents() is called.
|
|
@@ -1910,6 +1955,7 @@ BlockBasedTable::SaveLookupContextOrTraceRecord(
|
|
|
1910
1955
|
trace_block_type = TraceType::kBlockTraceRangeDeletionBlock;
|
|
1911
1956
|
break;
|
|
1912
1957
|
case BlockType::kIndex:
|
|
1958
|
+
case BlockType::kUserDefinedIndex:
|
|
1913
1959
|
trace_block_type = TraceType::kBlockTraceIndexBlock;
|
|
1914
1960
|
break;
|
|
1915
1961
|
default:
|
|
@@ -2002,9 +2048,7 @@ WithBlocklikeCheck<Status, TBlocklike> BlockBasedTable::RetrieveBlock(
|
|
|
2002
2048
|
}
|
|
2003
2049
|
|
|
2004
2050
|
const bool maybe_compressed =
|
|
2005
|
-
TBlocklike::kBlockType
|
|
2006
|
-
TBlocklike::kBlockType != BlockType::kCompressionDictionary &&
|
|
2007
|
-
rep_->decompressor;
|
|
2051
|
+
BlockTypeMaybeCompressed(TBlocklike::kBlockType) && rep_->decompressor;
|
|
2008
2052
|
std::unique_ptr<TBlocklike> block;
|
|
2009
2053
|
|
|
2010
2054
|
{
|
|
@@ -2747,6 +2791,10 @@ BlockType BlockBasedTable::GetBlockTypeForMetaBlockByName(
|
|
|
2747
2791
|
return BlockType::kIndex;
|
|
2748
2792
|
}
|
|
2749
2793
|
|
|
2794
|
+
if (meta_block_name.starts_with(kUserDefinedIndexPrefix)) {
|
|
2795
|
+
return BlockType::kUserDefinedIndex;
|
|
2796
|
+
}
|
|
2797
|
+
|
|
2750
2798
|
if (meta_block_name.starts_with(kObsoleteFilterBlockPrefix)) {
|
|
2751
2799
|
// Obsolete but possible in old files
|
|
2752
2800
|
return BlockType::kInvalid;
|
|
@@ -228,11 +228,15 @@ class BlockBasedTable : public TableReader {
|
|
|
228
228
|
|
|
229
229
|
// Create an iterator for index access. If iter is null, then a new object
|
|
230
230
|
// is created on the heap, and the callee will have the ownership.
|
|
231
|
-
// If a non-null iter is passed in, it
|
|
232
|
-
// is either the same as iter or a new on-heap object
|
|
233
|
-
//
|
|
234
|
-
//
|
|
235
|
-
//
|
|
231
|
+
// If a non-null iter is passed in, it may be used, and the returned value
|
|
232
|
+
// is either the same as iter or a new on-heap object.
|
|
233
|
+
// In the latter case the return value points to a different object than
|
|
234
|
+
// iter, and the callee has the ownership of the returned object.
|
|
235
|
+
//
|
|
236
|
+
// Under all circumstances, the caller MUST use the returned iterator
|
|
237
|
+
// for further operations. If the returned iterator != iter, then the
|
|
238
|
+
// caller MUST ensure that iter stays in scope until the returned
|
|
239
|
+
// iterator is destroyed.
|
|
236
240
|
virtual InternalIteratorBase<IndexValue>* NewIterator(
|
|
237
241
|
const ReadOptions& read_options, bool disable_prefix_seek,
|
|
238
242
|
IndexBlockIter* iter, GetContext* get_context,
|
|
@@ -295,11 +299,21 @@ class BlockBasedTable : public TableReader {
|
|
|
295
299
|
Status GetKVPairsFromDataBlocks(const ReadOptions& read_options,
|
|
296
300
|
std::vector<KVPairBlock>* kv_pair_blocks);
|
|
297
301
|
|
|
302
|
+
// Look up the block cache for the specified block.
|
|
303
|
+
// out_parsed_block is set to nullptr if the block is not found in the cache.
|
|
298
304
|
template <typename TBlocklike>
|
|
299
305
|
Status LookupAndPinBlocksInCache(
|
|
300
306
|
const ReadOptions& ro, const BlockHandle& handle,
|
|
301
307
|
CachableEntry<TBlocklike>* out_parsed_block) const;
|
|
302
308
|
|
|
309
|
+
// Create the block given in `block_contents` and insert it into block cache.
|
|
310
|
+
// `out_parsed_block` points to the inserted block if successful.
|
|
311
|
+
template <typename TBlocklike>
|
|
312
|
+
Status CreateAndPinBlockInCache(
|
|
313
|
+
const ReadOptions& ro, const BlockHandle& handle,
|
|
314
|
+
BlockContents* block_contents,
|
|
315
|
+
CachableEntry<TBlocklike>* out_parsed_block) const;
|
|
316
|
+
|
|
303
317
|
struct Rep;
|
|
304
318
|
|
|
305
319
|
Rep* get_rep() { return rep_; }
|
|
@@ -544,6 +558,12 @@ class BlockBasedTable : public TableReader {
|
|
|
544
558
|
|
|
545
559
|
bool TimestampMayMatch(const ReadOptions& read_options) const;
|
|
546
560
|
|
|
561
|
+
bool BlockTypeMaybeCompressed(BlockType type) const {
|
|
562
|
+
return type != BlockType::kFilter &&
|
|
563
|
+
type != BlockType::kCompressionDictionary &&
|
|
564
|
+
type != BlockType::kUserDefinedIndex;
|
|
565
|
+
}
|
|
566
|
+
|
|
547
567
|
// A cumulative data block file read in MultiGet lower than this size will
|
|
548
568
|
// use a stack buffer
|
|
549
569
|
static constexpr size_t kMultiGetReadStackBufSize = 8192;
|
|
@@ -689,6 +709,8 @@ struct BlockBasedTable::Rep {
|
|
|
689
709
|
std::unique_ptr<CacheReservationManager::CacheReservationHandle>
|
|
690
710
|
table_reader_cache_res_handle = nullptr;
|
|
691
711
|
|
|
712
|
+
CachableEntry<Block_kUserDefinedIndex> udi_block;
|
|
713
|
+
|
|
692
714
|
SequenceNumber get_global_seqno(BlockType block_type) const {
|
|
693
715
|
return (block_type == BlockType::kFilterPartitionIndex ||
|
|
694
716
|
block_type == BlockType::kCompressionDictionary)
|
|
@@ -173,7 +173,7 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
|
|
|
173
173
|
0 /* _tail_size */, user_defined_timestamps_persisted);
|
|
174
174
|
|
|
175
175
|
std::unique_ptr<RandomAccessFileReader> file;
|
|
176
|
-
NewFileReader(table_name, foptions, &file);
|
|
176
|
+
NewFileReader(table_name, foptions, &file, ioptions.statistics.get());
|
|
177
177
|
|
|
178
178
|
uint64_t file_size = 0;
|
|
179
179
|
ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size));
|
|
@@ -222,12 +222,15 @@ class BlockBasedTableReaderBaseTest : public testing::Test {
|
|
|
222
222
|
}
|
|
223
223
|
|
|
224
224
|
void NewFileReader(const std::string& filename, const FileOptions& opt,
|
|
225
|
-
std::unique_ptr<RandomAccessFileReader>* reader
|
|
225
|
+
std::unique_ptr<RandomAccessFileReader>* reader,
|
|
226
|
+
Statistics* stats = nullptr) {
|
|
226
227
|
std::string path = Path(filename);
|
|
227
228
|
std::unique_ptr<FSRandomAccessFile> f;
|
|
228
229
|
ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr));
|
|
229
230
|
reader->reset(new RandomAccessFileReader(std::move(f), path,
|
|
230
|
-
env_->GetSystemClock().get()
|
|
231
|
+
env_->GetSystemClock().get(),
|
|
232
|
+
/*io_tracer=*/nullptr,
|
|
233
|
+
/*stats=*/stats));
|
|
231
234
|
}
|
|
232
235
|
};
|
|
233
236
|
|
|
@@ -990,6 +993,167 @@ TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) {
|
|
|
990
993
|
ASSERT_EQ(s.code(), Status::kCorruption);
|
|
991
994
|
}
|
|
992
995
|
|
|
996
|
+
TEST_P(BlockBasedTableReaderTest, MultiScanPrepare) {
|
|
997
|
+
Options options;
|
|
998
|
+
options.statistics = CreateDBStatistics();
|
|
999
|
+
ReadOptions read_opts;
|
|
1000
|
+
size_t ts_sz = options.comparator->timestamp_size();
|
|
1001
|
+
std::vector<std::pair<std::string, std::string>> kv =
|
|
1002
|
+
BlockBasedTableReaderBaseTest::GenerateKVMap(
|
|
1003
|
+
100 /* num_block */,
|
|
1004
|
+
true /* mixed_with_human_readable_string_value */, ts_sz);
|
|
1005
|
+
|
|
1006
|
+
std::string table_name = "BlockBasedTableReaderTest_NewIterator" +
|
|
1007
|
+
CompressionTypeToString(compression_type_);
|
|
1008
|
+
|
|
1009
|
+
ImmutableOptions ioptions(options);
|
|
1010
|
+
CreateTable(table_name, ioptions, compression_type_, kv,
|
|
1011
|
+
compression_parallel_threads_, compression_dict_bytes_);
|
|
1012
|
+
|
|
1013
|
+
std::unique_ptr<BlockBasedTable> table;
|
|
1014
|
+
FileOptions foptions;
|
|
1015
|
+
foptions.use_direct_reads = true;
|
|
1016
|
+
InternalKeyComparator comparator(options.comparator);
|
|
1017
|
+
NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
|
|
1018
|
+
true /* bool prefetch_index_and_filter_in_cache */,
|
|
1019
|
+
nullptr /* status */, persist_udt_);
|
|
1020
|
+
|
|
1021
|
+
std::unique_ptr<InternalIterator> iter;
|
|
1022
|
+
iter.reset(table->NewIterator(
|
|
1023
|
+
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
|
|
1024
|
+
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
|
|
1025
|
+
|
|
1026
|
+
// Should coalesce into a single I/O
|
|
1027
|
+
std::vector<ScanOptions> scan_options(
|
|
1028
|
+
{ScanOptions(ExtractUserKey(kv[0].first),
|
|
1029
|
+
ExtractUserKey(kv[kEntriesPerBlock].first)),
|
|
1030
|
+
ScanOptions(ExtractUserKey(kv[2 * kEntriesPerBlock].first),
|
|
1031
|
+
ExtractUserKey(kv[3 * kEntriesPerBlock].first))});
|
|
1032
|
+
|
|
1033
|
+
auto read_count_before =
|
|
1034
|
+
options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
|
|
1035
|
+
iter->Prepare(&scan_options);
|
|
1036
|
+
auto read_count_after =
|
|
1037
|
+
options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
|
|
1038
|
+
ASSERT_EQ(read_count_before + 1, read_count_after);
|
|
1039
|
+
iter->Seek(kv[0].first);
|
|
1040
|
+
for (size_t i = 0; i < kEntriesPerBlock + 1; ++i) {
|
|
1041
|
+
ASSERT_TRUE(iter->Valid());
|
|
1042
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1043
|
+
iter->Next();
|
|
1044
|
+
}
|
|
1045
|
+
// Iter may still be valid after scan range. Upper layer (DBIter) handles
|
|
1046
|
+
// exact upper bound checking. So we don't check !iter->Valid() here.
|
|
1047
|
+
ASSERT_OK(iter->status());
|
|
1048
|
+
iter->Seek(kv[2 * kEntriesPerBlock].first);
|
|
1049
|
+
for (size_t i = 2 * kEntriesPerBlock; i < 3 * kEntriesPerBlock; ++i) {
|
|
1050
|
+
ASSERT_TRUE(iter->Valid());
|
|
1051
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1052
|
+
iter->Next();
|
|
1053
|
+
}
|
|
1054
|
+
ASSERT_OK(iter->status());
|
|
1055
|
+
|
|
1056
|
+
iter.reset(table->NewIterator(
|
|
1057
|
+
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
|
|
1058
|
+
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
|
|
1059
|
+
// No IO coalesce, should do MultiRead with 2 read requests.
|
|
1060
|
+
scan_options = {ScanOptions(ExtractUserKey(kv[70 * kEntriesPerBlock].first),
|
|
1061
|
+
ExtractUserKey(kv[75 * kEntriesPerBlock].first)),
|
|
1062
|
+
ScanOptions(ExtractUserKey(kv[90 * kEntriesPerBlock].first),
|
|
1063
|
+
ExtractUserKey(kv[95 * kEntriesPerBlock].first))};
|
|
1064
|
+
read_count_before =
|
|
1065
|
+
options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
|
|
1066
|
+
iter->Prepare(&scan_options);
|
|
1067
|
+
read_count_after =
|
|
1068
|
+
options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
|
|
1069
|
+
ASSERT_EQ(read_count_before + 2, read_count_after);
|
|
1070
|
+
|
|
1071
|
+
iter->Seek(kv[70 * kEntriesPerBlock].first);
|
|
1072
|
+
for (size_t i = 70 * kEntriesPerBlock; i < 75 * kEntriesPerBlock; ++i) {
|
|
1073
|
+
ASSERT_TRUE(iter->Valid());
|
|
1074
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1075
|
+
iter->Next();
|
|
1076
|
+
}
|
|
1077
|
+
ASSERT_OK(iter->status());
|
|
1078
|
+
iter->Seek(kv[90 * kEntriesPerBlock].first);
|
|
1079
|
+
for (size_t i = 90 * kEntriesPerBlock; i < 95 * kEntriesPerBlock; ++i) {
|
|
1080
|
+
ASSERT_TRUE(iter->Valid());
|
|
1081
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1082
|
+
iter->Next();
|
|
1083
|
+
}
|
|
1084
|
+
ASSERT_OK(iter->status());
|
|
1085
|
+
|
|
1086
|
+
iter.reset(table->NewIterator(
|
|
1087
|
+
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
|
|
1088
|
+
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
|
|
1089
|
+
// Should do two I/Os since blocks 80-81 and 90-95 are already in block cache,
|
|
1090
|
+
// reads from blocks 50-79 and 82-.. are coalesced.
|
|
1091
|
+
scan_options = {ScanOptions(ExtractUserKey(kv[50 * kEntriesPerBlock].first))};
|
|
1092
|
+
read_count_before =
|
|
1093
|
+
options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
|
|
1094
|
+
iter->Prepare(&scan_options);
|
|
1095
|
+
read_count_after =
|
|
1096
|
+
options.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
|
|
1097
|
+
ASSERT_EQ(read_count_before + 3, read_count_after);
|
|
1098
|
+
iter->Seek(kv[50 * kEntriesPerBlock].first);
|
|
1099
|
+
for (size_t i = 50 * kEntriesPerBlock; i < 100 * kEntriesPerBlock; ++i) {
|
|
1100
|
+
ASSERT_TRUE(iter->Valid());
|
|
1101
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1102
|
+
iter->Next();
|
|
1103
|
+
}
|
|
1104
|
+
ASSERT_FALSE(iter->Valid());
|
|
1105
|
+
ASSERT_OK(iter->status());
|
|
1106
|
+
|
|
1107
|
+
// Check cases when Seek key does not match start key in ScanOptions
|
|
1108
|
+
iter.reset(table->NewIterator(
|
|
1109
|
+
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
|
|
1110
|
+
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
|
|
1111
|
+
scan_options = {ScanOptions(ExtractUserKey(kv[10 * kEntriesPerBlock].first),
|
|
1112
|
+
ExtractUserKey(kv[20 * kEntriesPerBlock].first)),
|
|
1113
|
+
ScanOptions(ExtractUserKey(kv[30 * kEntriesPerBlock].first),
|
|
1114
|
+
ExtractUserKey(kv[40 * kEntriesPerBlock].first))};
|
|
1115
|
+
iter->Prepare(&scan_options);
|
|
1116
|
+
// Match start key
|
|
1117
|
+
iter->Seek(kv[10 * kEntriesPerBlock].first);
|
|
1118
|
+
for (size_t i = 10 * kEntriesPerBlock; i < 20 * kEntriesPerBlock; ++i) {
|
|
1119
|
+
ASSERT_TRUE(iter->Valid());
|
|
1120
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1121
|
+
iter->Next();
|
|
1122
|
+
}
|
|
1123
|
+
ASSERT_OK(iter->status());
|
|
1124
|
+
// Does not match start key of the second ScanOptions.
|
|
1125
|
+
iter->Seek(kv[50 * kEntriesPerBlock + 1].first);
|
|
1126
|
+
for (size_t i = 50 * kEntriesPerBlock + 1; i < 100 * kEntriesPerBlock; ++i) {
|
|
1127
|
+
ASSERT_TRUE(iter->Valid());
|
|
1128
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1129
|
+
iter->Next();
|
|
1130
|
+
}
|
|
1131
|
+
ASSERT_FALSE(iter->Valid());
|
|
1132
|
+
ASSERT_OK(iter->status());
|
|
1133
|
+
|
|
1134
|
+
iter.reset(table->NewIterator(
|
|
1135
|
+
read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
|
|
1136
|
+
/*skip_filters=*/false, TableReaderCaller::kUncategorized));
|
|
1137
|
+
scan_options = {ScanOptions(ExtractUserKey(kv[10 * kEntriesPerBlock].first)),
|
|
1138
|
+
ScanOptions(ExtractUserKey(kv[11 * kEntriesPerBlock].first))};
|
|
1139
|
+
iter->Prepare(&scan_options);
|
|
1140
|
+
// Does not match the first ScanOptions.
|
|
1141
|
+
iter->SeekToFirst();
|
|
1142
|
+
for (size_t i = 0; i < kEntriesPerBlock; ++i) {
|
|
1143
|
+
ASSERT_TRUE(iter->Valid());
|
|
1144
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1145
|
+
iter->Next();
|
|
1146
|
+
}
|
|
1147
|
+
ASSERT_OK(iter->status());
|
|
1148
|
+
iter->Seek(kv[10 * kEntriesPerBlock].first);
|
|
1149
|
+
for (size_t i = 10 * kEntriesPerBlock; i < 12 * kEntriesPerBlock; ++i) {
|
|
1150
|
+
ASSERT_TRUE(iter->Valid());
|
|
1151
|
+
ASSERT_EQ(iter->key().ToString(), kv[i].first);
|
|
1152
|
+
iter->Next();
|
|
1153
|
+
}
|
|
1154
|
+
ASSERT_OK(iter->status());
|
|
1155
|
+
}
|
|
1156
|
+
|
|
993
1157
|
// Param 1: compression type
|
|
994
1158
|
// Param 2: whether to use direct reads
|
|
995
1159
|
// Param 3: Block Based Table Index type, partitioned filters are also enabled
|
|
@@ -21,15 +21,19 @@
|
|
|
21
21
|
// An entry for a particular key-value pair has the form:
|
|
22
22
|
// shared_bytes: varint32
|
|
23
23
|
// unshared_bytes: varint32
|
|
24
|
-
// value_length: varint32
|
|
24
|
+
// value_length: varint32 (NOTE1)
|
|
25
25
|
// key_delta: char[unshared_bytes]
|
|
26
26
|
// value: char[value_length]
|
|
27
|
-
// shared_bytes == 0 for restart points.
|
|
27
|
+
// shared_bytes == 0 (explicitly stored) for restart points.
|
|
28
28
|
//
|
|
29
29
|
// The trailer of the block has the form:
|
|
30
30
|
// restarts: uint32[num_restarts]
|
|
31
31
|
// num_restarts: uint32
|
|
32
32
|
// restarts[i] contains the offset within the block of the ith restart point.
|
|
33
|
+
//
|
|
34
|
+
// NOTE1: omitted for format_version >= 4 index blocks, because the value is
|
|
35
|
+
// composed of one (shared_bytes > 0) or two (shared_bytes == 0) varints, whose
|
|
36
|
+
// length is self-describing.
|
|
33
37
|
|
|
34
38
|
#include "table/block_based/block_builder.h"
|
|
35
39
|
|
|
@@ -46,6 +46,12 @@ void BlockCreateContext::Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
|
|
|
46
46
|
protection_bytes_per_key);
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
void BlockCreateContext::Create(
|
|
50
|
+
std::unique_ptr<Block_kUserDefinedIndex>* parsed_out,
|
|
51
|
+
BlockContents&& block) {
|
|
52
|
+
parsed_out->reset(new Block_kUserDefinedIndex(std::move(block)));
|
|
53
|
+
}
|
|
54
|
+
|
|
49
55
|
void BlockCreateContext::Create(
|
|
50
56
|
std::unique_ptr<ParsedFullFilterBlock>* parsed_out, BlockContents&& block) {
|
|
51
57
|
parsed_out->reset(new ParsedFullFilterBlock(
|
|
@@ -67,6 +67,16 @@ class Block_kMetaIndex : public Block {
|
|
|
67
67
|
static constexpr BlockType kBlockType = BlockType::kMetaIndex;
|
|
68
68
|
};
|
|
69
69
|
|
|
70
|
+
class Block_kUserDefinedIndex : public BlockContents {
|
|
71
|
+
public:
|
|
72
|
+
static constexpr CacheEntryRole kCacheEntryRole = CacheEntryRole::kIndexBlock;
|
|
73
|
+
static constexpr BlockType kBlockType = BlockType::kUserDefinedIndex;
|
|
74
|
+
|
|
75
|
+
explicit Block_kUserDefinedIndex(BlockContents&& other)
|
|
76
|
+
: BlockContents(std::move(other)) {}
|
|
77
|
+
const Slice& ContentSlice() const { return data; }
|
|
78
|
+
};
|
|
79
|
+
|
|
70
80
|
struct BlockCreateContext : public Cache::CreateContext {
|
|
71
81
|
BlockCreateContext() {}
|
|
72
82
|
BlockCreateContext(const BlockBasedTableOptions* _table_options,
|
|
@@ -126,6 +136,8 @@ struct BlockCreateContext : public Cache::CreateContext {
|
|
|
126
136
|
BlockContents&& block);
|
|
127
137
|
void Create(std::unique_ptr<Block_kMetaIndex>* parsed_out,
|
|
128
138
|
BlockContents&& block);
|
|
139
|
+
void Create(std::unique_ptr<Block_kUserDefinedIndex>* parsed_out,
|
|
140
|
+
BlockContents&& block);
|
|
129
141
|
void Create(std::unique_ptr<ParsedFullFilterBlock>* parsed_out,
|
|
130
142
|
BlockContents&& block);
|
|
131
143
|
void Create(std::unique_ptr<DecompressorDict>* parsed_out,
|
|
@@ -1012,9 +1012,6 @@ class Standard128RibbonBitsBuilder : public XXPH3FilterBitsBuilder {
|
|
|
1012
1012
|
FastLocalBloomBitsBuilder bloom_fallback_;
|
|
1013
1013
|
};
|
|
1014
1014
|
|
|
1015
|
-
// for the linker, at least with DEBUG_LEVEL=2
|
|
1016
|
-
constexpr uint32_t Standard128RibbonBitsBuilder::kMaxRibbonEntries;
|
|
1017
|
-
|
|
1018
1015
|
class Standard128RibbonBitsReader : public BuiltinFilterBitsReader {
|
|
1019
1016
|
public:
|
|
1020
1017
|
Standard128RibbonBitsReader(const char* data, size_t len_bytes,
|
|
@@ -46,7 +46,7 @@ class IndexBuilder {
|
|
|
46
46
|
// primary index.
|
|
47
47
|
struct IndexBlocks {
|
|
48
48
|
Slice index_block_contents;
|
|
49
|
-
std::unordered_map<std::string, Slice
|
|
49
|
+
std::unordered_map<std::string, std::pair<BlockType, Slice>> meta_blocks;
|
|
50
50
|
};
|
|
51
51
|
IndexBuilder(const InternalKeyComparator* comparator, size_t ts_sz,
|
|
52
52
|
bool persist_user_defined_timestamps)
|
|
@@ -78,7 +78,8 @@ class IndexBuilder {
|
|
|
78
78
|
|
|
79
79
|
// This method will be called whenever a key is added. The subclasses may
|
|
80
80
|
// override OnKeyAdded() if they need to collect additional information.
|
|
81
|
-
virtual void OnKeyAdded(const Slice& /*key
|
|
81
|
+
virtual void OnKeyAdded(const Slice& /*key*/,
|
|
82
|
+
const std::optional<Slice>& /*value*/) {}
|
|
82
83
|
|
|
83
84
|
// Inform the index builder that all entries have been written. Block builder
|
|
84
85
|
// may therefore perform any operation required for block finalization.
|
|
@@ -180,7 +181,8 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
180
181
|
seperator_is_key_plus_seq_ = (format_version <= 2);
|
|
181
182
|
}
|
|
182
183
|
|
|
183
|
-
void OnKeyAdded(const Slice& key
|
|
184
|
+
void OnKeyAdded(const Slice& key,
|
|
185
|
+
const std::optional<Slice>& /*value*/) override {
|
|
184
186
|
if (include_first_key_ && current_block_first_internal_key_.empty()) {
|
|
185
187
|
current_block_first_internal_key_.assign(key.data(), key.size());
|
|
186
188
|
}
|
|
@@ -358,7 +360,8 @@ class HashIndexBuilder : public IndexBuilder {
|
|
|
358
360
|
separator_scratch);
|
|
359
361
|
}
|
|
360
362
|
|
|
361
|
-
void OnKeyAdded(const Slice& key
|
|
363
|
+
void OnKeyAdded(const Slice& key,
|
|
364
|
+
const std::optional<Slice>& /*value*/) override {
|
|
362
365
|
auto key_prefix = hash_key_extractor_->Transform(key);
|
|
363
366
|
bool is_first_entry = pending_block_num_ == 0;
|
|
364
367
|
|
|
@@ -393,9 +396,9 @@ class HashIndexBuilder : public IndexBuilder {
|
|
|
393
396
|
Status s = primary_index_builder_.Finish(index_blocks,
|
|
394
397
|
last_partition_block_handle);
|
|
395
398
|
index_blocks->meta_blocks.insert(
|
|
396
|
-
{kHashIndexPrefixesBlock.c_str(), prefix_block_});
|
|
397
|
-
index_blocks->meta_blocks.insert(
|
|
398
|
-
|
|
399
|
+
{kHashIndexPrefixesBlock.c_str(), {BlockType::kIndex, prefix_block_}});
|
|
400
|
+
index_blocks->meta_blocks.insert({kHashIndexPrefixesMetadataBlock.c_str(),
|
|
401
|
+
{BlockType::kIndex, prefix_meta_block_}});
|
|
399
402
|
return s;
|
|
400
403
|
}
|
|
401
404
|
|