@nxtedition/rocksdb 15.1.2 → 15.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/binding.cc +79 -38
- package/build.sh +1 -2
- package/deps/rocksdb/rocksdb/BUCK +10 -8
- package/deps/rocksdb/rocksdb/CMakeLists.txt +27 -2
- package/deps/rocksdb/rocksdb/Makefile +27 -116
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +101 -124
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +47 -30
- package/deps/rocksdb/rocksdb/db/c.cc +793 -131
- package/deps/rocksdb/rocksdb/db/c_test.c +571 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +226 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +95 -59
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +45 -35
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +47 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +82 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +69 -24
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +9 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +65 -0
- package/deps/rocksdb/rocksdb/db/db_etc3_test.cc +161 -0
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +20 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +13 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +114 -39
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +39 -25
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +361 -0
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +83 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +249 -4
- package/deps/rocksdb/rocksdb/db/db_test2.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/listener_test.cc +7 -17
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +41 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +2 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +7 -4
- package/deps/rocksdb/rocksdb/db/version_set.cc +299 -90
- package/deps/rocksdb/rocksdb/db/version_set.h +56 -9
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +41 -39
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +7 -1
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +48 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +16 -5
- package/deps/rocksdb/rocksdb/env/env_test.cc +126 -41
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +14 -7
- package/deps/rocksdb/rocksdb/env/io_posix.cc +304 -112
- package/deps/rocksdb/rocksdb/env/io_posix.h +16 -4
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/folly.mk +148 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +29 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +73 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +246 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +15 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +67 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +1 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +6 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +8 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +0 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +33 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +5 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +2 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +6 -6
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +10 -5
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +15 -3
- package/deps/rocksdb/rocksdb/options/cf_options.h +7 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +27 -36
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -2
- package/deps/rocksdb/rocksdb/options/options.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +8 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +19 -3
- package/deps/rocksdb/rocksdb/src.mk +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +155 -32
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +169 -125
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +22 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +43 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +9 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +9 -8
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +17 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +13 -18
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +29 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +15 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +79 -19
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +48 -20
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +51 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +19 -0
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +3 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +48 -39
- package/deps/rocksdb/rocksdb/test_util/sync_point.cc +4 -0
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +32 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +14 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +3 -2
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +63 -12
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +16 -1
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +5 -1
- package/deps/rocksdb/rocksdb/util/bit_fields.h +133 -23
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +2 -5
- package/deps/rocksdb/rocksdb/util/compression.cc +51 -23
- package/deps/rocksdb/rocksdb/util/compression_test.cc +525 -270
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -4
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +11 -2
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -1
- package/deps/rocksdb/rocksdb/util/slice_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -2
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -2
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +19 -2
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +75 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +303 -111
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +379 -0
- package/deps/rocksdb/rocksdb.gyp +1 -0
- package/iterator.js +66 -70
- package/package.json +6 -6
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder_test.cc +0 -183
|
@@ -208,10 +208,12 @@ class BlockBasedTable : public TableReader {
|
|
|
208
208
|
size_t ApproximateMemoryUsage() const override;
|
|
209
209
|
|
|
210
210
|
// convert SST file to a human readable form
|
|
211
|
-
Status DumpTable(WritableFile* out_file
|
|
211
|
+
Status DumpTable(WritableFile* out_file,
|
|
212
|
+
bool show_sequence_number_type = false) override;
|
|
212
213
|
|
|
213
214
|
Status VerifyChecksum(const ReadOptions& readOptions,
|
|
214
|
-
TableReaderCaller caller
|
|
215
|
+
TableReaderCaller caller,
|
|
216
|
+
bool meta_blocks_only = false) override;
|
|
215
217
|
|
|
216
218
|
void MarkObsolete(uint32_t uncache_aggressiveness) override;
|
|
217
219
|
|
|
@@ -429,7 +431,7 @@ class BlockBasedTable : public TableReader {
|
|
|
429
431
|
// 3. We disallowed any io to be performed, that is, read_options ==
|
|
430
432
|
// kBlockCacheTier
|
|
431
433
|
InternalIteratorBase<IndexValue>* NewIndexIterator(
|
|
432
|
-
const ReadOptions& read_options, bool
|
|
434
|
+
const ReadOptions& read_options, bool disable_prefix_seek,
|
|
433
435
|
IndexBlockIter* input_iter, GetContext* get_context,
|
|
434
436
|
BlockCacheLookupContext* lookup_context) const;
|
|
435
437
|
|
|
@@ -548,9 +550,11 @@ class BlockBasedTable : public TableReader {
|
|
|
548
550
|
|
|
549
551
|
// Helper functions for DumpTable()
|
|
550
552
|
Status DumpIndexBlock(std::ostream& out_stream);
|
|
551
|
-
Status DumpDataBlocks(std::ostream& out_stream
|
|
553
|
+
Status DumpDataBlocks(std::ostream& out_stream,
|
|
554
|
+
bool show_sequence_number_type = false);
|
|
552
555
|
void DumpKeyValue(const Slice& key, const Slice& value,
|
|
553
|
-
std::ostream& out_stream
|
|
556
|
+
std::ostream& out_stream,
|
|
557
|
+
bool show_sequence_number_type = false);
|
|
554
558
|
|
|
555
559
|
// Returns false if prefix_extractor exists and is compatible with that used
|
|
556
560
|
// in building the table file, otherwise true.
|
|
@@ -1274,13 +1274,13 @@ TEST_P(BlockBasedTableReaderMultiScanAsyncIOTest, MultiScanPrepare) {
|
|
|
1274
1274
|
ExtractUserKey(kv[33 * kEntriesPerBlock].first));
|
|
1275
1275
|
iter->Prepare(&scan_options);
|
|
1276
1276
|
iter->Seek(kv[32 * kEntriesPerBlock].first);
|
|
1277
|
+
auto key = iter->key();
|
|
1277
1278
|
ASSERT_OK(iter->status());
|
|
1278
|
-
iter->Seek(kv[34 * kEntriesPerBlock].first);
|
|
1279
|
-
ASSERT_OK(iter->status());
|
|
1280
|
-
// Seek key could not going backward
|
|
1281
1279
|
iter->Seek(kv[30 * kEntriesPerBlock].first);
|
|
1282
|
-
|
|
1283
|
-
|
|
1280
|
+
// When seek key goes backward, it is adjusted to the last seeked position.
|
|
1281
|
+
// Assert the key read is same as before.
|
|
1282
|
+
ASSERT_EQ(key, iter->key());
|
|
1283
|
+
ASSERT_OK(iter->status());
|
|
1284
1284
|
|
|
1285
1285
|
// Test prefetch limit reached.
|
|
1286
1286
|
iter.reset(table->NewIterator(
|
|
@@ -1333,9 +1333,10 @@ TEST_P(BlockBasedTableReaderMultiScanAsyncIOTest, MultiScanPrepare) {
|
|
|
1333
1333
|
std::cout << random_seed << std::endl;
|
|
1334
1334
|
SCOPED_TRACE("Random seed " + std::to_string(random_seed));
|
|
1335
1335
|
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1336
|
+
// Search key always start from the start key of first prepared range.
|
|
1337
|
+
int last_read_key_index = rnd.Uniform(100) + 5 * kEntriesPerBlock;
|
|
1338
|
+
while (last_read_key_index < 100 * kEntriesPerBlock) {
|
|
1339
|
+
iter->Seek(kv[last_read_key_index].first);
|
|
1339
1340
|
EXPECT_OK(iter->status());
|
|
1340
1341
|
// iterate for a few keys
|
|
1341
1342
|
while (iter->Valid()) {
|
|
@@ -68,6 +68,18 @@ class FilterBlockBuilder {
|
|
|
68
68
|
// For reporting stats on how many entries the builder considered unique
|
|
69
69
|
virtual size_t EstimateEntriesAdded() = 0;
|
|
70
70
|
|
|
71
|
+
// Returns an estimate of the current filter size based on the builder's
|
|
72
|
+
// state. Implementations should cache the estimate and update it via
|
|
73
|
+
// UpdateFilterSizeEstimate() to avoid recalculating on every key add.
|
|
74
|
+
//
|
|
75
|
+
// Can be called at any time during table construction, even before calling
|
|
76
|
+
// Finish(). Used during table construction to determine when to cut files.
|
|
77
|
+
virtual size_t CurrentFilterSizeEstimate() = 0;
|
|
78
|
+
|
|
79
|
+
// Provides a hook for filter builder when a data block is finalized, such as
|
|
80
|
+
// to update cached filter size estimates.
|
|
81
|
+
virtual void OnDataBlockFinalized(uint64_t /* num_data_blocks */) {}
|
|
82
|
+
|
|
71
83
|
// When using AddWithPrevKey, this must be called before Finish(). (May also
|
|
72
84
|
// be called without AddWithPrevKey, but prev_key_without_ts must be
|
|
73
85
|
// accurate regardless.)
|
|
@@ -110,6 +122,11 @@ class FilterBlockBuilder {
|
|
|
110
122
|
return filter;
|
|
111
123
|
}
|
|
112
124
|
#endif // NDEBUG
|
|
125
|
+
|
|
126
|
+
protected:
|
|
127
|
+
// Update cached filter size estimate. Subclasses should override to update
|
|
128
|
+
// estimates based on their internal state.
|
|
129
|
+
virtual void UpdateFilterSizeEstimate(uint64_t /* num_data_blocks */) {}
|
|
113
130
|
};
|
|
114
131
|
|
|
115
132
|
// A FilterBlockReader is used to parse filter from SST table.
|
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
#include <limits>
|
|
18
18
|
#include <memory>
|
|
19
19
|
|
|
20
|
-
#include "cache/cache_entry_roles.h"
|
|
21
20
|
#include "cache/cache_reservation_manager.h"
|
|
22
21
|
#include "logging/logging.h"
|
|
23
22
|
#include "port/lang.h"
|
|
@@ -29,8 +28,8 @@
|
|
|
29
28
|
#include "table/block_based/block_based_table_reader.h"
|
|
30
29
|
#include "table/block_based/filter_policy_internal.h"
|
|
31
30
|
#include "table/block_based/full_filter_block.h"
|
|
31
|
+
#include "util/atomic.h"
|
|
32
32
|
#include "util/bloom_impl.h"
|
|
33
|
-
#include "util/coding.h"
|
|
34
33
|
#include "util/hash.h"
|
|
35
34
|
#include "util/math.h"
|
|
36
35
|
#include "util/ribbon_config.h"
|
|
@@ -61,7 +60,7 @@ Slice FinishAlwaysTrue(std::unique_ptr<const char[]>* /*buf*/) {
|
|
|
61
60
|
|
|
62
61
|
// Base class for filter builders using the XXH3 preview hash,
|
|
63
62
|
// also known as Hash64 or GetSliceHash64.
|
|
64
|
-
class XXPH3FilterBitsBuilder : public
|
|
63
|
+
class XXPH3FilterBitsBuilder : public FilterBitsBuilder {
|
|
65
64
|
public:
|
|
66
65
|
explicit XXPH3FilterBitsBuilder(
|
|
67
66
|
std::atomic<int64_t>* aggregate_rounding_balance,
|
|
@@ -126,8 +125,11 @@ class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder {
|
|
|
126
125
|
}
|
|
127
126
|
}
|
|
128
127
|
|
|
128
|
+
// Returns an estimate of the number of entries added to the
|
|
129
|
+
// filter. This method is thread-safe and can be safely called
|
|
130
|
+
// from background threads during parallel compression.
|
|
129
131
|
size_t EstimateEntriesAdded() override {
|
|
130
|
-
return hash_entries_info_.
|
|
132
|
+
return hash_entries_info_.entries_count.LoadRelaxed();
|
|
131
133
|
}
|
|
132
134
|
|
|
133
135
|
Status MaybePostVerify(const Slice& filter_content) override;
|
|
@@ -147,6 +149,7 @@ class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder {
|
|
|
147
149
|
hash_entries_info_.xor_checksum ^= hash;
|
|
148
150
|
}
|
|
149
151
|
hash_entries_info_.entries.push_back(hash);
|
|
152
|
+
hash_entries_info_.entries_count.FetchAddRelaxed(1);
|
|
150
153
|
if (cache_res_mgr_ &&
|
|
151
154
|
// Traditional rounding to whole bucket size
|
|
152
155
|
((hash_entries_info_.entries.size() %
|
|
@@ -314,6 +317,10 @@ class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder {
|
|
|
314
317
|
// and has near-minimal peak memory use.
|
|
315
318
|
std::deque<uint64_t> entries;
|
|
316
319
|
|
|
320
|
+
// Tracks the number of entries added for thread-safe
|
|
321
|
+
// size estimation.
|
|
322
|
+
RelaxedAtomic<size_t> entries_count{0};
|
|
323
|
+
|
|
317
324
|
// If cache_res_mgr_ != nullptr,
|
|
318
325
|
// it manages cache charge for buckets of hash entries in (new) Bloom
|
|
319
326
|
// or Ribbon Filter construction.
|
|
@@ -332,6 +339,8 @@ class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder {
|
|
|
332
339
|
void Swap(HashEntriesInfo* other) {
|
|
333
340
|
assert(other != nullptr);
|
|
334
341
|
std::swap(entries, other->entries);
|
|
342
|
+
entries_count.StoreRelaxed(
|
|
343
|
+
other->entries_count.ExchangeRelaxed(entries_count.LoadRelaxed()));
|
|
335
344
|
std::swap(cache_res_bucket_handles, other->cache_res_bucket_handles);
|
|
336
345
|
std::swap(xor_checksum, other->xor_checksum);
|
|
337
346
|
std::swap(prev_alt_hash, other->prev_alt_hash);
|
|
@@ -339,6 +348,7 @@ class XXPH3FilterBitsBuilder : public BuiltinFilterBitsBuilder {
|
|
|
339
348
|
|
|
340
349
|
void Reset() {
|
|
341
350
|
entries.clear();
|
|
351
|
+
entries_count.StoreRelaxed(0);
|
|
342
352
|
cache_res_bucket_handles.clear();
|
|
343
353
|
xor_checksum = 0;
|
|
344
354
|
prev_alt_hash = {};
|
|
@@ -1066,7 +1076,7 @@ class Standard128RibbonBitsReader : public BuiltinFilterBitsReader {
|
|
|
1066
1076
|
|
|
1067
1077
|
using LegacyBloomImpl = LegacyLocalityBloomImpl</*ExtraRotates*/ false>;
|
|
1068
1078
|
|
|
1069
|
-
class LegacyBloomBitsBuilder : public
|
|
1079
|
+
class LegacyBloomBitsBuilder : public FilterBitsBuilder {
|
|
1070
1080
|
public:
|
|
1071
1081
|
explicit LegacyBloomBitsBuilder(const int bits_per_key, Logger* info_log);
|
|
1072
1082
|
|
|
@@ -90,6 +90,19 @@ class FilterBitsBuilder {
|
|
|
90
90
|
// <= the specified number of bytes. Callers (including RocksDB) should
|
|
91
91
|
// only use this result for optimizing performance and not as a guarantee.
|
|
92
92
|
virtual size_t ApproximateNumEntries(size_t bytes) = 0;
|
|
93
|
+
|
|
94
|
+
// Calculate number of bytes needed for a new filter, including
|
|
95
|
+
// metadata. Passing the result to ApproximateNumEntries should
|
|
96
|
+
// (ideally, usually) return >= the num_entry passed in.
|
|
97
|
+
// When optimize_filters_for_memory is enabled, this function
|
|
98
|
+
// is not authoritative but represents a target size that should
|
|
99
|
+
// be close to the average size.
|
|
100
|
+
virtual size_t CalculateSpace(size_t num_entries) = 0;
|
|
101
|
+
|
|
102
|
+
// Returns an estimate of the FP rate of the returned filter if
|
|
103
|
+
// `num_entries` keys are added and the filter returned by Finish
|
|
104
|
+
// is `bytes` bytes.
|
|
105
|
+
virtual double EstimatedFpRate(size_t num_entries, size_t bytes) = 0;
|
|
93
106
|
};
|
|
94
107
|
|
|
95
108
|
// A class that checks if a key can be in filter
|
|
@@ -109,24 +122,6 @@ class FilterBitsReader {
|
|
|
109
122
|
}
|
|
110
123
|
};
|
|
111
124
|
|
|
112
|
-
// Exposes any extra information needed for testing built-in
|
|
113
|
-
// FilterBitsBuilders
|
|
114
|
-
class BuiltinFilterBitsBuilder : public FilterBitsBuilder {
|
|
115
|
-
public:
|
|
116
|
-
// Calculate number of bytes needed for a new filter, including
|
|
117
|
-
// metadata. Passing the result to ApproximateNumEntries should
|
|
118
|
-
// (ideally, usually) return >= the num_entry passed in.
|
|
119
|
-
// When optimize_filters_for_memory is enabled, this function
|
|
120
|
-
// is not authoritative but represents a target size that should
|
|
121
|
-
// be close to the average size.
|
|
122
|
-
virtual size_t CalculateSpace(size_t num_entries) = 0;
|
|
123
|
-
|
|
124
|
-
// Returns an estimate of the FP rate of the returned filter if
|
|
125
|
-
// `num_entries` keys are added and the filter returned by Finish
|
|
126
|
-
// is `bytes` bytes.
|
|
127
|
-
virtual double EstimatedFpRate(size_t num_entries, size_t bytes) = 0;
|
|
128
|
-
};
|
|
129
|
-
|
|
130
125
|
// Base class for RocksDB built-in filter reader with
|
|
131
126
|
// extra useful functionalities for inernal.
|
|
132
127
|
class BuiltinFilterBitsReader : public FilterBitsReader {
|
|
@@ -30,6 +30,35 @@ size_t FullFilterBlockBuilder::EstimateEntriesAdded() {
|
|
|
30
30
|
return filter_bits_builder_->EstimateEntriesAdded();
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
+
void FullFilterBlockBuilder::OnDataBlockFinalized(uint64_t num_data_blocks) {
|
|
34
|
+
UpdateFilterSizeEstimate(num_data_blocks);
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
size_t FullFilterBlockBuilder::CurrentFilterSizeEstimate() {
|
|
38
|
+
return estimated_filter_size_;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
void FullFilterBlockBuilder::UpdateFilterSizeEstimate(
|
|
42
|
+
uint64_t num_data_blocks) {
|
|
43
|
+
size_t entries_added = filter_bits_builder_->EstimateEntriesAdded();
|
|
44
|
+
|
|
45
|
+
if (entries_added == 0) {
|
|
46
|
+
estimated_filter_size_ = 0;
|
|
47
|
+
return;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
size_t filter_size = filter_bits_builder_->CalculateSpace(entries_added);
|
|
51
|
+
|
|
52
|
+
// Reserve filter space for next data block ~2x the average.
|
|
53
|
+
size_t buffer_size = 0;
|
|
54
|
+
if (num_data_blocks > 0) {
|
|
55
|
+
buffer_size = (filter_size / num_data_blocks) * 2;
|
|
56
|
+
estimated_filter_size_ = filter_size + buffer_size;
|
|
57
|
+
} else {
|
|
58
|
+
estimated_filter_size_ = filter_size;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
33
62
|
void FullFilterBlockBuilder::AddWithPrevKey(
|
|
34
63
|
const Slice& key_without_ts, const Slice& /*prev_key_without_ts*/) {
|
|
35
64
|
FullFilterBlockBuilder::Add(key_without_ts);
|
|
@@ -57,6 +57,8 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
|
|
57
57
|
return filter_bits_builder_->EstimateEntriesAdded() == 0;
|
|
58
58
|
}
|
|
59
59
|
size_t EstimateEntriesAdded() override;
|
|
60
|
+
size_t CurrentFilterSizeEstimate() override;
|
|
61
|
+
void OnDataBlockFinalized(uint64_t num_data_blocks) override;
|
|
60
62
|
Status Finish(const BlockHandle& last_partition_block_handle, Slice* filter,
|
|
61
63
|
std::unique_ptr<const char[]>* filter_owner = nullptr) override;
|
|
62
64
|
using FilterBlockBuilder::Finish;
|
|
@@ -73,6 +75,8 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
|
|
73
75
|
|
|
74
76
|
std::unique_ptr<FilterBitsBuilder> filter_bits_builder_;
|
|
75
77
|
|
|
78
|
+
void UpdateFilterSizeEstimate(uint64_t num_data_blocks_written) override;
|
|
79
|
+
|
|
76
80
|
private:
|
|
77
81
|
// important: all of these might point to invalid addresses
|
|
78
82
|
// at the time of destruction of this filter block. destructor
|
|
@@ -80,6 +84,8 @@ class FullFilterBlockBuilder : public FilterBlockBuilder {
|
|
|
80
84
|
const SliceTransform* const prefix_extractor_;
|
|
81
85
|
const bool whole_key_filtering_;
|
|
82
86
|
std::unique_ptr<const char[]> filter_data_;
|
|
87
|
+
|
|
88
|
+
size_t estimated_filter_size_ = 0;
|
|
83
89
|
};
|
|
84
90
|
|
|
85
91
|
// A FilterBlockReader is used to parse filter from SST table.
|
|
@@ -52,6 +52,13 @@ class TestFilterBitsBuilder : public FilterBitsBuilder {
|
|
|
52
52
|
|
|
53
53
|
size_t ApproximateNumEntries(size_t bytes) override { return bytes / 4; }
|
|
54
54
|
|
|
55
|
+
size_t CalculateSpace(size_t num_entries) override { return num_entries * 4; }
|
|
56
|
+
|
|
57
|
+
double EstimatedFpRate(size_t /* num_entries */,
|
|
58
|
+
size_t /* bytes */) override {
|
|
59
|
+
return 0.0;
|
|
60
|
+
}
|
|
61
|
+
|
|
55
62
|
private:
|
|
56
63
|
std::vector<uint32_t> hash_entries_;
|
|
57
64
|
};
|
|
@@ -229,6 +236,14 @@ class CountUniqueFilterBitsBuilderWrapper : public FilterBitsBuilder {
|
|
|
229
236
|
return b_->ApproximateNumEntries(bytes);
|
|
230
237
|
}
|
|
231
238
|
|
|
239
|
+
size_t CalculateSpace(size_t num_entries) override {
|
|
240
|
+
return b_->CalculateSpace(num_entries);
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
double EstimatedFpRate(size_t num_entries, size_t bytes) override {
|
|
244
|
+
return b_->EstimatedFpRate(num_entries, bytes);
|
|
245
|
+
}
|
|
246
|
+
|
|
232
247
|
size_t CountUnique() { return uniq_.size(); }
|
|
233
248
|
};
|
|
234
249
|
|
|
@@ -117,20 +117,18 @@ Slice ShortenedIndexBuilder::FindShortInternalKeySuccessor(
|
|
|
117
117
|
}
|
|
118
118
|
}
|
|
119
119
|
|
|
120
|
-
|
|
120
|
+
void ShortenedIndexBuilder::UpdateIndexSizeEstimate() {
|
|
121
121
|
uint64_t current_size =
|
|
122
|
-
must_use_separator_with_seq_
|
|
122
|
+
must_use_separator_with_seq_.LoadRelaxed()
|
|
123
123
|
? index_block_builder_.CurrentSizeEstimate()
|
|
124
124
|
: index_block_builder_without_seq_.CurrentSizeEstimate();
|
|
125
125
|
|
|
126
|
-
|
|
127
|
-
|
|
126
|
+
uint64_t final_estimate = current_size;
|
|
127
|
+
if (num_index_entries_ > 0) {
|
|
128
|
+
// Add buffer to generously account (in most cases) for the next index entry
|
|
129
|
+
final_estimate += (2 * (current_size / num_index_entries_));
|
|
128
130
|
}
|
|
129
|
-
|
|
130
|
-
uint64_t avg_entry_size = current_size / num_index_entries_;
|
|
131
|
-
|
|
132
|
-
// Add buffer to generously account (in most cases) for the next index entry
|
|
133
|
-
return current_size + (2 * avg_entry_size);
|
|
131
|
+
estimated_index_size_.StoreRelaxed(final_estimate);
|
|
134
132
|
}
|
|
135
133
|
|
|
136
134
|
PartitionedIndexBuilder* PartitionedIndexBuilder::CreateIndexBuilder(
|
|
@@ -188,8 +186,8 @@ void PartitionedIndexBuilder::MakeNewSubIndexBuilder() {
|
|
|
188
186
|
// must_use_separator_with_seq_ is true (internal-key mode) (set to false by
|
|
189
187
|
// default on Creation) so that flush policy can point to
|
|
190
188
|
// sub_index_builder_->index_block_builder_
|
|
191
|
-
if (must_use_separator_with_seq_) {
|
|
192
|
-
sub_index_builder_->must_use_separator_with_seq_
|
|
189
|
+
if (must_use_separator_with_seq_.LoadRelaxed()) {
|
|
190
|
+
sub_index_builder_->must_use_separator_with_seq_.StoreRelaxed(true);
|
|
193
191
|
builder_to_monitor = &sub_index_builder_->index_block_builder_;
|
|
194
192
|
} else {
|
|
195
193
|
builder_to_monitor = &sub_index_builder_->index_block_builder_without_seq_;
|
|
@@ -237,6 +235,11 @@ void PartitionedIndexBuilder::MaybeFlush(const Slice& index_key,
|
|
|
237
235
|
index_key, EncodedBlockHandle(index_value).AsSlice()));
|
|
238
236
|
if (do_flush) {
|
|
239
237
|
assert(entries_.back().value.get() == sub_index_builder_);
|
|
238
|
+
|
|
239
|
+
// Update estimate of completed partitions when a partition is flushed
|
|
240
|
+
estimated_completed_partitions_size_.FetchAddRelaxed(
|
|
241
|
+
sub_index_builder_->CurrentIndexSizeEstimate());
|
|
242
|
+
|
|
240
243
|
cut_filter_block = true;
|
|
241
244
|
MakeNewSubIndexBuilder();
|
|
242
245
|
}
|
|
@@ -254,9 +257,15 @@ void PartitionedIndexBuilder::FinishIndexEntry(const BlockHandle& block_handle,
|
|
|
254
257
|
skip_delta_encoding);
|
|
255
258
|
std::swap(entries_.back().key, entry->separator_with_seq);
|
|
256
259
|
|
|
257
|
-
|
|
260
|
+
// Update cached size estimate when data blocks are finalized for more
|
|
261
|
+
// accurate tail size estimation. This is needed for parallel compression
|
|
262
|
+
// which uses FinishIndexEntry() instead of AddIndexEntry().
|
|
263
|
+
UpdateIndexSizeEstimate();
|
|
264
|
+
|
|
265
|
+
if (!must_use_separator_with_seq_.LoadRelaxed() &&
|
|
266
|
+
entry->must_use_separator_with_seq) {
|
|
258
267
|
// We need to apply !must_use_separator_with_seq to all sub-index builders
|
|
259
|
-
must_use_separator_with_seq_
|
|
268
|
+
must_use_separator_with_seq_.StoreRelaxed(true);
|
|
260
269
|
flush_policy_->Retarget(sub_index_builder_->index_block_builder_);
|
|
261
270
|
}
|
|
262
271
|
// NOTE: not compatible with coupled partitioned filters so don't need to
|
|
@@ -278,10 +287,15 @@ Slice PartitionedIndexBuilder::AddIndexEntry(
|
|
|
278
287
|
separator_scratch, skip_delta_encoding);
|
|
279
288
|
entries_.back().key.assign(sep.data(), sep.size());
|
|
280
289
|
|
|
281
|
-
|
|
282
|
-
|
|
290
|
+
// Update cached size estimate when data blocks are finalized for more
|
|
291
|
+
// accurate tail size estimation. This ensures the estimate reflects current
|
|
292
|
+
// state after each data block is added.
|
|
293
|
+
UpdateIndexSizeEstimate();
|
|
294
|
+
|
|
295
|
+
if (!must_use_separator_with_seq_.LoadRelaxed() &&
|
|
296
|
+
sub_index_builder_->must_use_separator_with_seq_.LoadRelaxed()) {
|
|
283
297
|
// We need to apply !must_use_separator_with_seq to all sub-index builders
|
|
284
|
-
must_use_separator_with_seq_
|
|
298
|
+
must_use_separator_with_seq_.StoreRelaxed(true);
|
|
285
299
|
flush_policy_->Retarget(sub_index_builder_->index_block_builder_);
|
|
286
300
|
}
|
|
287
301
|
if (UNLIKELY(first_key_in_next_block == nullptr)) {
|
|
@@ -315,7 +329,7 @@ Status PartitionedIndexBuilder::Finish(
|
|
|
315
329
|
const Slice handle_delta_encoding_slice(handle_delta_encoding);
|
|
316
330
|
index_block_builder_.Add(last_entry.key, handle_encoding.AsSlice(),
|
|
317
331
|
&handle_delta_encoding_slice);
|
|
318
|
-
if (!must_use_separator_with_seq_) {
|
|
332
|
+
if (!must_use_separator_with_seq_.LoadRelaxed()) {
|
|
319
333
|
index_block_builder_without_seq_.Add(ExtractUserKey(last_entry.key),
|
|
320
334
|
handle_encoding.AsSlice(),
|
|
321
335
|
&handle_delta_encoding_slice);
|
|
@@ -324,7 +338,7 @@ Status PartitionedIndexBuilder::Finish(
|
|
|
324
338
|
}
|
|
325
339
|
// If there is no sub_index left, then return the 2nd level index.
|
|
326
340
|
if (UNLIKELY(entries_.empty())) {
|
|
327
|
-
if (must_use_separator_with_seq_) {
|
|
341
|
+
if (must_use_separator_with_seq_.LoadRelaxed()) {
|
|
328
342
|
index_blocks->index_block_contents = index_block_builder_.Finish();
|
|
329
343
|
} else {
|
|
330
344
|
index_blocks->index_block_contents =
|
|
@@ -338,7 +352,8 @@ Status PartitionedIndexBuilder::Finish(
|
|
|
338
352
|
// expect more calls to Finish
|
|
339
353
|
Entry& entry = entries_.front();
|
|
340
354
|
// Apply the policy to all sub-indexes
|
|
341
|
-
entry.value->must_use_separator_with_seq_
|
|
355
|
+
entry.value->must_use_separator_with_seq_.StoreRelaxed(
|
|
356
|
+
must_use_separator_with_seq_.LoadRelaxed());
|
|
342
357
|
auto s = entry.value->Finish(index_blocks);
|
|
343
358
|
index_size_ += index_blocks->index_block_contents.size();
|
|
344
359
|
finishing_indexes_ = true;
|
|
@@ -347,4 +362,49 @@ Status PartitionedIndexBuilder::Finish(
|
|
|
347
362
|
}
|
|
348
363
|
|
|
349
364
|
// Returns the current value of the partition counter.
size_t PartitionedIndexBuilder::NumPartitions() const {
  return partition_cnt_;
}
|
|
365
|
+
|
|
366
|
+
void PartitionedIndexBuilder::UpdateIndexSizeEstimate() {
|
|
367
|
+
uint64_t total_size = 0;
|
|
368
|
+
|
|
369
|
+
// Ignore last entry which is a placeholder for the partition being built
|
|
370
|
+
size_t completed_partitions = entries_.size() > 0 ? entries_.size() - 1 : 0;
|
|
371
|
+
|
|
372
|
+
// Use running estimate of completed partitions instead of IndexSize() which
|
|
373
|
+
// is only available after calling Finish().
|
|
374
|
+
uint64_t completed_partitions_size =
|
|
375
|
+
estimated_completed_partitions_size_.LoadRelaxed();
|
|
376
|
+
total_size += completed_partitions_size;
|
|
377
|
+
|
|
378
|
+
// Add current active partition size if it exists
|
|
379
|
+
uint64_t current_sub_index_size = 0;
|
|
380
|
+
if (sub_index_builder_ != nullptr) {
|
|
381
|
+
current_sub_index_size = sub_index_builder_->CurrentIndexSizeEstimate();
|
|
382
|
+
total_size += current_sub_index_size;
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
// Add buffer for top-level index and next partition
|
|
386
|
+
uint64_t buffer_size = 0;
|
|
387
|
+
if (completed_partitions > 0) {
|
|
388
|
+
// Calculate top-level index size. Each top-level entry consists of:
|
|
389
|
+
// separator key (~20-50 bytes) + BlockHandle (~20 bytes) + overhead
|
|
390
|
+
// Estimate ~70 bytes per top-level entry as a reasonable average
|
|
391
|
+
auto estimated_top_level_size = completed_partitions * 70;
|
|
392
|
+
total_size += completed_partitions * 70;
|
|
393
|
+
|
|
394
|
+
// Buffer for next partition + next top-level entry
|
|
395
|
+
uint64_t avg_partition_size =
|
|
396
|
+
completed_partitions_size / completed_partitions;
|
|
397
|
+
uint64_t avg_top_level_entry_size =
|
|
398
|
+
estimated_top_level_size / completed_partitions;
|
|
399
|
+
|
|
400
|
+
buffer_size = 2 * (avg_partition_size + avg_top_level_entry_size);
|
|
401
|
+
total_size += buffer_size;
|
|
402
|
+
} else if (sub_index_builder_ != nullptr) {
|
|
403
|
+
// For the first partition, estimate using the current partition's state
|
|
404
|
+
buffer_size = 2 * current_sub_index_size;
|
|
405
|
+
total_size += buffer_size;
|
|
406
|
+
}
|
|
407
|
+
estimated_index_size_.StoreRelaxed(total_size);
|
|
408
|
+
}
|
|
409
|
+
|
|
350
410
|
} // namespace ROCKSDB_NAMESPACE
|
|
@@ -158,12 +158,15 @@ class IndexBuilder {
|
|
|
158
158
|
// Get the size for index block. Must be called after ::Finish.
|
|
159
159
|
virtual size_t IndexSize() const = 0;
|
|
160
160
|
|
|
161
|
-
//
|
|
162
|
-
//
|
|
161
|
+
// Returns an estimate of the current index size based on the builder's state.
|
|
162
|
+
// Implementations should cache the estimate and update it via
|
|
163
|
+
// UpdateIndexSizeEstimate() to avoid recalculating on every key add,
|
|
164
|
+
// which is critical for performance in the compaction hot path.
|
|
163
165
|
//
|
|
164
|
-
//
|
|
165
|
-
//
|
|
166
|
-
|
|
166
|
+
// This function is only called by the SST "emit thread" but must be
|
|
167
|
+
// thread safe with concurrent calls to UpdateIndexSizeEstimate() from another
|
|
168
|
+
// thread (such as during parallel compression).
|
|
169
|
+
virtual uint64_t CurrentIndexSizeEstimate() const = 0;
|
|
167
170
|
|
|
168
171
|
// Base-class default: always reports true. Subclasses may override.
virtual bool separator_is_key_plus_seq() {
  return true;
}
|
|
169
172
|
|
|
@@ -187,6 +190,13 @@ class IndexBuilder {
|
|
|
187
190
|
l_user_key, r_user_key) == 0;
|
|
188
191
|
}
|
|
189
192
|
|
|
193
|
+
// Updates the cached index size estimate used by CurrentIndexSizeEstimate().
|
|
194
|
+
//
|
|
195
|
+
// This function can be called from the SST "write thread" (via
|
|
196
|
+
// FinishIndexEntry()), and needs to be thread safe with
|
|
197
|
+
// CurrentIndexSizeEstimate() called from the SST "emit thread".
|
|
198
|
+
virtual void UpdateIndexSizeEstimate() {
  // Intentionally empty: the base-class default is a no-op.
}
|
|
199
|
+
|
|
190
200
|
const InternalKeyComparator* comparator_;
|
|
191
201
|
// Size of user-defined timestamp in bytes.
|
|
192
202
|
size_t ts_sz_;
|
|
@@ -234,7 +244,7 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
234
244
|
include_first_key_(include_first_key),
|
|
235
245
|
shortening_mode_(shortening_mode) {
|
|
236
246
|
// Making the default true will disable the feature for old versions
|
|
237
|
-
must_use_separator_with_seq_
|
|
247
|
+
must_use_separator_with_seq_.StoreRelaxed(format_version <= 2);
|
|
238
248
|
}
|
|
239
249
|
|
|
240
250
|
void OnKeyAdded(const Slice& key,
|
|
@@ -257,10 +267,10 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
257
267
|
} else {
|
|
258
268
|
separator_with_seq = last_key_in_current_block;
|
|
259
269
|
}
|
|
260
|
-
if (!must_use_separator_with_seq_ &&
|
|
270
|
+
if (!must_use_separator_with_seq_.LoadRelaxed() &&
|
|
261
271
|
ShouldUseKeyPlusSeqAsSeparator(last_key_in_current_block,
|
|
262
272
|
*first_key_in_next_block)) {
|
|
263
|
-
must_use_separator_with_seq_
|
|
273
|
+
must_use_separator_with_seq_.StoreRelaxed(true);
|
|
264
274
|
}
|
|
265
275
|
} else {
|
|
266
276
|
if (shortening_mode_ == BlockBasedTableOptions::IndexShorteningMode::
|
|
@@ -333,6 +343,7 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
333
343
|
}
|
|
334
344
|
|
|
335
345
|
++num_index_entries_;
|
|
346
|
+
UpdateIndexSizeEstimate();
|
|
336
347
|
}
|
|
337
348
|
|
|
338
349
|
Slice AddIndexEntry(const Slice& last_key_in_current_block,
|
|
@@ -347,7 +358,8 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
347
358
|
Slice first_internal_key = GetFirstInternalKey(&first_internal_key_buf);
|
|
348
359
|
|
|
349
360
|
AddIndexEntryImpl(separator_with_seq, first_internal_key, block_handle,
|
|
350
|
-
must_use_separator_with_seq_,
|
|
361
|
+
must_use_separator_with_seq_.LoadRelaxed(),
|
|
362
|
+
skip_delta_encoding);
|
|
351
363
|
current_block_first_internal_key_.clear();
|
|
352
364
|
return separator_with_seq;
|
|
353
365
|
}
|
|
@@ -396,7 +408,7 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
396
408
|
&entry->separator_with_seq);
|
|
397
409
|
Slice first_internal_key = GetFirstInternalKey(&entry->first_internal_key);
|
|
398
410
|
entry->SaveFrom(separator, first_internal_key,
|
|
399
|
-
must_use_separator_with_seq_);
|
|
411
|
+
must_use_separator_with_seq_.LoadRelaxed());
|
|
400
412
|
current_block_first_internal_key_.clear();
|
|
401
413
|
}
|
|
402
414
|
|
|
@@ -413,7 +425,7 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
413
425
|
using IndexBuilder::Finish;
|
|
414
426
|
Status Finish(IndexBlocks* index_blocks,
|
|
415
427
|
const BlockHandle& /*last_partition_block_handle*/) override {
|
|
416
|
-
if (must_use_separator_with_seq_) {
|
|
428
|
+
if (must_use_separator_with_seq_.LoadRelaxed()) {
|
|
417
429
|
index_blocks->index_block_contents = index_block_builder_.Finish();
|
|
418
430
|
} else {
|
|
419
431
|
index_blocks->index_block_contents =
|
|
@@ -425,10 +437,15 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
425
437
|
|
|
426
438
|
// Returns the stored index_size_ value.
size_t IndexSize() const override {
  return index_size_;
}
|
|
427
439
|
|
|
428
|
-
uint64_t
|
|
440
|
+
uint64_t CurrentIndexSizeEstimate() const override {
|
|
441
|
+
return estimated_index_size_.LoadRelaxed();
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
// Updates the cached size estimate to minimize CPU usage in hot path
|
|
445
|
+
void UpdateIndexSizeEstimate() override;
|
|
429
446
|
|
|
430
447
|
bool separator_is_key_plus_seq() override {
|
|
431
|
-
return must_use_separator_with_seq_;
|
|
448
|
+
return must_use_separator_with_seq_.LoadRelaxed();
|
|
432
449
|
}
|
|
433
450
|
|
|
434
451
|
// Changes *key to a short string >= *key.
|
|
@@ -452,12 +469,14 @@ class ShortenedIndexBuilder : public IndexBuilder {
|
|
|
452
469
|
// before).
|
|
453
470
|
BlockBuilder index_block_builder_without_seq_;
|
|
454
471
|
const bool use_value_delta_encoding_;
|
|
455
|
-
bool must_use_separator_with_seq_;
|
|
472
|
+
RelaxedAtomic<bool> must_use_separator_with_seq_;
|
|
456
473
|
const bool include_first_key_;
|
|
457
474
|
BlockBasedTableOptions::IndexShorteningMode shortening_mode_;
|
|
458
475
|
BlockHandle last_encoded_handle_ = BlockHandle::NullBlockHandle();
|
|
459
476
|
std::string current_block_first_internal_key_;
|
|
460
477
|
uint64_t num_index_entries_ = 0;
|
|
478
|
+
// Cache for index size estimate to avoid recalculating in hot path
|
|
479
|
+
RelaxedAtomic<uint64_t> estimated_index_size_{0};
|
|
461
480
|
};
|
|
462
481
|
|
|
463
482
|
// HashIndexBuilder contains a binary-searchable primary index and the
|
|
@@ -579,8 +598,7 @@ class HashIndexBuilder : public IndexBuilder {
|
|
|
579
598
|
prefix_meta_block_.size();
|
|
580
599
|
}
|
|
581
600
|
|
|
582
|
-
|
|
583
|
-
uint64_t EstimateCurrentIndexSize() const override { return 0; }
|
|
601
|
+
// This builder does not track a running estimate; it always reports 0.
uint64_t CurrentIndexSizeEstimate() const override {
  return 0;
}
|
|
584
602
|
|
|
585
603
|
bool separator_is_key_plus_seq() override {
|
|
586
604
|
return primary_index_builder_.separator_is_key_plus_seq();
|
|
@@ -658,8 +676,11 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|
|
658
676
|
// Returns top_level_index_size_. The argument is accepted but ignored.
size_t TopLevelIndexSize(uint64_t /*unused*/) const {
  return top_level_index_size_;
}
|
|
659
677
|
size_t NumPartitions() const;
|
|
660
678
|
|
|
661
|
-
//
|
|
662
|
-
|
|
679
|
+
// Returns a cached estimate of the current index size. This
|
|
680
|
+
// estimate is updated when data blocks are added.
|
|
681
|
+
uint64_t CurrentIndexSizeEstimate() const override {
|
|
682
|
+
return estimated_index_size_.LoadRelaxed();
|
|
683
|
+
}
|
|
663
684
|
|
|
664
685
|
inline bool ShouldCutFilterBlock() {
|
|
665
686
|
// Current policy is to align the partitions of index and filters
|
|
@@ -679,8 +700,10 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|
|
679
700
|
// cutting the next partition
|
|
680
701
|
void RequestPartitionCut();
|
|
681
702
|
|
|
703
|
+
// This function must be thread safe because multiple worker threads might
|
|
704
|
+
// update the index builder state during parallel compression.
|
|
682
705
|
bool separator_is_key_plus_seq() override {
|
|
683
|
-
return must_use_separator_with_seq_;
|
|
706
|
+
return must_use_separator_with_seq_.LoadRelaxed();
|
|
684
707
|
}
|
|
685
708
|
|
|
686
709
|
bool get_use_value_delta_encoding() const {
|
|
@@ -694,6 +717,7 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|
|
694
717
|
size_t partition_cnt_ = 0;
|
|
695
718
|
|
|
696
719
|
void MakeNewSubIndexBuilder();
|
|
720
|
+
void UpdateIndexSizeEstimate() override;
|
|
697
721
|
|
|
698
722
|
struct Entry {
|
|
699
723
|
std::string key;
|
|
@@ -713,7 +737,7 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|
|
713
737
|
// true if Finish is called once but not complete yet.
|
|
714
738
|
bool finishing_indexes_ = false;
|
|
715
739
|
const BlockBasedTableOptions& table_opt_;
|
|
716
|
-
bool must_use_separator_with_seq_;
|
|
740
|
+
RelaxedAtomic<bool> must_use_separator_with_seq_;
|
|
717
741
|
bool use_value_delta_encoding_;
|
|
718
742
|
// true if an external entity (such as filter partition builder) request
|
|
719
743
|
// cutting the next partition
|
|
@@ -721,5 +745,9 @@ class PartitionedIndexBuilder : public IndexBuilder {
|
|
|
721
745
|
// true if it should cut the next filter partition block
|
|
722
746
|
bool cut_filter_block = false;
|
|
723
747
|
BlockHandle last_encoded_handle_;
|
|
748
|
+
// Cached estimate of current index size, updated when data blocks are added
|
|
749
|
+
RelaxedAtomic<uint64_t> estimated_index_size_{0};
|
|
750
|
+
// Running estimate of completed partitions total size
|
|
751
|
+
RelaxedAtomic<uint64_t> estimated_completed_partitions_size_{0};
|
|
724
752
|
};
|
|
725
753
|
} // namespace ROCKSDB_NAMESPACE
|