@nxtedition/rocksdb 15.1.2 → 15.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +15 -0
- package/binding.cc +79 -38
- package/build.sh +1 -2
- package/deps/rocksdb/rocksdb/BUCK +10 -8
- package/deps/rocksdb/rocksdb/CMakeLists.txt +27 -2
- package/deps/rocksdb/rocksdb/Makefile +27 -116
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +101 -124
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +47 -30
- package/deps/rocksdb/rocksdb/db/c.cc +793 -131
- package/deps/rocksdb/rocksdb/db/c_test.c +571 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +226 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +95 -59
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +45 -35
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -6
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +8 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +47 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +82 -0
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +69 -24
- package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +9 -1
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +65 -0
- package/deps/rocksdb/rocksdb/db/db_etc3_test.cc +161 -0
- package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +20 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +13 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +114 -39
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +3 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +39 -25
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +361 -0
- package/deps/rocksdb/rocksdb/db/db_options_test.cc +35 -0
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +83 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +249 -4
- package/deps/rocksdb/rocksdb/db/db_test2.cc +3 -0
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +3 -2
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -7
- package/deps/rocksdb/rocksdb/db/listener_test.cc +7 -17
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
- package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +41 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +2 -2
- package/deps/rocksdb/rocksdb/db/version_edit.h +7 -4
- package/deps/rocksdb/rocksdb/db/version_set.cc +299 -90
- package/deps/rocksdb/rocksdb/db/version_set.h +56 -9
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +41 -39
- package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +7 -1
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +48 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +16 -5
- package/deps/rocksdb/rocksdb/env/env_test.cc +126 -41
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +14 -7
- package/deps/rocksdb/rocksdb/env/io_posix.cc +304 -112
- package/deps/rocksdb/rocksdb/env/io_posix.h +16 -4
- package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
- package/deps/rocksdb/rocksdb/folly.mk +148 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +29 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +73 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +246 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +15 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +67 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +1 -7
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +6 -14
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +8 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +0 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +33 -5
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +6 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +5 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +2 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +6 -6
- package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +2 -2
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +10 -5
- package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +2 -2
- package/deps/rocksdb/rocksdb/options/cf_options.cc +15 -3
- package/deps/rocksdb/rocksdb/options/cf_options.h +7 -0
- package/deps/rocksdb/rocksdb/options/db_options.cc +27 -36
- package/deps/rocksdb/rocksdb/options/db_options.h +3 -2
- package/deps/rocksdb/rocksdb/options/options.cc +4 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +8 -2
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +19 -3
- package/deps/rocksdb/rocksdb/src.mk +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +155 -32
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +169 -125
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +22 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +43 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +9 -5
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +9 -8
- package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +17 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +13 -18
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +29 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +6 -0
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +15 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +79 -19
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +48 -20
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +51 -0
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +19 -0
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/external_table.cc +2 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +3 -2
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +3 -1
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_reader.h +4 -2
- package/deps/rocksdb/rocksdb/table/table_test.cc +48 -39
- package/deps/rocksdb/rocksdb/test_util/sync_point.cc +4 -0
- package/deps/rocksdb/rocksdb/test_util/sync_point.h +32 -0
- package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -2
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +14 -4
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +3 -2
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +63 -12
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +16 -1
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +5 -1
- package/deps/rocksdb/rocksdb/util/bit_fields.h +133 -23
- package/deps/rocksdb/rocksdb/util/bloom_test.cc +2 -5
- package/deps/rocksdb/rocksdb/util/compression.cc +51 -23
- package/deps/rocksdb/rocksdb/util/compression_test.cc +525 -270
- package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -4
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +11 -2
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -1
- package/deps/rocksdb/rocksdb/util/slice_test.cc +92 -0
- package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -2
- package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -2
- package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +19 -2
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +75 -0
- package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +303 -111
- package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +379 -0
- package/deps/rocksdb/rocksdb.gyp +1 -0
- package/iterator.js +66 -70
- package/package.json +6 -6
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder_test.cc +0 -183
|
@@ -110,6 +110,19 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
|
|
|
110
110
|
}
|
|
111
111
|
}
|
|
112
112
|
|
|
113
|
+
// A convenience function for populating the Compressor* fields; see ~Rep()
|
|
114
|
+
Compressor* MaybeCloneSpecialized(
|
|
115
|
+
Compressor* compressor, CacheEntryRole block_type,
|
|
116
|
+
Compressor::DictSampleArgs&& dict_samples = {}) {
|
|
117
|
+
auto specialized =
|
|
118
|
+
compressor->MaybeCloneSpecialized(block_type, std::move(dict_samples));
|
|
119
|
+
if (specialized) {
|
|
120
|
+
// Caller is responsible for freeing when distinct
|
|
121
|
+
return specialized.release();
|
|
122
|
+
} else {
|
|
123
|
+
return compressor;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
113
126
|
} // namespace
|
|
114
127
|
|
|
115
128
|
// kBlockBasedTableMagicNumber was picked by running
|
|
@@ -824,15 +837,17 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
824
837
|
|
|
825
838
|
// *** Compressors & decompressors - Yes, it seems like a lot here but ***
|
|
826
839
|
// *** these are distinct fields to minimize extra conditionals and ***
|
|
827
|
-
// *** field reads on hot code paths.
|
|
840
|
+
// *** field reads on hot code paths. And to avoid interlocked ***
|
|
841
|
+
// *** instructions associated with shared_ptr. ***
|
|
828
842
|
|
|
829
843
|
// A compressor for blocks in general, without dictionary compression
|
|
830
844
|
std::unique_ptr<Compressor> basic_compressor;
|
|
831
|
-
// A compressor
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
//
|
|
835
|
-
|
|
845
|
+
// A compressor for data blocks, which might be tuned differently and might
|
|
846
|
+
// use dictionary compression (when applicable). See ~Rep() for some details.
|
|
847
|
+
UnownedPtr<Compressor> data_block_compressor = nullptr;
|
|
848
|
+
// A compressor for index blocks, which might be tuned differently from
|
|
849
|
+
// basic_compressor. See ~Rep() for some details.
|
|
850
|
+
UnownedPtr<Compressor> index_block_compressor = nullptr;
|
|
836
851
|
// A decompressor corresponding to basic_compressor (when non-nullptr).
|
|
837
852
|
// Used for verification and cache warming.
|
|
838
853
|
std::shared_ptr<Decompressor> basic_decompressor;
|
|
@@ -853,7 +868,7 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
853
868
|
compression_types_used;
|
|
854
869
|
|
|
855
870
|
// Working area for basic_compressor when compression_parallel_threads==1
|
|
856
|
-
WorkingAreaPair
|
|
871
|
+
WorkingAreaPair index_block_working_area;
|
|
857
872
|
// Working area for data_block_compressor, for emit/compaction thread
|
|
858
873
|
WorkingAreaPair data_block_working_area;
|
|
859
874
|
|
|
@@ -894,6 +909,7 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
894
909
|
std::unique_ptr<FilterBlockBuilder> filter_builder;
|
|
895
910
|
OffsetableCacheKey base_cache_key;
|
|
896
911
|
const TableFileCreationReason reason;
|
|
912
|
+
const bool target_file_size_is_upper_bound;
|
|
897
913
|
|
|
898
914
|
BlockHandle pending_handle; // Handle to add to index block
|
|
899
915
|
|
|
@@ -1041,6 +1057,8 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
1041
1057
|
use_delta_encoding_for_index_values(table_opt.format_version >= 4 &&
|
|
1042
1058
|
!table_opt.block_align),
|
|
1043
1059
|
reason(tbo.reason),
|
|
1060
|
+
target_file_size_is_upper_bound(
|
|
1061
|
+
tbo.moptions.target_file_size_is_upper_bound),
|
|
1044
1062
|
flush_block_policy(
|
|
1045
1063
|
table_options.flush_block_policy_factory->NewFlushBlockPolicy(
|
|
1046
1064
|
table_options, data_block)),
|
|
@@ -1096,7 +1114,10 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
1096
1114
|
filter_context, tbo.compression_opts, tbo.compression_type);
|
|
1097
1115
|
if (basic_compressor) {
|
|
1098
1116
|
if (table_options.enable_index_compression) {
|
|
1099
|
-
|
|
1117
|
+
index_block_compressor = MaybeCloneSpecialized(
|
|
1118
|
+
basic_compressor.get(), CacheEntryRole::kIndexBlock);
|
|
1119
|
+
index_block_working_area.compress =
|
|
1120
|
+
index_block_compressor->ObtainWorkingArea();
|
|
1100
1121
|
}
|
|
1101
1122
|
max_dict_sample_bytes = basic_compressor->GetMaxSampleSizeIfWantDict(
|
|
1102
1123
|
CacheEntryRole::kDataBlock);
|
|
@@ -1111,8 +1132,10 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
1111
1132
|
tbo.compression_opts.max_dict_buffer_bytes);
|
|
1112
1133
|
}
|
|
1113
1134
|
} else {
|
|
1114
|
-
// No distinct data block compressor using dictionary
|
|
1115
|
-
|
|
1135
|
+
// No distinct data block compressor using dictionary, but
|
|
1136
|
+
// implementation might still want to specialize for data blocks
|
|
1137
|
+
data_block_compressor = MaybeCloneSpecialized(
|
|
1138
|
+
basic_compressor.get(), CacheEntryRole::kDataBlock);
|
|
1116
1139
|
data_block_working_area.compress =
|
|
1117
1140
|
data_block_compressor->ObtainWorkingArea();
|
|
1118
1141
|
}
|
|
@@ -1126,8 +1149,9 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
1126
1149
|
if (table_options.verify_compression) {
|
|
1127
1150
|
verify_decompressor = basic_decompressor.get();
|
|
1128
1151
|
if (table_options.enable_index_compression) {
|
|
1129
|
-
|
|
1130
|
-
|
|
1152
|
+
index_block_working_area.verify =
|
|
1153
|
+
verify_decompressor->ObtainWorkingArea(
|
|
1154
|
+
index_block_compressor->GetPreferredCompressionType());
|
|
1131
1155
|
}
|
|
1132
1156
|
if (state == State::kUnbuffered) {
|
|
1133
1157
|
assert(data_block_compressor);
|
|
@@ -1292,8 +1316,19 @@ struct BlockBasedTableBuilder::Rep {
|
|
|
1292
1316
|
}
|
|
1293
1317
|
|
|
1294
1318
|
~Rep() {
|
|
1319
|
+
// Delete working areas before their compressors.
|
|
1320
|
+
index_block_working_area = {};
|
|
1321
|
+
data_block_working_area = {};
|
|
1295
1322
|
// Must have been cleaned up by StopParallelCompression
|
|
1296
1323
|
assert(pc_rep == nullptr);
|
|
1324
|
+
// Delete specialized compressors if they were distinct (avoiding extra
|
|
1325
|
+
// fields and interlocked instructions with shared_ptr)
|
|
1326
|
+
if (data_block_compressor.get() != basic_compressor.get()) {
|
|
1327
|
+
delete data_block_compressor.get();
|
|
1328
|
+
}
|
|
1329
|
+
if (index_block_compressor.get() != basic_compressor.get()) {
|
|
1330
|
+
delete index_block_compressor.get();
|
|
1331
|
+
}
|
|
1297
1332
|
}
|
|
1298
1333
|
|
|
1299
1334
|
Rep(const Rep&) = delete;
|
|
@@ -1611,6 +1646,17 @@ void BlockBasedTableBuilder::Flush(const Slice* first_key_in_next_block) {
|
|
|
1611
1646
|
rep_->data_begin_offset += uncompressed_block_data.size();
|
|
1612
1647
|
MaybeEnterUnbuffered(first_key_in_next_block);
|
|
1613
1648
|
} else {
|
|
1649
|
+
// Increment num_data_blocks when a data block is finalized in the
|
|
1650
|
+
// emit thread to avoid data races with write worker threads
|
|
1651
|
+
++r->props.num_data_blocks;
|
|
1652
|
+
|
|
1653
|
+
// Notify filter builder that a data block has been finalized
|
|
1654
|
+
// This must happen on the emit thread before the block is added to the
|
|
1655
|
+
// ring buffer to avoid race conditions with worker threads
|
|
1656
|
+
if (r->filter_builder) {
|
|
1657
|
+
r->filter_builder->OnDataBlockFinalized(r->props.num_data_blocks);
|
|
1658
|
+
}
|
|
1659
|
+
|
|
1614
1660
|
if (r->IsParallelCompressionActive()) {
|
|
1615
1661
|
EmitBlockForParallel(r->data_block.MutableBuffer(), r->last_ikey,
|
|
1616
1662
|
first_key_in_next_block);
|
|
@@ -1715,9 +1761,11 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data,
|
|
|
1715
1761
|
assert(!r->IsParallelCompressionActive());
|
|
1716
1762
|
CompressionType type;
|
|
1717
1763
|
bool is_data_block = block_type == BlockType::kData;
|
|
1764
|
+
// NOTE: only index and data blocks are currently compressed
|
|
1765
|
+
assert(is_data_block || block_type == BlockType::kIndex);
|
|
1718
1766
|
Status compress_status = CompressAndVerifyBlock(
|
|
1719
1767
|
uncompressed_block_data, is_data_block,
|
|
1720
|
-
is_data_block ? r->data_block_working_area : r->
|
|
1768
|
+
is_data_block ? r->data_block_working_area : r->index_block_working_area,
|
|
1721
1769
|
&r->single_threaded_compressed_output, &type);
|
|
1722
1770
|
r->SetStatus(compress_status);
|
|
1723
1771
|
if (UNLIKELY(!ok())) {
|
|
@@ -1735,7 +1783,6 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data,
|
|
|
1735
1783
|
if (is_data_block) {
|
|
1736
1784
|
r->props.data_size = r->get_offset();
|
|
1737
1785
|
r->props.uncompressed_data_size += uncompressed_block_data.size();
|
|
1738
|
-
++r->props.num_data_blocks;
|
|
1739
1786
|
}
|
|
1740
1787
|
}
|
|
1741
1788
|
|
|
@@ -1784,7 +1831,6 @@ void BlockBasedTableBuilder::BGWorker(WorkingAreaPair& working_area) {
|
|
|
1784
1831
|
if (LIKELY(ios.ok())) {
|
|
1785
1832
|
rep_->props.data_size = rep_->get_offset();
|
|
1786
1833
|
rep_->props.uncompressed_data_size += block_rep->uncompressed.size();
|
|
1787
|
-
++rep_->props.num_data_blocks;
|
|
1788
1834
|
|
|
1789
1835
|
rep_->index_builder->FinishIndexEntry(
|
|
1790
1836
|
rep_->pending_handle, block_rep->prepared_index_entry.get(),
|
|
@@ -1833,13 +1879,13 @@ Status BlockBasedTableBuilder::CompressAndVerifyBlock(
|
|
|
1833
1879
|
Rep* r = rep_.get();
|
|
1834
1880
|
Status status;
|
|
1835
1881
|
|
|
1836
|
-
Compressor
|
|
1882
|
+
UnownedPtr<Compressor> compressor = nullptr;
|
|
1837
1883
|
Decompressor* verify_decomp = nullptr;
|
|
1838
1884
|
if (is_data_block) {
|
|
1839
1885
|
compressor = r->data_block_compressor;
|
|
1840
1886
|
verify_decomp = r->data_block_verify_decompressor.get();
|
|
1841
1887
|
} else {
|
|
1842
|
-
compressor = r->
|
|
1888
|
+
compressor = r->index_block_compressor;
|
|
1843
1889
|
verify_decomp = r->verify_decompressor.get();
|
|
1844
1890
|
}
|
|
1845
1891
|
|
|
@@ -1940,6 +1986,9 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
|
|
|
1940
1986
|
const Slice& block_contents, CompressionType comp_type, BlockHandle* handle,
|
|
1941
1987
|
BlockType block_type, const Slice* uncompressed_block_data,
|
|
1942
1988
|
bool* skip_delta_encoding) {
|
|
1989
|
+
// Must have pre-checked status in single-threaded context
|
|
1990
|
+
assert(status().ok());
|
|
1991
|
+
assert(io_status().ok());
|
|
1943
1992
|
rep_->SetIOStatus(WriteMaybeCompressedBlockImpl(
|
|
1944
1993
|
block_contents, comp_type, handle, block_type, uncompressed_block_data,
|
|
1945
1994
|
skip_delta_encoding));
|
|
@@ -2014,8 +2063,6 @@ IOStatus BlockBasedTableBuilder::WriteMaybeCompressedBlockImpl(
|
|
|
2014
2063
|
|
|
2015
2064
|
handle->set_offset(offset);
|
|
2016
2065
|
handle->set_size(block_contents.size());
|
|
2017
|
-
assert(status().ok());
|
|
2018
|
-
assert(io_status().ok());
|
|
2019
2066
|
if (uncompressed_block_data == nullptr) {
|
|
2020
2067
|
uncompressed_block_data = &block_contents;
|
|
2021
2068
|
assert(comp_type == kNoCompression);
|
|
@@ -2103,7 +2150,7 @@ void BlockBasedTableBuilder::MaybeStartParallelCompression() {
|
|
|
2103
2150
|
// that latency. So even with some optimizations, turning on the parallel
|
|
2104
2151
|
// framework when compression is disabled just eats more CPU with little-to-no
|
|
2105
2152
|
// improvement in throughput.
|
|
2106
|
-
if (rep_->data_block_compressor
|
|
2153
|
+
if (!rep_->data_block_compressor) {
|
|
2107
2154
|
// Force the generally best configuration for no compression: no parallelism
|
|
2108
2155
|
return;
|
|
2109
2156
|
}
|
|
@@ -2140,8 +2187,9 @@ void BlockBasedTableBuilder::StopParallelCompression(bool abort) {
|
|
|
2140
2187
|
pc_rep.SetAbort(pc_rep.emit_thread_state);
|
|
2141
2188
|
} else if (pc_rep.emit_thread_state !=
|
|
2142
2189
|
ParallelCompressionRep::ThreadState::kEnd) {
|
|
2143
|
-
// In case we didn't do a final flush with no next key
|
|
2144
|
-
|
|
2190
|
+
// In case we didn't do a final flush with no next key, which might have
|
|
2191
|
+
// been skipped if !ok() was set after the start of Finish()
|
|
2192
|
+
assert(rep_->props.num_data_blocks == 0 || !ok());
|
|
2145
2193
|
pc_rep.SetNoMoreToEmit(pc_rep.emit_thread_state, pc_rep.emit_slot);
|
|
2146
2194
|
}
|
|
2147
2195
|
#ifdef BBTB_PC_WATCHDOG
|
|
@@ -2449,8 +2497,8 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
|
|
|
2449
2497
|
void BlockBasedTableBuilder::WriteCompressionDictBlock(
|
|
2450
2498
|
MetaIndexBuilder* meta_index_builder) {
|
|
2451
2499
|
Slice compression_dict;
|
|
2452
|
-
if (rep_->
|
|
2453
|
-
compression_dict = rep_->
|
|
2500
|
+
if (rep_->data_block_compressor) {
|
|
2501
|
+
compression_dict = rep_->data_block_compressor->GetSerializedDict();
|
|
2454
2502
|
}
|
|
2455
2503
|
if (!compression_dict.empty()) {
|
|
2456
2504
|
BlockHandle compression_dict_block_handle;
|
|
@@ -2545,6 +2593,7 @@ void BlockBasedTableBuilder::MaybeEnterUnbuffered(
|
|
|
2545
2593
|
// The below code is neither safe nor necessary for handling zero data
|
|
2546
2594
|
// blocks.
|
|
2547
2595
|
// For PostPopulateCompressionProperties()
|
|
2596
|
+
assert(!r->data_block_compressor);
|
|
2548
2597
|
r->data_block_compressor = r->basic_compressor.get();
|
|
2549
2598
|
return;
|
|
2550
2599
|
}
|
|
@@ -2586,15 +2635,12 @@ void BlockBasedTableBuilder::MaybeEnterUnbuffered(
|
|
|
2586
2635
|
|
|
2587
2636
|
assert(samples.sample_data.size() > 0);
|
|
2588
2637
|
|
|
2589
|
-
// final sample data block flushed, now we can generate dictionary
|
|
2590
|
-
|
|
2591
|
-
|
|
2638
|
+
// final sample data block flushed, now we can generate dictionary (or it
|
|
2639
|
+
// might opt not to use a dictionary and that's ok)
|
|
2640
|
+
r->data_block_compressor =
|
|
2641
|
+
MaybeCloneSpecialized(r->basic_compressor.get(),
|
|
2642
|
+
CacheEntryRole::kDataBlock, std::move(samples));
|
|
2592
2643
|
|
|
2593
|
-
// The compressor might opt not to use a dictionary, in which case we
|
|
2594
|
-
// can use the same compressor as for e.g. index blocks.
|
|
2595
|
-
r->data_block_compressor = r->compressor_with_dict
|
|
2596
|
-
? r->compressor_with_dict.get()
|
|
2597
|
-
: r->basic_compressor.get();
|
|
2598
2644
|
Slice serialized_dict = r->data_block_compressor->GetSerializedDict();
|
|
2599
2645
|
if (r->verify_decompressor) {
|
|
2600
2646
|
if (serialized_dict.empty()) {
|
|
@@ -2688,6 +2734,20 @@ void BlockBasedTableBuilder::MaybeEnterUnbuffered(
|
|
|
2688
2734
|
Status BlockBasedTableBuilder::Finish() {
|
|
2689
2735
|
Rep* r = rep_.get();
|
|
2690
2736
|
assert(r->state != Rep::State::kClosed);
|
|
2737
|
+
|
|
2738
|
+
#ifndef NDEBUG
|
|
2739
|
+
{
|
|
2740
|
+
// This sync point callback is a simple approximation of a failure detected
|
|
2741
|
+
// in parallel compression after the start of calling Finish() but before
|
|
2742
|
+
// Finish() calls Flush()
|
|
2743
|
+
IOStatus s = rep_->GetIOStatus();
|
|
2744
|
+
TEST_SYNC_POINT_CALLBACK("BlockBasedTableBuilder::Finish:ParallelIOStatus",
|
|
2745
|
+
&s);
|
|
2746
|
+
if (!s.ok()) {
|
|
2747
|
+
rep_->SetIOStatus(s);
|
|
2748
|
+
}
|
|
2749
|
+
}
|
|
2750
|
+
#endif // !NDEBUG
|
|
2691
2751
|
// To make sure properties block is able to keep the accurate size of index
|
|
2692
2752
|
// block, we will finish writing all index entries first, in Flush().
|
|
2693
2753
|
Flush(/*first_key_in_next_block=*/nullptr);
|
|
@@ -2701,6 +2761,8 @@ Status BlockBasedTableBuilder::Finish() {
|
|
|
2701
2761
|
|
|
2702
2762
|
r->props.tail_start_offset = r->offset.LoadRelaxed();
|
|
2703
2763
|
|
|
2764
|
+
uint64_t last_estimated_tail_size = EstimatedTailSize();
|
|
2765
|
+
|
|
2704
2766
|
// Write meta blocks, metaindex block and footer in the following order.
|
|
2705
2767
|
// 1. [meta block: filter]
|
|
2706
2768
|
// 2. [meta block: index]
|
|
@@ -2727,6 +2789,24 @@ Status BlockBasedTableBuilder::Finish() {
|
|
|
2727
2789
|
r->state = Rep::State::kClosed;
|
|
2728
2790
|
r->tail_size = r->offset.LoadRelaxed() - r->props.tail_start_offset;
|
|
2729
2791
|
|
|
2792
|
+
// Assert tail size estimation is an overestimate only when tail size
|
|
2793
|
+
// estimation option is enabled for compaction files with supported
|
|
2794
|
+
// index/filter types:
|
|
2795
|
+
// - Shortened indexes (kBinarySearch, kBinarySearchWithFirstKey)
|
|
2796
|
+
// - Partitioned indexes (kTwoLevelIndexSearch)
|
|
2797
|
+
// - Full filters
|
|
2798
|
+
// - Partitioned filters
|
|
2799
|
+
if (r->target_file_size_is_upper_bound &&
|
|
2800
|
+
r->reason == TableFileCreationReason::kCompaction &&
|
|
2801
|
+
r->table_options.index_type != BlockBasedTableOptions::kHashSearch) {
|
|
2802
|
+
ROCKS_LOG_WARN(r->ioptions.info_log,
|
|
2803
|
+
"File number: %" PRIu64 ", Estimated tail size = %" PRIu64
|
|
2804
|
+
" bytes, Actual tail size = %" PRIu64 " bytes",
|
|
2805
|
+
r->props.orig_file_number, last_estimated_tail_size,
|
|
2806
|
+
r->tail_size);
|
|
2807
|
+
assert(r->tail_size <= last_estimated_tail_size);
|
|
2808
|
+
}
|
|
2809
|
+
|
|
2730
2810
|
return r->GetStatus();
|
|
2731
2811
|
}
|
|
2732
2812
|
|
|
@@ -2764,6 +2844,49 @@ uint64_t BlockBasedTableBuilder::EstimatedFileSize() const {
|
|
|
2764
2844
|
}
|
|
2765
2845
|
}
|
|
2766
2846
|
|
|
2847
|
+
uint64_t BlockBasedTableBuilder::EstimatedTailSize() const {
|
|
2848
|
+
uint64_t estimated_tail_size = 0;
|
|
2849
|
+
|
|
2850
|
+
// 1. Estimate index size
|
|
2851
|
+
if (rep_->table_options.index_type ==
|
|
2852
|
+
BlockBasedTableOptions::kTwoLevelIndexSearch) {
|
|
2853
|
+
assert(rep_->p_index_builder_);
|
|
2854
|
+
estimated_tail_size += rep_->p_index_builder_->CurrentIndexSizeEstimate();
|
|
2855
|
+
} else {
|
|
2856
|
+
assert(rep_->index_builder);
|
|
2857
|
+
estimated_tail_size += rep_->index_builder->CurrentIndexSizeEstimate();
|
|
2858
|
+
}
|
|
2859
|
+
|
|
2860
|
+
// 2. Estimate filter size
|
|
2861
|
+
if (rep_->filter_builder) {
|
|
2862
|
+
estimated_tail_size += rep_->filter_builder->CurrentFilterSizeEstimate();
|
|
2863
|
+
}
|
|
2864
|
+
|
|
2865
|
+
// 3. Estimate compression dictionary size
|
|
2866
|
+
if (rep_->data_block_compressor) {
|
|
2867
|
+
Slice dict = rep_->data_block_compressor->GetSerializedDict();
|
|
2868
|
+
if (!dict.empty()) {
|
|
2869
|
+
estimated_tail_size += dict.size();
|
|
2870
|
+
}
|
|
2871
|
+
}
|
|
2872
|
+
|
|
2873
|
+
// 4. Estimate range deletion block size
|
|
2874
|
+
if (!rep_->range_del_block.empty()) {
|
|
2875
|
+
estimated_tail_size += rep_->range_del_block.CurrentSizeEstimate();
|
|
2876
|
+
}
|
|
2877
|
+
|
|
2878
|
+
// 5. Estimate properties block size conservatively (~1-2KB)
|
|
2879
|
+
estimated_tail_size += 2048;
|
|
2880
|
+
|
|
2881
|
+
// 6. Estimate meta-index block size conservatively (~1KB)
|
|
2882
|
+
estimated_tail_size += 1024;
|
|
2883
|
+
|
|
2884
|
+
// 7. Add footer size
|
|
2885
|
+
estimated_tail_size += Footer::kMaxEncodedLength;
|
|
2886
|
+
|
|
2887
|
+
return estimated_tail_size;
|
|
2888
|
+
}
|
|
2889
|
+
|
|
2767
2890
|
uint64_t BlockBasedTableBuilder::GetTailSize() const { return rep_->tail_size; }
|
|
2768
2891
|
|
|
2769
2892
|
bool BlockBasedTableBuilder::NeedCompact() const {
|
|
@@ -89,11 +89,15 @@ class BlockBasedTableBuilder : public TableBuilder {
|
|
|
89
89
|
// Finish() call, returns the size of the final generated file.
|
|
90
90
|
uint64_t FileSize() const override;
|
|
91
91
|
|
|
92
|
-
// Estimated size of the file generated so far
|
|
93
|
-
//
|
|
94
|
-
// is enabled.
|
|
92
|
+
// Estimated size of the file generated so far (based on data blocks, this
|
|
93
|
+
// estimate does not include meta blocks). This is used when FileSize() cannot
|
|
94
|
+
// estimate final SST size, e.g. parallel compression is enabled.
|
|
95
95
|
uint64_t EstimatedFileSize() const override;
|
|
96
96
|
|
|
97
|
+
// Estimated tail size of the SST file generated so far. The "tail" refers to
|
|
98
|
+
// all blocks written after data blocks (index + filter).
|
|
99
|
+
uint64_t EstimatedTailSize() const override;
|
|
100
|
+
|
|
97
101
|
// Get the size of the "tail" part of a SST file. "Tail" refers to
|
|
98
102
|
// all blocks after data blocks till the end of the SST file.
|
|
99
103
|
uint64_t GetTailSize() const override;
|