@nxtedition/rocksdb 15.1.2 → 15.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. package/.claude/settings.local.json +15 -0
  2. package/binding.cc +79 -38
  3. package/build.sh +1 -2
  4. package/deps/rocksdb/rocksdb/BUCK +10 -8
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +27 -2
  6. package/deps/rocksdb/rocksdb/Makefile +27 -116
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +1 -1
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +101 -124
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +47 -30
  10. package/deps/rocksdb/rocksdb/db/c.cc +793 -131
  11. package/deps/rocksdb/rocksdb/db/c_test.c +571 -0
  12. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +226 -0
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +4 -0
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +95 -59
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +2 -2
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +45 -35
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +8 -4
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +1 -1
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +11 -6
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +8 -2
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +47 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -2
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +82 -0
  24. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.cc +2 -2
  25. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +1 -1
  26. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +69 -24
  27. package/deps/rocksdb/rocksdb/db/db_bloom_filter_test.cc +9 -1
  28. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +65 -0
  29. package/deps/rocksdb/rocksdb/db/db_etc3_test.cc +161 -0
  30. package/deps/rocksdb/rocksdb/db/db_filesnapshot.cc +1 -0
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +20 -7
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +13 -0
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +114 -39
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +3 -0
  35. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +3 -3
  36. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +39 -25
  38. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +361 -0
  39. package/deps/rocksdb/rocksdb/db/db_options_test.cc +35 -0
  40. package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +83 -0
  41. package/deps/rocksdb/rocksdb/db/db_test.cc +249 -4
  42. package/deps/rocksdb/rocksdb/db/db_test2.cc +3 -0
  43. package/deps/rocksdb/rocksdb/db/db_test_util.cc +2 -1
  44. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +3 -2
  45. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -7
  46. package/deps/rocksdb/rocksdb/db/listener_test.cc +7 -17
  47. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +4 -2
  48. package/deps/rocksdb/rocksdb/db/obsolete_files_test.cc +41 -0
  49. package/deps/rocksdb/rocksdb/db/repair.cc +2 -2
  50. package/deps/rocksdb/rocksdb/db/version_edit.h +7 -4
  51. package/deps/rocksdb/rocksdb/db/version_set.cc +299 -90
  52. package/deps/rocksdb/rocksdb/db/version_set.h +56 -9
  53. package/deps/rocksdb/rocksdb/db/version_set_test.cc +41 -39
  54. package/deps/rocksdb/rocksdb/db/version_util.h +3 -2
  55. package/deps/rocksdb/rocksdb/db/wal_manager.cc +7 -1
  56. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +48 -10
  57. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +1 -0
  58. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +5 -1
  59. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +16 -5
  60. package/deps/rocksdb/rocksdb/env/env_test.cc +126 -41
  61. package/deps/rocksdb/rocksdb/env/fs_posix.cc +14 -7
  62. package/deps/rocksdb/rocksdb/env/io_posix.cc +304 -112
  63. package/deps/rocksdb/rocksdb/env/io_posix.h +16 -4
  64. package/deps/rocksdb/rocksdb/env/io_posix_test.cc +43 -0
  65. package/deps/rocksdb/rocksdb/folly.mk +148 -0
  66. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +29 -3
  67. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +73 -0
  68. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +246 -0
  69. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_filter.h +0 -2
  70. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +15 -9
  71. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +19 -9
  72. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
  73. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +6 -4
  74. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +14 -0
  75. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +67 -6
  76. package/deps/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h +1 -7
  77. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +3 -0
  78. package/deps/rocksdb/rocksdb/include/rocksdb/thread_status.h +6 -14
  79. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/backup_engine.h +8 -1
  80. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h +2 -2
  81. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h +0 -4
  82. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h +33 -5
  83. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +6 -0
  84. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  85. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +2 -0
  86. package/deps/rocksdb/rocksdb/monitoring/thread_status_impl.cc +5 -2
  87. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.cc +2 -2
  88. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater.h +6 -6
  89. package/deps/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc +2 -2
  90. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.cc +10 -5
  91. package/deps/rocksdb/rocksdb/monitoring/thread_status_util.h +2 -2
  92. package/deps/rocksdb/rocksdb/options/cf_options.cc +15 -3
  93. package/deps/rocksdb/rocksdb/options/cf_options.h +7 -0
  94. package/deps/rocksdb/rocksdb/options/db_options.cc +27 -36
  95. package/deps/rocksdb/rocksdb/options/db_options.h +3 -2
  96. package/deps/rocksdb/rocksdb/options/options.cc +4 -0
  97. package/deps/rocksdb/rocksdb/options/options_helper.cc +8 -2
  98. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +4 -1
  99. package/deps/rocksdb/rocksdb/options/options_test.cc +19 -3
  100. package/deps/rocksdb/rocksdb/src.mk +1 -1
  101. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +155 -32
  102. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +7 -3
  103. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +169 -125
  104. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +22 -7
  105. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +43 -24
  106. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +9 -5
  107. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +9 -8
  108. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +17 -0
  109. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +15 -5
  110. package/deps/rocksdb/rocksdb/table/block_based/filter_policy_internal.h +13 -18
  111. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +29 -0
  112. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +6 -0
  113. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +15 -0
  114. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +79 -19
  115. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +48 -20
  116. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +51 -0
  117. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +19 -0
  118. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +1 -1
  119. package/deps/rocksdb/rocksdb/table/external_table.cc +2 -2
  120. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +3 -2
  121. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +3 -1
  122. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  123. package/deps/rocksdb/rocksdb/table/table_reader.h +4 -2
  124. package/deps/rocksdb/rocksdb/table/table_test.cc +48 -39
  125. package/deps/rocksdb/rocksdb/test_util/sync_point.cc +4 -0
  126. package/deps/rocksdb/rocksdb/test_util/sync_point.h +32 -0
  127. package/deps/rocksdb/rocksdb/test_util/testutil.h +6 -2
  128. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +14 -4
  129. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +8 -5
  130. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +3 -2
  131. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +63 -12
  132. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +16 -1
  133. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +5 -1
  134. package/deps/rocksdb/rocksdb/util/bit_fields.h +133 -23
  135. package/deps/rocksdb/rocksdb/util/bloom_test.cc +2 -5
  136. package/deps/rocksdb/rocksdb/util/compression.cc +51 -23
  137. package/deps/rocksdb/rocksdb/util/compression_test.cc +525 -270
  138. package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -4
  139. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +11 -2
  140. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -1
  141. package/deps/rocksdb/rocksdb/util/slice_test.cc +92 -0
  142. package/deps/rocksdb/rocksdb/util/thread_list_test.cc +2 -2
  143. package/deps/rocksdb/rocksdb/util/thread_operation.h +2 -2
  144. package/deps/rocksdb/rocksdb/util/threadpool_imp.cc +2 -2
  145. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine.cc +19 -2
  146. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +75 -0
  147. package/deps/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc +1 -0
  148. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc +303 -111
  149. package/deps/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc +379 -0
  150. package/deps/rocksdb/rocksdb.gyp +1 -0
  151. package/iterator.js +66 -70
  152. package/package.json +6 -6
  153. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/index_builder_test.cc +0 -183
@@ -110,6 +110,19 @@ FilterBlockBuilder* CreateFilterBlockBuilder(
110
110
  }
111
111
  }
112
112
 
113
+ // A convenience function for populating the Compressor* fields; see ~Rep()
114
+ Compressor* MaybeCloneSpecialized(
115
+ Compressor* compressor, CacheEntryRole block_type,
116
+ Compressor::DictSampleArgs&& dict_samples = {}) {
117
+ auto specialized =
118
+ compressor->MaybeCloneSpecialized(block_type, std::move(dict_samples));
119
+ if (specialized) {
120
+ // Caller is responsible for freeing when distinct
121
+ return specialized.release();
122
+ } else {
123
+ return compressor;
124
+ }
125
+ }
113
126
  } // namespace
114
127
 
115
128
  // kBlockBasedTableMagicNumber was picked by running
@@ -824,15 +837,17 @@ struct BlockBasedTableBuilder::Rep {
824
837
 
825
838
  // *** Compressors & decompressors - Yes, it seems like a lot here but ***
826
839
  // *** these are distinct fields to minimize extra conditionals and ***
827
- // *** field reads on hot code paths. ***
840
+ // *** field reads on hot code paths. And to avoid interlocked ***
841
+ // *** instructions associated with shared_ptr. ***
828
842
 
829
843
  // A compressor for blocks in general, without dictionary compression
830
844
  std::unique_ptr<Compressor> basic_compressor;
831
- // A compressor using dictionary compression (when applicable)
832
- std::unique_ptr<Compressor> compressor_with_dict;
833
- // Once configured/determined, points to one of the above Compressors to
834
- // use on data blocks.
835
- Compressor* data_block_compressor = nullptr;
845
+ // A compressor for data blocks, which might be tuned differently and might
846
+ // use dictionary compression (when applicable). See ~Rep() for some details.
847
+ UnownedPtr<Compressor> data_block_compressor = nullptr;
848
+ // A compressor for index blocks, which might be tuned differently from
849
+ // basic_compressor. See ~Rep() for some details.
850
+ UnownedPtr<Compressor> index_block_compressor = nullptr;
836
851
  // A decompressor corresponding to basic_compressor (when non-nullptr).
837
852
  // Used for verification and cache warming.
838
853
  std::shared_ptr<Decompressor> basic_decompressor;
@@ -853,7 +868,7 @@ struct BlockBasedTableBuilder::Rep {
853
868
  compression_types_used;
854
869
 
855
870
  // Working area for index_block_compressor when compression_parallel_threads==1
856
- WorkingAreaPair basic_working_area;
871
+ WorkingAreaPair index_block_working_area;
857
872
  // Working area for data_block_compressor, for emit/compaction thread
858
873
  WorkingAreaPair data_block_working_area;
859
874
 
@@ -894,6 +909,7 @@ struct BlockBasedTableBuilder::Rep {
894
909
  std::unique_ptr<FilterBlockBuilder> filter_builder;
895
910
  OffsetableCacheKey base_cache_key;
896
911
  const TableFileCreationReason reason;
912
+ const bool target_file_size_is_upper_bound;
897
913
 
898
914
  BlockHandle pending_handle; // Handle to add to index block
899
915
 
@@ -1041,6 +1057,8 @@ struct BlockBasedTableBuilder::Rep {
1041
1057
  use_delta_encoding_for_index_values(table_opt.format_version >= 4 &&
1042
1058
  !table_opt.block_align),
1043
1059
  reason(tbo.reason),
1060
+ target_file_size_is_upper_bound(
1061
+ tbo.moptions.target_file_size_is_upper_bound),
1044
1062
  flush_block_policy(
1045
1063
  table_options.flush_block_policy_factory->NewFlushBlockPolicy(
1046
1064
  table_options, data_block)),
@@ -1096,7 +1114,10 @@ struct BlockBasedTableBuilder::Rep {
1096
1114
  filter_context, tbo.compression_opts, tbo.compression_type);
1097
1115
  if (basic_compressor) {
1098
1116
  if (table_options.enable_index_compression) {
1099
- basic_working_area.compress = basic_compressor->ObtainWorkingArea();
1117
+ index_block_compressor = MaybeCloneSpecialized(
1118
+ basic_compressor.get(), CacheEntryRole::kIndexBlock);
1119
+ index_block_working_area.compress =
1120
+ index_block_compressor->ObtainWorkingArea();
1100
1121
  }
1101
1122
  max_dict_sample_bytes = basic_compressor->GetMaxSampleSizeIfWantDict(
1102
1123
  CacheEntryRole::kDataBlock);
@@ -1111,8 +1132,10 @@ struct BlockBasedTableBuilder::Rep {
1111
1132
  tbo.compression_opts.max_dict_buffer_bytes);
1112
1133
  }
1113
1134
  } else {
1114
- // No distinct data block compressor using dictionary
1115
- data_block_compressor = basic_compressor.get();
1135
+ // No distinct data block compressor using dictionary, but
1136
+ // implementation might still want to specialize for data blocks
1137
+ data_block_compressor = MaybeCloneSpecialized(
1138
+ basic_compressor.get(), CacheEntryRole::kDataBlock);
1116
1139
  data_block_working_area.compress =
1117
1140
  data_block_compressor->ObtainWorkingArea();
1118
1141
  }
@@ -1126,8 +1149,9 @@ struct BlockBasedTableBuilder::Rep {
1126
1149
  if (table_options.verify_compression) {
1127
1150
  verify_decompressor = basic_decompressor.get();
1128
1151
  if (table_options.enable_index_compression) {
1129
- basic_working_area.verify = verify_decompressor->ObtainWorkingArea(
1130
- basic_compressor->GetPreferredCompressionType());
1152
+ index_block_working_area.verify =
1153
+ verify_decompressor->ObtainWorkingArea(
1154
+ index_block_compressor->GetPreferredCompressionType());
1131
1155
  }
1132
1156
  if (state == State::kUnbuffered) {
1133
1157
  assert(data_block_compressor);
@@ -1292,8 +1316,19 @@ struct BlockBasedTableBuilder::Rep {
1292
1316
  }
1293
1317
 
1294
1318
  ~Rep() {
1319
+ // Delete working areas before their compressors.
1320
+ index_block_working_area = {};
1321
+ data_block_working_area = {};
1295
1322
  // Must have been cleaned up by StopParallelCompression
1296
1323
  assert(pc_rep == nullptr);
1324
+ // Delete specialized compressors if they were distinct (avoiding extra
1325
+ // fields and interlocked instructions with shared_ptr)
1326
+ if (data_block_compressor.get() != basic_compressor.get()) {
1327
+ delete data_block_compressor.get();
1328
+ }
1329
+ if (index_block_compressor.get() != basic_compressor.get()) {
1330
+ delete index_block_compressor.get();
1331
+ }
1297
1332
  }
1298
1333
 
1299
1334
  Rep(const Rep&) = delete;
@@ -1611,6 +1646,17 @@ void BlockBasedTableBuilder::Flush(const Slice* first_key_in_next_block) {
1611
1646
  rep_->data_begin_offset += uncompressed_block_data.size();
1612
1647
  MaybeEnterUnbuffered(first_key_in_next_block);
1613
1648
  } else {
1649
+ // Increment num_data_blocks when a data block is finalized in the
1650
+ // emit thread to avoid data races with write worker threads
1651
+ ++r->props.num_data_blocks;
1652
+
1653
+ // Notify filter builder that a data block has been finalized
1654
+ // This must happen on the emit thread before the block is added to the
1655
+ // ring buffer to avoid race conditions with worker threads
1656
+ if (r->filter_builder) {
1657
+ r->filter_builder->OnDataBlockFinalized(r->props.num_data_blocks);
1658
+ }
1659
+
1614
1660
  if (r->IsParallelCompressionActive()) {
1615
1661
  EmitBlockForParallel(r->data_block.MutableBuffer(), r->last_ikey,
1616
1662
  first_key_in_next_block);
@@ -1715,9 +1761,11 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data,
1715
1761
  assert(!r->IsParallelCompressionActive());
1716
1762
  CompressionType type;
1717
1763
  bool is_data_block = block_type == BlockType::kData;
1764
+ // NOTE: only index and data blocks are currently compressed
1765
+ assert(is_data_block || block_type == BlockType::kIndex);
1718
1766
  Status compress_status = CompressAndVerifyBlock(
1719
1767
  uncompressed_block_data, is_data_block,
1720
- is_data_block ? r->data_block_working_area : r->basic_working_area,
1768
+ is_data_block ? r->data_block_working_area : r->index_block_working_area,
1721
1769
  &r->single_threaded_compressed_output, &type);
1722
1770
  r->SetStatus(compress_status);
1723
1771
  if (UNLIKELY(!ok())) {
@@ -1735,7 +1783,6 @@ void BlockBasedTableBuilder::WriteBlock(const Slice& uncompressed_block_data,
1735
1783
  if (is_data_block) {
1736
1784
  r->props.data_size = r->get_offset();
1737
1785
  r->props.uncompressed_data_size += uncompressed_block_data.size();
1738
- ++r->props.num_data_blocks;
1739
1786
  }
1740
1787
  }
1741
1788
 
@@ -1784,7 +1831,6 @@ void BlockBasedTableBuilder::BGWorker(WorkingAreaPair& working_area) {
1784
1831
  if (LIKELY(ios.ok())) {
1785
1832
  rep_->props.data_size = rep_->get_offset();
1786
1833
  rep_->props.uncompressed_data_size += block_rep->uncompressed.size();
1787
- ++rep_->props.num_data_blocks;
1788
1834
 
1789
1835
  rep_->index_builder->FinishIndexEntry(
1790
1836
  rep_->pending_handle, block_rep->prepared_index_entry.get(),
@@ -1833,13 +1879,13 @@ Status BlockBasedTableBuilder::CompressAndVerifyBlock(
1833
1879
  Rep* r = rep_.get();
1834
1880
  Status status;
1835
1881
 
1836
- Compressor* compressor = nullptr;
1882
+ UnownedPtr<Compressor> compressor = nullptr;
1837
1883
  Decompressor* verify_decomp = nullptr;
1838
1884
  if (is_data_block) {
1839
1885
  compressor = r->data_block_compressor;
1840
1886
  verify_decomp = r->data_block_verify_decompressor.get();
1841
1887
  } else {
1842
- compressor = r->basic_compressor.get();
1888
+ compressor = r->index_block_compressor;
1843
1889
  verify_decomp = r->verify_decompressor.get();
1844
1890
  }
1845
1891
 
@@ -1940,6 +1986,9 @@ void BlockBasedTableBuilder::WriteMaybeCompressedBlock(
1940
1986
  const Slice& block_contents, CompressionType comp_type, BlockHandle* handle,
1941
1987
  BlockType block_type, const Slice* uncompressed_block_data,
1942
1988
  bool* skip_delta_encoding) {
1989
+ // Must have pre-checked status in single-threaded context
1990
+ assert(status().ok());
1991
+ assert(io_status().ok());
1943
1992
  rep_->SetIOStatus(WriteMaybeCompressedBlockImpl(
1944
1993
  block_contents, comp_type, handle, block_type, uncompressed_block_data,
1945
1994
  skip_delta_encoding));
@@ -2014,8 +2063,6 @@ IOStatus BlockBasedTableBuilder::WriteMaybeCompressedBlockImpl(
2014
2063
 
2015
2064
  handle->set_offset(offset);
2016
2065
  handle->set_size(block_contents.size());
2017
- assert(status().ok());
2018
- assert(io_status().ok());
2019
2066
  if (uncompressed_block_data == nullptr) {
2020
2067
  uncompressed_block_data = &block_contents;
2021
2068
  assert(comp_type == kNoCompression);
@@ -2103,7 +2150,7 @@ void BlockBasedTableBuilder::MaybeStartParallelCompression() {
2103
2150
  // that latency. So even with some optimizations, turning on the parallel
2104
2151
  // framework when compression is disabled just eats more CPU with little-to-no
2105
2152
  // improvement in throughput.
2106
- if (rep_->data_block_compressor == nullptr) {
2153
+ if (!rep_->data_block_compressor) {
2107
2154
  // Force the generally best configuration for no compression: no parallelism
2108
2155
  return;
2109
2156
  }
@@ -2140,8 +2187,9 @@ void BlockBasedTableBuilder::StopParallelCompression(bool abort) {
2140
2187
  pc_rep.SetAbort(pc_rep.emit_thread_state);
2141
2188
  } else if (pc_rep.emit_thread_state !=
2142
2189
  ParallelCompressionRep::ThreadState::kEnd) {
2143
- // In case we didn't do a final flush with no next key
2144
- assert(rep_->props.num_data_blocks == 0);
2190
+ // In case we didn't do a final flush with no next key, which might have
2191
+ // been skipped if !ok() was set after the start of Finish()
2192
+ assert(rep_->props.num_data_blocks == 0 || !ok());
2145
2193
  pc_rep.SetNoMoreToEmit(pc_rep.emit_thread_state, pc_rep.emit_slot);
2146
2194
  }
2147
2195
  #ifdef BBTB_PC_WATCHDOG
@@ -2449,8 +2497,8 @@ void BlockBasedTableBuilder::WritePropertiesBlock(
2449
2497
  void BlockBasedTableBuilder::WriteCompressionDictBlock(
2450
2498
  MetaIndexBuilder* meta_index_builder) {
2451
2499
  Slice compression_dict;
2452
- if (rep_->compressor_with_dict) {
2453
- compression_dict = rep_->compressor_with_dict->GetSerializedDict();
2500
+ if (rep_->data_block_compressor) {
2501
+ compression_dict = rep_->data_block_compressor->GetSerializedDict();
2454
2502
  }
2455
2503
  if (!compression_dict.empty()) {
2456
2504
  BlockHandle compression_dict_block_handle;
@@ -2545,6 +2593,7 @@ void BlockBasedTableBuilder::MaybeEnterUnbuffered(
2545
2593
  // The below code is neither safe nor necessary for handling zero data
2546
2594
  // blocks.
2547
2595
  // For PostPopulateCompressionProperties()
2596
+ assert(!r->data_block_compressor);
2548
2597
  r->data_block_compressor = r->basic_compressor.get();
2549
2598
  return;
2550
2599
  }
@@ -2586,15 +2635,12 @@ void BlockBasedTableBuilder::MaybeEnterUnbuffered(
2586
2635
 
2587
2636
  assert(samples.sample_data.size() > 0);
2588
2637
 
2589
- // final sample data block flushed, now we can generate dictionary
2590
- r->compressor_with_dict = r->basic_compressor->MaybeCloneSpecialized(
2591
- CacheEntryRole::kDataBlock, std::move(samples));
2638
+ // final sample data block flushed, now we can generate dictionary (or it
2639
+ // might opt not to use a dictionary and that's ok)
2640
+ r->data_block_compressor =
2641
+ MaybeCloneSpecialized(r->basic_compressor.get(),
2642
+ CacheEntryRole::kDataBlock, std::move(samples));
2592
2643
 
2593
- // The compressor might opt not to use a dictionary, in which case we
2594
- // can use the same compressor as for e.g. index blocks.
2595
- r->data_block_compressor = r->compressor_with_dict
2596
- ? r->compressor_with_dict.get()
2597
- : r->basic_compressor.get();
2598
2644
  Slice serialized_dict = r->data_block_compressor->GetSerializedDict();
2599
2645
  if (r->verify_decompressor) {
2600
2646
  if (serialized_dict.empty()) {
@@ -2688,6 +2734,20 @@ void BlockBasedTableBuilder::MaybeEnterUnbuffered(
2688
2734
  Status BlockBasedTableBuilder::Finish() {
2689
2735
  Rep* r = rep_.get();
2690
2736
  assert(r->state != Rep::State::kClosed);
2737
+
2738
+ #ifndef NDEBUG
2739
+ {
2740
+ // This sync point callback is a simple approximation of a failure detected
2741
+ // in parallel compression after the start of calling Finish() but before
2742
+ // Finish() calls Flush()
2743
+ IOStatus s = rep_->GetIOStatus();
2744
+ TEST_SYNC_POINT_CALLBACK("BlockBasedTableBuilder::Finish:ParallelIOStatus",
2745
+ &s);
2746
+ if (!s.ok()) {
2747
+ rep_->SetIOStatus(s);
2748
+ }
2749
+ }
2750
+ #endif // !NDEBUG
2691
2751
  // To make sure properties block is able to keep the accurate size of index
2692
2752
  // block, we will finish writing all index entries first, in Flush().
2693
2753
  Flush(/*first_key_in_next_block=*/nullptr);
@@ -2701,6 +2761,8 @@ Status BlockBasedTableBuilder::Finish() {
2701
2761
 
2702
2762
  r->props.tail_start_offset = r->offset.LoadRelaxed();
2703
2763
 
2764
+ uint64_t last_estimated_tail_size = EstimatedTailSize();
2765
+
2704
2766
  // Write meta blocks, metaindex block and footer in the following order.
2705
2767
  // 1. [meta block: filter]
2706
2768
  // 2. [meta block: index]
@@ -2727,6 +2789,24 @@ Status BlockBasedTableBuilder::Finish() {
2727
2789
  r->state = Rep::State::kClosed;
2728
2790
  r->tail_size = r->offset.LoadRelaxed() - r->props.tail_start_offset;
2729
2791
 
2792
+ // Assert tail size estimation is an overestimate only when tail size
2793
+ // estimation option is enabled for compaction files with supported
2794
+ // index/filter types:
2795
+ // - Shortened indexes (kBinarySearch, kBinarySearchWithFirstKey)
2796
+ // - Partitioned indexes (kTwoLevelIndexSearch)
2797
+ // - Full filters
2798
+ // - Partitioned filters
2799
+ if (r->target_file_size_is_upper_bound &&
2800
+ r->reason == TableFileCreationReason::kCompaction &&
2801
+ r->table_options.index_type != BlockBasedTableOptions::kHashSearch) {
2802
+ ROCKS_LOG_WARN(r->ioptions.info_log,
2803
+ "File number: %" PRIu64 ", Estimated tail size = %" PRIu64
2804
+ " bytes, Actual tail size = %" PRIu64 " bytes",
2805
+ r->props.orig_file_number, last_estimated_tail_size,
2806
+ r->tail_size);
2807
+ assert(r->tail_size <= last_estimated_tail_size);
2808
+ }
2809
+
2730
2810
  return r->GetStatus();
2731
2811
  }
2732
2812
 
@@ -2764,6 +2844,49 @@ uint64_t BlockBasedTableBuilder::EstimatedFileSize() const {
2764
2844
  }
2765
2845
  }
2766
2846
 
2847
+ uint64_t BlockBasedTableBuilder::EstimatedTailSize() const {
2848
+ uint64_t estimated_tail_size = 0;
2849
+
2850
+ // 1. Estimate index size
2851
+ if (rep_->table_options.index_type ==
2852
+ BlockBasedTableOptions::kTwoLevelIndexSearch) {
2853
+ assert(rep_->p_index_builder_);
2854
+ estimated_tail_size += rep_->p_index_builder_->CurrentIndexSizeEstimate();
2855
+ } else {
2856
+ assert(rep_->index_builder);
2857
+ estimated_tail_size += rep_->index_builder->CurrentIndexSizeEstimate();
2858
+ }
2859
+
2860
+ // 2. Estimate filter size
2861
+ if (rep_->filter_builder) {
2862
+ estimated_tail_size += rep_->filter_builder->CurrentFilterSizeEstimate();
2863
+ }
2864
+
2865
+ // 3. Estimate compression dictionary size
2866
+ if (rep_->data_block_compressor) {
2867
+ Slice dict = rep_->data_block_compressor->GetSerializedDict();
2868
+ if (!dict.empty()) {
2869
+ estimated_tail_size += dict.size();
2870
+ }
2871
+ }
2872
+
2873
+ // 4. Estimate range deletion block size
2874
+ if (!rep_->range_del_block.empty()) {
2875
+ estimated_tail_size += rep_->range_del_block.CurrentSizeEstimate();
2876
+ }
2877
+
2878
+ // 5. Estimate properties block size conservatively (~1-2KB)
2879
+ estimated_tail_size += 2048;
2880
+
2881
+ // 6. Estimate meta-index block size conservatively (~1KB)
2882
+ estimated_tail_size += 1024;
2883
+
2884
+ // 7. Add footer size
2885
+ estimated_tail_size += Footer::kMaxEncodedLength;
2886
+
2887
+ return estimated_tail_size;
2888
+ }
2889
+
2767
2890
  uint64_t BlockBasedTableBuilder::GetTailSize() const { return rep_->tail_size; }
2768
2891
 
2769
2892
  bool BlockBasedTableBuilder::NeedCompact() const {
@@ -89,11 +89,15 @@ class BlockBasedTableBuilder : public TableBuilder {
89
89
  // Finish() call, returns the size of the final generated file.
90
90
  uint64_t FileSize() const override;
91
91
 
92
- // Estimated size of the file generated so far. This is used when
93
- // FileSize() cannot estimate final SST size, e.g. parallel compression
94
- // is enabled.
92
+ // Estimated size of the file generated so far (based on data blocks, this
93
+ // estimate does not include meta blocks). This is used when FileSize() cannot
94
+ // estimate final SST size, e.g. parallel compression is enabled.
95
95
  uint64_t EstimatedFileSize() const override;
96
96
 
97
+ // Estimated tail size of the SST file generated so far. The "tail" refers to
98
+ // all blocks written after data blocks, through the end of the SST file.
99
+ uint64_t EstimatedTailSize() const override;
100
+
97
101
  // Get the size of the "tail" part of a SST file. "Tail" refers to
98
102
  // all blocks after data blocks till the end of the SST file.
99
103
  uint64_t GetTailSize() const override;