@nxtedition/rocksdb 13.5.12 → 14.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232) hide show
  1. package/binding.cc +33 -2
  2. package/binding.gyp +2 -2
  3. package/chained-batch.js +9 -16
  4. package/deps/rocksdb/rocksdb/BUCK +18 -1
  5. package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
  6. package/deps/rocksdb/rocksdb/Makefile +20 -9
  7. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
  8. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
  9. package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
  10. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
  11. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
  13. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
  14. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
  15. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
  16. package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
  17. package/deps/rocksdb/rocksdb/db/c.cc +207 -0
  18. package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
  19. package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
  20. package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
  21. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
  23. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
  24. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
  25. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
  26. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
  27. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
  28. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
  29. package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
  30. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
  31. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
  32. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
  37. package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
  38. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
  39. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
  40. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
  41. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
  42. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
  43. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
  44. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
  45. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
  46. package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
  47. package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
  48. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
  49. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
  50. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
  51. package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
  52. package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
  53. package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
  54. package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
  55. package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
  56. package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
  57. package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
  58. package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
  59. package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
  60. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
  61. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
  62. package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
  63. package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
  64. package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
  65. package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
  66. package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
  67. package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
  68. package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
  69. package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
  70. package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
  71. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  72. package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
  73. package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
  74. package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
  75. package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
  76. package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
  77. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
  78. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
  79. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
  80. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
  81. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
  82. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
  83. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
  84. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
  85. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
  86. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
  87. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
  88. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
  89. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
  90. package/deps/rocksdb/rocksdb/env/env.cc +12 -0
  91. package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
  92. package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
  93. package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
  94. package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
  95. package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
  96. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
  97. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
  98. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
  99. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
  100. package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
  101. package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
  102. package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
  103. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
  104. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
  105. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
  106. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
  107. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
  108. package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
  109. package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
  110. package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
  111. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
  112. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
  113. package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
  114. package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
  115. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
  116. package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
  117. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
  119. package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
  120. package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
  122. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
  124. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
  127. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  128. package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
  129. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
  130. package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
  131. package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
  132. package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
  133. package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
  134. package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
  137. package/deps/rocksdb/rocksdb/options/options.cc +2 -0
  138. package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
  139. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
  140. package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
  141. package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
  142. package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
  143. package/deps/rocksdb/rocksdb/src.mk +8 -2
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
  146. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
  147. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
  148. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
  149. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
  150. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
  151. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
  152. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
  153. package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
  154. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
  155. package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
  156. package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
  157. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
  158. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
  159. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
  160. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
  161. package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
  162. package/deps/rocksdb/rocksdb/table/format.cc +6 -12
  163. package/deps/rocksdb/rocksdb/table/format.h +10 -0
  164. package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
  165. package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
  166. package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
  167. package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
  168. package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
  169. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
  170. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
  171. package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
  172. package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
  173. package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
  174. package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
  175. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
  176. package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
  177. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
  178. package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
  179. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
  180. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
  181. package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
  182. package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
  183. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
  184. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
  185. package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
  186. package/deps/rocksdb/rocksdb/util/coding.h +3 -3
  187. package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
  188. package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
  189. package/deps/rocksdb/rocksdb/util/compression.h +5 -0
  190. package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
  191. package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
  192. package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
  193. package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
  194. package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
  195. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
  196. package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
  197. package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
  198. package/deps/rocksdb/rocksdb/util/status.cc +1 -0
  199. package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
  200. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
  201. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
  202. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
  203. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
  204. package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
  205. package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
  206. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
  207. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
  208. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
  209. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
  210. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
  211. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
  212. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
  213. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
  214. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
  215. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
  216. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
  217. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
  218. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
  219. package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
  220. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
  221. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
  222. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
  223. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
  224. package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
  225. package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
  226. package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
  227. package/index.js +3 -3
  228. package/package.json +1 -1
  229. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  230. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  231. package/util.h +38 -12
  232. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
@@ -154,19 +154,28 @@ const Slice& Decompressor::GetSerializedDict() const {
154
154
 
155
155
  namespace {
156
156
 
157
- class BuiltinCompressorV1 : public Compressor {
157
+ class CompressorBase : public Compressor {
158
+ public:
159
+ explicit CompressorBase(const CompressionOptions& opts) : opts_(opts) {}
160
+
161
+ protected:
162
+ CompressionOptions opts_;
163
+ };
164
+
165
+ class BuiltinCompressorV1 : public CompressorBase {
158
166
  public:
159
167
  const char* Name() const override { return "BuiltinCompressorV1"; }
160
168
 
161
169
  explicit BuiltinCompressorV1(const CompressionOptions& opts,
162
170
  CompressionType type)
163
- : opts_(opts), type_(type) {
171
+ : CompressorBase(opts), type_(type) {
164
172
  assert(type != kNoCompression);
165
173
  }
166
174
 
167
175
  CompressionType GetPreferredCompressionType() const override { return type_; }
168
176
 
169
- Status CompressBlock(Slice uncompressed_data, std::string* compressed_output,
177
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
178
+ size_t* compressed_output_size,
170
179
  CompressionType* out_compression_type,
171
180
  ManagedWorkingArea* wa) override {
172
181
  std::optional<CompressionContext> tmp_ctx;
@@ -179,47 +188,762 @@ class BuiltinCompressorV1 : public Compressor {
179
188
  ctx = &*tmp_ctx;
180
189
  }
181
190
  CompressionInfo info(opts_, *ctx, CompressionDict::GetEmptyDict(), type_);
191
+ std::string str_output;
192
+ str_output.reserve(uncompressed_data.size());
182
193
  if (!OLD_CompressData(uncompressed_data, info,
183
- 1 /*compress_format_version*/, compressed_output)) {
194
+ 1 /*compress_format_version*/, &str_output)) {
195
+ // Maybe rejected or bypassed
196
+ *compressed_output_size = str_output.size();
184
197
  *out_compression_type = kNoCompression;
185
198
  return Status::OK();
186
199
  }
200
+ if (str_output.size() > *compressed_output_size) {
201
+ // Compression rejected
202
+ *out_compression_type = kNoCompression;
203
+ return Status::OK();
204
+ }
205
+ std::memcpy(compressed_output, str_output.data(), str_output.size());
206
+ *compressed_output_size = str_output.size();
187
207
  *out_compression_type = type_;
188
208
  return Status::OK();
189
209
  }
190
210
 
191
211
  protected:
192
- const CompressionOptions opts_;
193
212
  const CompressionType type_;
194
213
  };
195
214
 
196
- class BuiltinCompressorV2 : public Compressor {
215
+ class CompressorWithSimpleDictBase : public CompressorBase {
197
216
  public:
198
- const char* Name() const override { return "BuiltinCompressorV2"; }
217
+ explicit CompressorWithSimpleDictBase(const CompressionOptions& opts,
218
+ std::string&& dict_data = {})
219
+ : CompressorBase(opts), dict_data_(std::move(dict_data)) {}
199
220
 
200
- explicit BuiltinCompressorV2(const CompressionOptions& opts,
201
- CompressionType type,
202
- CompressionDict&& dict = {})
203
- : opts_(opts), type_(type), dict_(std::move(dict)) {
204
- assert(type != kNoCompression);
221
+ size_t GetMaxSampleSizeIfWantDict(
222
+ CacheEntryRole /*block_type*/) const override {
223
+ return opts_.max_dict_bytes;
224
+ }
225
+
226
+ // NOTE: empty dict is equivalent to no dict
227
+ Slice GetSerializedDict() const override { return dict_data_; }
228
+
229
+ std::unique_ptr<Compressor> MaybeCloneSpecialized(
230
+ CacheEntryRole /*block_type*/,
231
+ DictSampleArgs&& dict_samples) final override {
232
+ assert(dict_samples.Verify());
233
+ if (dict_samples.empty()) {
234
+ // Nothing to specialize on
235
+ return nullptr;
236
+ } else {
237
+ return CloneForDict(std::move(dict_samples.sample_data));
238
+ }
239
+ }
240
+
241
+ virtual std::unique_ptr<Compressor> CloneForDict(std::string&& dict_data) = 0;
242
+
243
+ protected:
244
+ const std::string dict_data_;
245
+ };
246
+
247
+ // NOTE: the legacy behavior is to pretend to use dictionary compression when
248
+ // enabled, including storing a dictionary block, but to ignore it. That is
249
+ // matched here.
250
+ class BuiltinSnappyCompressorV2 : public CompressorWithSimpleDictBase {
251
+ public:
252
+ using CompressorWithSimpleDictBase::CompressorWithSimpleDictBase;
253
+
254
+ const char* Name() const override { return "BuiltinSnappyCompressorV2"; }
255
+
256
+ CompressionType GetPreferredCompressionType() const override {
257
+ return kSnappyCompression;
258
+ }
259
+
260
+ std::unique_ptr<Compressor> CloneForDict(std::string&& dict_data) override {
261
+ return std::make_unique<BuiltinSnappyCompressorV2>(opts_,
262
+ std::move(dict_data));
263
+ }
264
+
265
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
266
+ size_t* compressed_output_size,
267
+ CompressionType* out_compression_type,
268
+ ManagedWorkingArea*) override {
269
+ #ifdef SNAPPY
270
+ struct MySink : public snappy::Sink {
271
+ MySink(char* output, size_t output_size)
272
+ : output_(output), output_size_(output_size) {}
273
+
274
+ char* output_;
275
+ size_t output_size_;
276
+ size_t pos_ = 0;
277
+
278
+ void Append(const char* data, size_t n) override {
279
+ if (pos_ + n <= output_size_) {
280
+ std::memcpy(output_ + pos_, data, n);
281
+ pos_ += n;
282
+ } else {
283
+ // Virtual abort
284
+ pos_ = output_size_ + 1;
285
+ }
286
+ }
287
+
288
+ char* GetAppendBuffer(size_t length, char* scratch) override {
289
+ if (pos_ + length <= output_size_) {
290
+ return output_ + pos_;
291
+ }
292
+ return scratch;
293
+ }
294
+ };
295
+ MySink sink{compressed_output, *compressed_output_size};
296
+ snappy::ByteArraySource source{uncompressed_data.data(),
297
+ uncompressed_data.size()};
298
+
299
+ size_t outlen = snappy::Compress(&source, &sink);
300
+ if (outlen > 0 && sink.pos_ <= sink.output_size_) {
301
+ // Compression kept/successful
302
+ assert(outlen == sink.pos_);
303
+ *compressed_output_size = outlen;
304
+ *out_compression_type = kSnappyCompression;
305
+ return Status::OK();
306
+ }
307
+ // Compression rejected
308
+ *compressed_output_size = 1;
309
+ #else
310
+ (void)uncompressed_data;
311
+ (void)compressed_output;
312
+ // Compression bypassed (not supported)
313
+ *compressed_output_size = 0;
314
+ #endif
315
+ *out_compression_type = kNoCompression;
316
+ return Status::OK();
317
+ }
318
+
319
+ std::shared_ptr<Decompressor> GetOptimizedDecompressor() const override;
320
+ };
321
+
322
+ [[maybe_unused]]
323
+ std::pair<char*, size_t> StartCompressBlockV2(Slice uncompressed_data,
324
+ char* compressed_output,
325
+ size_t compressed_output_size) {
326
+ if ( // Can't compress more than 4GB
327
+ uncompressed_data.size() > std::numeric_limits<uint32_t>::max() ||
328
+ // Need enough output space for encoding uncompressed size
329
+ compressed_output_size <= 5) {
330
+ // Compression bypassed
331
+ return {nullptr, 0};
332
+ }
333
+ // Standard format for prepending uncompressed size to the compressed
334
+ // data in compress_format_version=2
335
+ char* alg_output = EncodeVarint32(
336
+ compressed_output, static_cast<uint32_t>(uncompressed_data.size()));
337
+ size_t alg_max_output_size =
338
+ compressed_output_size - (alg_output - compressed_output);
339
+ return {alg_output, alg_max_output_size};
340
+ }
341
+
342
+ class BuiltinZlibCompressorV2 : public CompressorWithSimpleDictBase {
343
+ public:
344
+ using CompressorWithSimpleDictBase::CompressorWithSimpleDictBase;
345
+
346
+ const char* Name() const override { return "BuiltinZlibCompressorV2"; }
347
+
348
+ CompressionType GetPreferredCompressionType() const override {
349
+ return kZlibCompression;
350
+ }
351
+
352
+ std::unique_ptr<Compressor> CloneForDict(std::string&& dict_data) override {
353
+ return std::make_unique<BuiltinZlibCompressorV2>(opts_,
354
+ std::move(dict_data));
355
+ }
356
+
357
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
358
+ size_t* compressed_output_size,
359
+ CompressionType* out_compression_type,
360
+ ManagedWorkingArea*) override {
361
+ #ifdef ZLIB
362
+ auto [alg_output, alg_max_output_size] = StartCompressBlockV2(
363
+ uncompressed_data, compressed_output, *compressed_output_size);
364
+ if (alg_max_output_size == 0) {
365
+ // Compression bypassed
366
+ *compressed_output_size = 0;
367
+ *out_compression_type = kNoCompression;
368
+ return Status::OK();
369
+ }
370
+
371
+ // The memLevel parameter specifies how much memory should be allocated for
372
+ // the internal compression state.
373
+ // memLevel=1 uses minimum memory but is slow and reduces compression ratio.
374
+ // memLevel=9 uses maximum memory for optimal speed.
375
+ // The default value is 8. See zconf.h for more details.
376
+ static const int memLevel = 8;
377
+ int level = opts_.level;
378
+ if (level == CompressionOptions::kDefaultCompressionLevel) {
379
+ level = Z_DEFAULT_COMPRESSION;
380
+ }
381
+
382
+ z_stream stream;
383
+ memset(&stream, 0, sizeof(z_stream));
384
+
385
+ // Initialize the zlib stream
386
+ int st = deflateInit2(&stream, level, Z_DEFLATED, opts_.window_bits,
387
+ memLevel, opts_.strategy);
388
+ if (st != Z_OK) {
389
+ *compressed_output_size = 0;
390
+ *out_compression_type = kNoCompression;
391
+ return Status::OK();
392
+ }
393
+
394
+ // Set dictionary if available
395
+ if (!dict_data_.empty()) {
396
+ st = deflateSetDictionary(
397
+ &stream, reinterpret_cast<const Bytef*>(dict_data_.data()),
398
+ static_cast<unsigned int>(dict_data_.size()));
399
+ if (st != Z_OK) {
400
+ deflateEnd(&stream);
401
+ *compressed_output_size = 0;
402
+ *out_compression_type = kNoCompression;
403
+ return Status::OK();
404
+ }
405
+ }
406
+
407
+ // Set up input
408
+ stream.next_in = (Bytef*)uncompressed_data.data();
409
+ stream.avail_in = static_cast<unsigned int>(uncompressed_data.size());
410
+
411
+ // Set up output
412
+ stream.next_out = reinterpret_cast<Bytef*>(alg_output);
413
+ stream.avail_out = static_cast<unsigned int>(alg_max_output_size);
414
+
415
+ // Compress
416
+ st = deflate(&stream, Z_FINISH);
417
+ size_t outlen = alg_max_output_size - stream.avail_out;
418
+ deflateEnd(&stream);
419
+
420
+ if (st == Z_STREAM_END) {
421
+ // Compression kept/successful
422
+ *compressed_output_size =
423
+ outlen + /*header size*/ (alg_output - compressed_output);
424
+ *out_compression_type = kZlibCompression;
425
+ return Status::OK();
426
+ }
427
+ // Compression failed or rejected
428
+ *compressed_output_size = 1;
429
+ #else
430
+ (void)uncompressed_data;
431
+ (void)compressed_output;
432
+ // Compression bypassed (not supported)
433
+ *compressed_output_size = 0;
434
+ #endif
435
+ *out_compression_type = kNoCompression;
436
+ return Status::OK();
437
+ }
438
+ };
439
+
440
+ class BuiltinBZip2CompressorV2 : public CompressorWithSimpleDictBase {
441
+ public:
442
+ using CompressorWithSimpleDictBase::CompressorWithSimpleDictBase;
443
+
444
+ const char* Name() const override { return "BuiltinBZip2CompressorV2"; }
445
+
446
+ CompressionType GetPreferredCompressionType() const override {
447
+ return kBZip2Compression;
448
+ }
449
+
450
+ std::unique_ptr<Compressor> CloneForDict(std::string&& dict_data) override {
451
+ return std::make_unique<BuiltinBZip2CompressorV2>(opts_,
452
+ std::move(dict_data));
453
+ }
454
+
455
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
456
+ size_t* compressed_output_size,
457
+ CompressionType* out_compression_type,
458
+ ManagedWorkingArea*) override {
459
+ #ifdef BZIP2
460
+ auto [alg_output, alg_max_output_size] = StartCompressBlockV2(
461
+ uncompressed_data, compressed_output, *compressed_output_size);
462
+ if (alg_max_output_size == 0) {
463
+ // Compression bypassed
464
+ *compressed_output_size = 0;
465
+ *out_compression_type = kNoCompression;
466
+ return Status::OK();
467
+ }
468
+
469
+ // BZip2 doesn't actually use the dictionary, but we store it for
470
+ // compatibility similar to BuiltinSnappyCompressorV2
471
+
472
+ // Initialize the bzip2 stream
473
+ bz_stream stream;
474
+ memset(&stream, 0, sizeof(bz_stream));
475
+
476
+ // Block size 1 is 100K.
477
+ // 0 is for silent.
478
+ // 30 is the default workFactor
479
+ int st = BZ2_bzCompressInit(&stream, 1, 0, 30);
480
+ if (st != BZ_OK) {
481
+ *compressed_output_size = 0;
482
+ *out_compression_type = kNoCompression;
483
+ return Status::OK();
484
+ }
485
+
486
+ // Set up input
487
+ stream.next_in = const_cast<char*>(uncompressed_data.data());
488
+ stream.avail_in = static_cast<unsigned int>(uncompressed_data.size());
489
+
490
+ // Set up output
491
+ stream.next_out = alg_output;
492
+ stream.avail_out = static_cast<unsigned int>(alg_max_output_size);
493
+
494
+ // Compress
495
+ st = BZ2_bzCompress(&stream, BZ_FINISH);
496
+ size_t outlen = alg_max_output_size - stream.avail_out;
497
+ BZ2_bzCompressEnd(&stream);
498
+
499
+ // Check for success
500
+ if (st == BZ_STREAM_END) {
501
+ // Compression kept/successful
502
+ *compressed_output_size = outlen + (alg_output - compressed_output);
503
+ *out_compression_type = kBZip2Compression;
504
+ return Status::OK();
505
+ }
506
+ // Compression failed or rejected
507
+ *compressed_output_size = 1;
508
+ #else
509
+ (void)uncompressed_data;
510
+ (void)compressed_output;
511
+ // Compression bypassed (not supported)
512
+ *compressed_output_size = 0;
513
+ #endif
514
+ *out_compression_type = kNoCompression;
515
+ return Status::OK();
516
+ }
517
+ };
518
+
// LZ4 block compressor that can compress against a raw dictionary held in
// dict_data_. Output placement is prepared by StartCompressBlockV2 (defined
// outside this view). Every path of CompressBlock returns Status::OK(); the
// outcome is reported through the two out-parameters:
//   kLZ4Compression, size = bytes written  -> block was compressed
//   kNoCompression,  size = 1              -> LZ4 ran but the result was rejected
//   kNoCompression,  size = 0              -> compression bypassed / LZ4 not built
519
+ class BuiltinLZ4CompressorV2WithDict : public CompressorWithSimpleDictBase {
520
+ public:
521
+ using CompressorWithSimpleDictBase::CompressorWithSimpleDictBase;
522
+
// The reported name carries no "WithDict" suffix; BuiltinLZ4CompressorV2NoDict
// derives from this class and does not override Name().
523
+ const char* Name() const override { return "BuiltinLZ4CompressorV2"; }
524
+
525
+ CompressionType GetPreferredCompressionType() const override {
526
+ return kLZ4Compression;
527
+ }
528
+
// Builds a new compressor of this same type from the current options, moving
// the supplied dictionary bytes into it.
529
+ std::unique_ptr<Compressor> CloneForDict(std::string&& dict_data) override {
530
+ return std::make_unique<BuiltinLZ4CompressorV2WithDict>(
531
+ opts_, std::move(dict_data));
532
+ }
533
+
// The working area is an LZ4_stream_t tagged with this compressor as owner;
// an empty ManagedWorkingArea is returned when LZ4 is not compiled in.
// NOTE(review): the LZ4_createStream() result is not checked for null here.
534
+ ManagedWorkingArea ObtainWorkingArea() override {
535
+ #ifdef LZ4
536
+ return {reinterpret_cast<WorkingArea*>(LZ4_createStream()), this};
537
+ #else
538
+ return {};
539
+ #endif
540
+ }
// Frees a stream handed out by ObtainWorkingArea(); a null pointer is ignored.
541
+ void ReleaseWorkingArea(WorkingArea* wa) override {
542
+ if (wa) {
543
+ #ifdef LZ4
544
+ LZ4_freeStream(reinterpret_cast<LZ4_stream_t*>(wa));
545
+ #endif
546
+ }
547
+ }
548
+
// Compresses uncompressed_data into compressed_output.
// On entry *compressed_output_size is forwarded to StartCompressBlockV2 and is
// the upper bound asserted for the final size (presumably the capacity of
// compressed_output - confirm against the base class). On return it holds the
// size code described in the class annotation.
// wa may be null or belong to another compressor; it is only used when
// wa->owner() == this.
549
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
550
+ size_t* compressed_output_size,
551
+ CompressionType* out_compression_type,
552
+ ManagedWorkingArea* wa) override {
553
+ #ifdef LZ4
// alg_output is where LZ4 should write; the bytes between compressed_output
// and alg_output are counted as header in the size computation below.
554
+ auto [alg_output, alg_max_output_size] = StartCompressBlockV2(
555
+ uncompressed_data, compressed_output, *compressed_output_size);
556
+ if (alg_max_output_size == 0) {
557
+ // Compression bypassed
558
+ *compressed_output_size = 0;
559
+ *out_compression_type = kNoCompression;
560
+ return Status::OK();
561
+ }
562
+
// Pick the stream: reuse (and reset) the caller's stream when it came from
// this compressor, otherwise create one for this call and keep it in tmp_wa.
563
+ ManagedWorkingArea tmp_wa;
564
+ LZ4_stream_t* stream;
565
+ if (wa != nullptr && wa->owner() == this) {
566
+ stream = reinterpret_cast<LZ4_stream_t*>(wa->get());
567
+ #if LZ4_VERSION_NUMBER >= 10900 // >= version 1.9.0
568
+ LZ4_resetStream_fast(stream);
569
+ #else
570
+ LZ4_resetStream(stream);
571
+ #endif
572
+ } else {
// Freshly created stream: no reset is issued on this path.
// NOTE(review): stream is used below without a null check.
573
+ tmp_wa = ObtainWorkingArea();
574
+ stream = reinterpret_cast<LZ4_stream_t*>(tmp_wa.get());
575
+ }
// The dictionary is loaded into the stream on every call when non-empty.
576
+ if (!dict_data_.empty()) {
577
+ // TODO: more optimization possible here?
578
+ LZ4_loadDict(stream, dict_data_.data(),
579
+ static_cast<int>(dict_data_.size()));
580
+ }
// Negative option levels map to LZ4 acceleration = -level; zero and positive
// levels all use acceleration 1.
581
+ int acceleration;
582
+ if (opts_.level < 0) {
583
+ acceleration = -opts_.level;
584
+ } else {
585
+ acceleration = 1;
586
+ }
// Sizes are narrowed to int for the LZ4 API.
587
+ auto outlen = LZ4_compress_fast_continue(
588
+ stream, uncompressed_data.data(), alg_output,
589
+ static_cast<int>(uncompressed_data.size()),
590
+ static_cast<int>(alg_max_output_size), acceleration);
// Only a positive length counts as success; anything else falls through to
// the "rejected" result.
591
+ if (outlen > 0) {
592
+ // Compression kept/successful
593
+ size_t output_size = static_cast<size_t>(
594
+ outlen + /*header size*/ (alg_output - compressed_output));
595
+ assert(output_size <= *compressed_output_size);
596
+ *compressed_output_size = output_size;
597
+ *out_compression_type = kLZ4Compression;
598
+ return Status::OK();
599
+ }
600
+ // Compression rejected
601
+ *compressed_output_size = 1;
602
+ #else
603
+ (void)uncompressed_data;
604
+ (void)compressed_output;
605
+ (void)wa;
606
+ // Compression bypassed (not supported)
607
+ *compressed_output_size = 0;
608
+ #endif
// Shared tail for both the "rejected" and the "not supported" outcomes.
609
+ *out_compression_type = kNoCompression;
610
+ return Status::OK();
611
+ }
612
+ };
613
+
// Dictionary-less LZ4 compressor. It derives from the WithDict variant (so
// Name(), GetPreferredCompressionType() and CloneForDict() are inherited) but
// never hands out a working area and compresses with the one-shot
// LZ4_compress_fast call instead of a stream.
614
+ class BuiltinLZ4CompressorV2NoDict : public BuiltinLZ4CompressorV2WithDict {
615
+ public:
// Passes an empty dictionary to the base class.
// NOTE(review): this single-argument constructor is not marked explicit,
// unlike the BuiltinZSTDCompressorV2 constructor in the same file.
616
+ BuiltinLZ4CompressorV2NoDict(const CompressionOptions& opts)
617
+ : BuiltinLZ4CompressorV2WithDict(opts, /*dict_data=*/{}) {}
618
+
// Always returns an empty working area; the rationale is given below.
619
+ ManagedWorkingArea ObtainWorkingArea() override {
620
+ // Using an LZ4_stream_t between compressions and resetting with
621
+ // LZ4_resetStream_fast is actually slower than using a fresh LZ4_stream_t
622
+ // each time, or not involving a stream at all. Similarly, using an extState
623
+ // does not seem to offer a performance boost, perhaps a small regression.
624
+ return {};
205
625
  }
206
626
 
// Nothing is ever allocated by ObtainWorkingArea(), so only a null pointer is
// expected here (checked by assert in debug builds only).
627
+ void ReleaseWorkingArea(WorkingArea* wa) override {
628
+ // Should not be called
629
+ (void)wa;
630
+ assert(wa == nullptr);
631
+ }
632
+
// Compresses uncompressed_data into compressed_output; wa is ignored.
// Always returns Status::OK(). Results via the out-parameters:
//   kLZ4Compression, size = header + LZ4 bytes -> compressed
//   kNoCompression,  size = 1                  -> LZ4 result rejected
//   kNoCompression,  size = 0                  -> bypassed / LZ4 not built
633
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
634
+ size_t* compressed_output_size,
635
+ CompressionType* out_compression_type,
636
+ ManagedWorkingArea* wa) override {
637
+ #ifdef LZ4
638
+ (void)wa;
639
+ auto [alg_output, alg_max_output_size] = StartCompressBlockV2(
640
+ uncompressed_data, compressed_output, *compressed_output_size);
641
+ if (alg_max_output_size == 0) {
642
+ // Compression bypassed
643
+ *compressed_output_size = 0;
644
+ *out_compression_type = kNoCompression;
645
+ return Status::OK();
646
+ }
// Negative option levels map to acceleration = -level, everything else to 1.
647
+ int acceleration;
648
+ if (opts_.level < 0) {
649
+ acceleration = -opts_.level;
650
+ } else {
651
+ acceleration = 1;
652
+ }
// One-shot call, no stream state involved; sizes are narrowed to int.
653
+ auto outlen =
654
+ LZ4_compress_fast(uncompressed_data.data(), alg_output,
655
+ static_cast<int>(uncompressed_data.size()),
656
+ static_cast<int>(alg_max_output_size), acceleration);
657
+ if (outlen > 0) {
658
+ // Compression kept/successful
659
+ size_t output_size = static_cast<size_t>(
660
+ outlen + /*header size*/ (alg_output - compressed_output));
661
+ assert(output_size <= *compressed_output_size);
662
+ *compressed_output_size = output_size;
663
+ *out_compression_type = kLZ4Compression;
664
+ return Status::OK();
665
+ }
666
+ // Compression rejected
667
+ *compressed_output_size = 1;
668
+ #else
669
+ (void)uncompressed_data;
670
+ (void)compressed_output;
671
+ (void)wa;
672
+ // Compression bypassed (not supported)
673
+ *compressed_output_size = 0;
674
+ #endif
// Shared tail for the "rejected" and "not supported" outcomes.
675
+ *out_compression_type = kNoCompression;
676
+ return Status::OK();
677
+ }
678
+ };
679
+
// LZ4HC block compressor with optional raw dictionary (dict_data_). The
// working area is an LZ4_streamHC_t. Every path of CompressBlock returns
// Status::OK(); the outcome is reported through the out-parameters:
//   kLZ4HCCompression, size = bytes written -> block was compressed
//   kNoCompression,    size = 1             -> result rejected
//   kNoCompression,    size = 0             -> bypassed / LZ4 not built
680
+ class BuiltinLZ4HCCompressorV2 : public CompressorWithSimpleDictBase {
681
+ public:
682
+ using CompressorWithSimpleDictBase::CompressorWithSimpleDictBase;
683
+
684
+ const char* Name() const override { return "BuiltinLZ4HCCompressorV2"; }
685
+
686
+ CompressionType GetPreferredCompressionType() const override {
687
+ return kLZ4HCCompression;
688
+ }
689
+
// Builds a new compressor of this type from the current options, moving the
// supplied dictionary bytes into it.
690
+ std::unique_ptr<Compressor> CloneForDict(std::string&& dict_data) override {
691
+ return std::make_unique<BuiltinLZ4HCCompressorV2>(opts_,
692
+ std::move(dict_data));
693
+ }
694
+
// Hands out a new LZ4_streamHC_t owned by this compressor, or an empty
// working area when LZ4 is not compiled in.
// NOTE(review): the LZ4_createStreamHC() result is not checked for null.
695
+ ManagedWorkingArea ObtainWorkingArea() override {
696
+ #ifdef LZ4
697
+ return {reinterpret_cast<WorkingArea*>(LZ4_createStreamHC()), this};
698
+ #else
699
+ return {};
700
+ #endif
701
+ }
// Frees a stream handed out by ObtainWorkingArea(); a null pointer is ignored.
702
+ void ReleaseWorkingArea(WorkingArea* wa) override {
703
+ if (wa) {
704
+ #ifdef LZ4
705
+ LZ4_freeStreamHC(reinterpret_cast<LZ4_streamHC_t*>(wa));
706
+ #endif
707
+ }
708
+ }
709
+
// Compresses uncompressed_data into compressed_output.
// *compressed_output_size is forwarded to StartCompressBlockV2 on entry and
// receives the size code described in the class annotation on return.
// wa is only used when wa->owner() == this; otherwise a temporary stream is
// created for this call.
710
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
711
+ size_t* compressed_output_size,
712
+ CompressionType* out_compression_type,
713
+ ManagedWorkingArea* wa) override {
714
+ #ifdef LZ4
715
+ auto [alg_output, alg_max_output_size] = StartCompressBlockV2(
716
+ uncompressed_data, compressed_output, *compressed_output_size);
717
+ if (alg_max_output_size == 0) {
718
+ // Compression bypassed
719
+ *compressed_output_size = 0;
720
+ *out_compression_type = kNoCompression;
721
+ return Status::OK();
722
+ }
723
+
// Translate the "use default" sentinel into a value LZ4HC maps to its default.
724
+ int level = opts_.level;
725
+ if (level == CompressionOptions::kDefaultCompressionLevel) {
726
+ level = 0; // lz4hc.h says any value < 1 will be sanitized to default
727
+ }
728
+
729
+ ManagedWorkingArea tmp_wa;
730
+ LZ4_streamHC_t* stream;
731
+ if (wa != nullptr && wa->owner() == this) {
732
+ stream = reinterpret_cast<LZ4_streamHC_t*>(wa->get());
733
+ } else {
// NOTE(review): stream is used below without a null check.
734
+ tmp_wa = ObtainWorkingArea();
735
+ stream = reinterpret_cast<LZ4_streamHC_t*>(tmp_wa.get());
736
+ }
// Unlike the non-HC LZ4 compressor, the reset is applied to both reused and
// freshly created streams; the reset call is also where the level is passed.
737
+ #if LZ4_VERSION_NUMBER >= 10900 // >= version 1.9.0
738
+ LZ4_resetStreamHC_fast(stream, level);
739
+ #else
740
+ LZ4_resetStreamHC(stream, level);
741
+ #endif
// The dictionary is loaded into the stream on every call when non-empty.
742
+ if (dict_data_.size() > 0) {
743
+ // TODO: more optimization possible here?
744
+ LZ4_loadDictHC(stream, dict_data_.data(),
745
+ static_cast<int>(dict_data_.size()));
746
+ }
747
+
// Sizes are narrowed to int for the LZ4 API.
748
+ auto outlen =
749
+ LZ4_compress_HC_continue(stream, uncompressed_data.data(), alg_output,
750
+ static_cast<int>(uncompressed_data.size()),
751
+ static_cast<int>(alg_max_output_size));
// Only a positive length counts as success.
752
+ if (outlen > 0) {
753
+ // Compression kept/successful
754
+ size_t output_size = static_cast<size_t>(
755
+ outlen + /*header size*/ (alg_output - compressed_output));
756
+ assert(output_size <= *compressed_output_size);
757
+ *compressed_output_size = output_size;
758
+ *out_compression_type = kLZ4HCCompression;
759
+ return Status::OK();
760
+ }
761
+ // Compression rejected
762
+ *compressed_output_size = 1;
763
+ #else
764
+ (void)uncompressed_data;
765
+ (void)compressed_output;
766
+ (void)wa;
767
+ // Compression bypassed (not supported)
768
+ *compressed_output_size = 0;
769
+ #endif
// Shared tail for the "rejected" and "not supported" outcomes.
770
+ *out_compression_type = kNoCompression;
771
+ return Status::OK();
772
+ }
773
+ };
774
+
// Compressor backed by port::xpress::CompressWithMaxSize, active only when
// XPRESS is defined. It defines no working-area overrides of its own and its
// CompressBlock ignores the working-area argument. Every path of CompressBlock
// returns Status::OK(); the outcome is reported through the out-parameters:
//   kXpressCompression, size = compressed bytes -> block was compressed
//   kNoCompression,     size = 1                -> rejected or failed
//   kNoCompression,     size = 0                -> XPRESS not built
775
+ class BuiltinXpressCompressorV2 : public CompressorWithSimpleDictBase {
776
+ public:
777
+ using CompressorWithSimpleDictBase::CompressorWithSimpleDictBase;
778
+
779
+ const char* Name() const override { return "BuiltinXpressCompressorV2"; }
780
+
781
+ CompressionType GetPreferredCompressionType() const override {
782
+ return kXpressCompression;
783
+ }
784
+
// Builds a new compressor of this type carrying the supplied dictionary
// bytes; CompressBlock below never reads them.
785
+ std::unique_ptr<Compressor> CloneForDict(std::string&& dict_data) override {
786
+ return std::make_unique<BuiltinXpressCompressorV2>(opts_,
787
+ std::move(dict_data));
788
+ }
789
+
// Compresses uncompressed_data straight into compressed_output.
// Unlike the LZ4 compressors in this file there is no StartCompressBlockV2
// call here: *compressed_output_size is passed to the XPRESS helper directly
// as the maximum output size.
790
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
791
+ size_t* compressed_output_size,
792
+ CompressionType* out_compression_type,
793
+ ManagedWorkingArea*) override {
794
+ #ifdef XPRESS
795
+ // XPRESS doesn't actually use the dictionary, but we store it for
796
+ // compatibility similar to BuiltinSnappyCompressorV2
797
+
798
+ // Use the new CompressWithMaxSize function that writes directly to the
799
+ // output buffer
800
+ size_t compressed_size = port::xpress::CompressWithMaxSize(
801
+ uncompressed_data.data(), uncompressed_data.size(), compressed_output,
802
+ *compressed_output_size);
803
+
// A zero result is treated as "rejected or failed"; the two cases are not
// distinguished here.
804
+ if (compressed_size > 0) {
805
+ // Compression kept/successful
806
+ *compressed_output_size = compressed_size;
807
+ *out_compression_type = kXpressCompression;
808
+ return Status::OK();
809
+ }
810
+
811
+ // Compression rejected or failed
812
+ *compressed_output_size = 1;
813
+ #else
814
+ (void)uncompressed_data;
815
+ (void)compressed_output;
816
+ // Compression bypassed (not supported)
817
+ *compressed_output_size = 0;
818
+ #endif
// Shared tail for the "rejected" and "not supported" outcomes.
819
+ *out_compression_type = kNoCompression;
820
+ return Status::OK();
821
+ }
822
+ };
823
+
824
+ class BuiltinZSTDCompressorV2 : public CompressorBase {
825
+ public:
// Stores the options in the CompressorBase subobject and takes ownership of
// the dictionary by move; the default argument is an empty dictionary.
826
+ explicit BuiltinZSTDCompressorV2(const CompressionOptions& opts,
827
+ CompressionDict&& dict = {})
828
+ : CompressorBase(opts), dict_(std::move(dict)) {}
829
+
// Fixed identifier string for this compressor class.
830
+ const char* Name() const override { return "BuiltinZSTDCompressorV2"; }
831
+
// This class is ZSTD-only, so the type is constant (the removed
// BuiltinCompressorV2 version returned a per-instance type_ instead).
832
+ CompressionType GetPreferredCompressionType() const override { return kZSTD; }
833
+
207
834
  size_t GetMaxSampleSizeIfWantDict(
208
835
  CacheEntryRole /*block_type*/) const override {
209
836
  if (opts_.max_dict_bytes == 0) {
210
837
  // Dictionary compression disabled
211
838
  return 0;
212
839
  } else {
213
- return type_ == kZSTD && opts_.zstd_max_train_bytes > 0
214
- ? opts_.zstd_max_train_bytes
215
- : opts_.max_dict_bytes;
840
+ return opts_.zstd_max_train_bytes > 0 ? opts_.zstd_max_train_bytes
841
+ : opts_.max_dict_bytes;
216
842
  }
217
843
  }
218
844
 
219
845
  // NOTE: empty dict is equivalent to no dict
220
846
  Slice GetSerializedDict() const override { return dict_.GetRawDict(); }
221
847
 
222
- CompressionType GetPreferredCompressionType() const override { return type_; }
// Creates a ZSTD_CCtx configured from opts_ (compression level and, if
// requested, the checksum flag) and returns it as a working area owned by this
// compressor. Returns an empty working area when ZSTD is not compiled in.
848
+ ManagedWorkingArea ObtainWorkingArea() override {
849
+ #ifdef ZSTD
// With ROCKSDB_ZSTD_CUSTOM_MEM the context is created with the allocation
// overrides from port::GetJeZstdAllocationOverrides().
850
+ ZSTD_CCtx* ctx =
851
+ #ifdef ROCKSDB_ZSTD_CUSTOM_MEM
852
+ ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides());
853
+ #else // ROCKSDB_ZSTD_CUSTOM_MEM
854
+ ZSTD_createCCtx();
855
+ #endif // ROCKSDB_ZSTD_CUSTOM_MEM
856
+ auto level = opts_.level;
857
+ if (level == CompressionOptions::kDefaultCompressionLevel) {
858
+ // NB: ZSTD_CLEVEL_DEFAULT is historically == 3
859
+ level = ZSTD_CLEVEL_DEFAULT;
860
+ }
// A parameter failure is not expected (assert in debug builds). In release
// builds the context is discarded and replaced by a default one.
// NOTE(review): the replacement uses plain ZSTD_createCCtx() even when
// ROCKSDB_ZSTD_CUSTOM_MEM is defined.
861
+ size_t err = ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, level);
862
+ if (ZSTD_isError(err)) {
863
+ assert(false);
864
+ ZSTD_freeCCtx(ctx);
865
+ ctx = ZSTD_createCCtx();
866
+ }
867
+ if (opts_.checksum) {
868
+ err = ZSTD_CCtx_setParameter(ctx, ZSTD_c_checksumFlag, 1);
869
+ if (ZSTD_isError(err)) {
// NOTE(review): the replacement context created here does not get the
// compression level applied above set on it again.
870
+ assert(false);
871
+ ZSTD_freeCCtx(ctx);
872
+ ctx = ZSTD_createCCtx();
873
+ }
874
+ }
875
+ return ManagedWorkingArea(reinterpret_cast<WorkingArea*>(ctx), this);
876
+ #else
877
+ return {};
878
+ #endif // ZSTD
879
+ }
880
+
// Frees a ZSTD_CCtx handed out by ObtainWorkingArea(). A null pointer is
// ignored, and nothing is done when ZSTD is not compiled in.
881
+ void ReleaseWorkingArea(WorkingArea* wa) override {
882
+ if (wa) {
883
+ #ifdef ZSTD
884
+ ZSTD_freeCCtx(reinterpret_cast<ZSTD_CCtx*>(wa));
885
+ #endif // ZSTD
886
+ }
887
+ }
888
+
// Compresses uncompressed_data into compressed_output with ZSTD.
// *compressed_output_size is forwarded to StartCompressBlockV2 on entry; on an
// OK return the out-parameters encode the outcome:
//   kZSTD,          size = header + ZSTD bytes -> block was compressed
//   kNoCompression, size = 1                   -> output did not fit (rejected)
//   kNoCompression, size = 0                   -> bypassed / ZSTD not built
// Any ZSTD error other than "destination too small" is returned as
// Status::Corruption and the out-parameters are left untouched.
// wa is only used when wa->owner() == this; otherwise a context is created for
// this call.
889
+ Status CompressBlock(Slice uncompressed_data, char* compressed_output,
890
+ size_t* compressed_output_size,
891
+ CompressionType* out_compression_type,
892
+ ManagedWorkingArea* wa) override {
893
+ #ifdef ZSTD
894
+ auto [alg_output, alg_max_output_size] = StartCompressBlockV2(
895
+ uncompressed_data, compressed_output, *compressed_output_size);
896
+ if (alg_max_output_size == 0) {
897
+ // Compression bypassed
898
+ *compressed_output_size = 0;
899
+ *out_compression_type = kNoCompression;
900
+ return Status::OK();
901
+ }
902
+
// Fall back to a per-call context when the caller supplied none, or one that
// belongs to a different compressor.
903
+ ManagedWorkingArea tmp_wa;
904
+ if (wa == nullptr || wa->owner() != this) {
905
+ tmp_wa = ObtainWorkingArea();
906
+ wa = &tmp_wa;
907
+ }
// NOTE(review): a null context is only caught by this assert, i.e. in debug
// builds.
908
+ assert(wa->get() != nullptr);
909
+ ZSTD_CCtx* ctx = reinterpret_cast<ZSTD_CCtx*>(wa->get());
910
+
// Prefer the digested dictionary when dict_ has one; otherwise load the raw
// dictionary bytes.
// NOTE(review): the return values of both dictionary calls are ignored.
911
+ if (dict_.GetDigestedZstdCDict() != nullptr) {
912
+ ZSTD_CCtx_refCDict(ctx, dict_.GetDigestedZstdCDict());
913
+ } else {
914
+ ZSTD_CCtx_loadDictionary(ctx, dict_.GetRawDict().data(),
915
+ dict_.GetRawDict().size());
916
+ }
917
+
918
+ // Compression level is set in the ZSTD_CCtx (`ctx`) during ObtainWorkingArea()
919
+ size_t outlen =
920
+ ZSTD_compress2(ctx, alg_output, alg_max_output_size,
921
+ uncompressed_data.data(), uncompressed_data.size());
922
+ if (!ZSTD_isError(outlen)) {
923
+ // Compression kept/successful
924
+ size_t output_size = static_cast<size_t>(
925
+ outlen + /*header size*/ (alg_output - compressed_output));
926
+ assert(output_size <= *compressed_output_size);
927
+ *compressed_output_size = output_size;
928
+ *out_compression_type = kZSTD;
929
+ return Status::OK();
930
+ }
// "Destination too small" is the one error treated as a normal rejection;
// every other error is surfaced to the caller.
931
+ if (ZSTD_getErrorCode(outlen) != ZSTD_error_dstSize_tooSmall) {
932
+ return Status::Corruption(std::string("ZSTD_compress2 failed: ") +
933
+ ZSTD_getErrorName(outlen));
934
+ }
935
+ // Compression rejected
936
+ *compressed_output_size = 1;
937
+ #else
938
+ (void)uncompressed_data;
939
+ (void)compressed_output;
940
+ (void)wa;
941
+ // Compression bypassed (not supported)
942
+ *compressed_output_size = 0;
943
+ #endif
// Shared tail for the "rejected" and "not supported" outcomes.
944
+ *out_compression_type = kNoCompression;
945
+ return Status::OK();
946
+ }
223
947
 
224
948
  std::unique_ptr<Compressor> MaybeCloneSpecialized(
225
949
  CacheEntryRole /*block_type*/, DictSampleArgs&& dict_samples) override {
@@ -230,7 +954,7 @@ class BuiltinCompressorV2 : public Compressor {
230
954
  }
231
955
  std::string dict_data;
232
956
  // Migrated from BlockBasedTableBuilder::EnterUnbuffered()
233
- if (type_ == kZSTD && opts_.zstd_max_train_bytes > 0) {
957
+ if (opts_.zstd_max_train_bytes > 0) {
234
958
  assert(dict_samples.sample_data.size() <= opts_.zstd_max_train_bytes);
235
959
  if (opts_.use_zstd_dict_trainer) {
236
960
  dict_data = ZSTD_TrainDictionary(dict_samples.sample_data,
@@ -247,43 +971,13 @@ class BuiltinCompressorV2 : public Compressor {
247
971
  // dictionary." Or similar for other compressions.
248
972
  dict_data = std::move(dict_samples.sample_data);
249
973
  }
250
- CompressionDict dict{std::move(dict_data), type_, opts_.level};
251
- return std::make_unique<BuiltinCompressorV2>(opts_, type_, std::move(dict));
974
+ CompressionDict dict{std::move(dict_data), kZSTD, opts_.level};
975
+ return std::make_unique<BuiltinZSTDCompressorV2>(opts_, std::move(dict));
252
976
  }
253
977
 
254
- // TODO: use ZSTD_CCtx directly
255
- ManagedWorkingArea ObtainWorkingArea() override {
256
- return ManagedWorkingArea(new CompressionContext(type_, opts_), this);
257
- }
258
- void ReleaseWorkingArea(WorkingArea* wa) override {
259
- delete static_cast<CompressionContext*>(wa);
260
- }
261
- Status CompressBlock(Slice uncompressed_data, std::string* compressed_output,
262
- CompressionType* out_compression_type,
263
- ManagedWorkingArea* wa) override {
264
- std::optional<CompressionContext> tmp_ctx;
265
- CompressionContext* ctx = nullptr;
266
- if (wa != nullptr && wa->owner() == this) {
267
- ctx = static_cast<CompressionContext*>(wa->get());
268
- }
269
- CompressionType type = type_;
270
- if (ctx == nullptr) {
271
- tmp_ctx.emplace(type, opts_);
272
- ctx = &*tmp_ctx;
273
- }
274
- CompressionInfo info(opts_, *ctx, dict_, type);
275
- if (!OLD_CompressData(uncompressed_data, info,
276
- 2 /*compress_format_version*/, compressed_output)) {
277
- *out_compression_type = kNoCompression;
278
- return Status::OK();
279
- }
280
- *out_compression_type = type;
281
- return Status::OK();
282
- }
978
+ std::shared_ptr<Decompressor> GetOptimizedDecompressor() const override;
283
979
 
284
980
  protected:
285
- const CompressionOptions opts_;
286
- const CompressionType type_;
287
981
  const CompressionDict dict_;
288
982
  };
289
983
 
@@ -480,7 +1174,6 @@ Status LZ4_DecompressBlock(const Decompressor::Args& args, Slice dict,
480
1174
  char* uncompressed_output) {
481
1175
  #ifdef LZ4
482
1176
  int expected_uncompressed_size = static_cast<int>(args.uncompressed_size);
483
- #if LZ4_VERSION_NUMBER >= 10400 // r124+
484
1177
  LZ4_streamDecode_t* stream = LZ4_createStreamDecode();
485
1178
  if (!dict.empty()) {
486
1179
  LZ4_setStreamDecode(stream, dict.data(), static_cast<int>(dict.size()));
@@ -490,16 +1183,6 @@ Status LZ4_DecompressBlock(const Decompressor::Args& args, Slice dict,
490
1183
  static_cast<int>(args.compressed_data.size()),
491
1184
  expected_uncompressed_size);
492
1185
  LZ4_freeStreamDecode(stream);
493
- #else // up to r123
494
- if (!dict.empty()) {
495
- return Status::NotSupported(
496
- "This build doesn't support dictionary compression with LZ4");
497
- }
498
- int uncompressed_size =
499
- LZ4_decompress_safe(args.compressed_data.data(), uncompressed_output,
500
- static_cast<int>(args.compressed_data.size()),
501
- expected_uncompressed_size);
502
- #endif // LZ4_VERSION_NUMBER >= 10400
503
1186
 
504
1187
  if (uncompressed_size != expected_uncompressed_size) {
505
1188
  if (uncompressed_size < 0) {
@@ -799,7 +1482,7 @@ class BuiltinDecompressorV2OptimizeZstd : public BuiltinDecompressorV2 {
799
1482
  class BuiltinDecompressorV2OptimizeZstdWithDict
800
1483
  : public BuiltinDecompressorV2OptimizeZstd {
801
1484
  public:
802
- BuiltinDecompressorV2OptimizeZstdWithDict(const Slice& dict)
1485
+ explicit BuiltinDecompressorV2OptimizeZstdWithDict(const Slice& dict)
803
1486
  :
804
1487
  #ifdef ROCKSDB_ZSTD_DDICT
805
1488
  dict_(dict),
@@ -875,14 +1558,29 @@ class BuiltinCompressionManagerV2 : public CompressionManager {
875
1558
  // No acceptable compression ratio => no compression
876
1559
  return nullptr;
877
1560
  }
878
- if (type > kLastBuiltinCompression) {
879
- // Unrecognized; fall back on default compression
1561
+ if (!SupportsCompressionType(type)) {
1562
+ // Unrecognized or support not compiled in. Fall back on default
880
1563
  type = ColumnFamilyOptions{}.compression;
881
1564
  }
882
- if (type == kNoCompression) {
883
- return nullptr;
884
- } else {
885
- return std::make_unique<BuiltinCompressorV2>(opts, type);
1565
+ switch (type) {
1566
+ case kNoCompression:
1567
+ default:
1568
+ assert(type == kNoCompression); // Others should be excluded above
1569
+ return nullptr;
1570
+ case kSnappyCompression:
1571
+ return std::make_unique<BuiltinSnappyCompressorV2>(opts);
1572
+ case kZlibCompression:
1573
+ return std::make_unique<BuiltinZlibCompressorV2>(opts);
1574
+ case kBZip2Compression:
1575
+ return std::make_unique<BuiltinBZip2CompressorV2>(opts);
1576
+ case kLZ4Compression:
1577
+ return std::make_unique<BuiltinLZ4CompressorV2NoDict>(opts);
1578
+ case kLZ4HCCompression:
1579
+ return std::make_unique<BuiltinLZ4HCCompressorV2>(opts);
1580
+ case kXpressCompression:
1581
+ return std::make_unique<BuiltinXpressCompressorV2>(opts);
1582
+ case kZSTD:
1583
+ return std::make_unique<BuiltinZSTDCompressorV2>(opts);
886
1584
  }
887
1585
  }
888
1586
 
@@ -913,20 +1611,6 @@ class BuiltinCompressionManagerV2 : public CompressionManager {
913
1611
  return GetGeneralDecompressor();
914
1612
  }
915
1613
  }
916
- std::shared_ptr<Decompressor> GetDecompressorForCompressor(
917
- const Compressor& compressor) override {
918
- #ifdef ROCKSDB_USE_RTTI
919
- // To be extra safe, only optimize here if we are certain we are not
920
- // looking at a wrapped compressor, so that we are sure it only uses that
921
- // one compression type.
922
- if (dynamic_cast<const BuiltinCompressorV2*>(&compressor)) {
923
- CompressionType type = compressor.GetPreferredCompressionType();
924
- return GetDecompressorForTypes(&type, &type + 1);
925
- }
926
- #endif
927
- // Fallback
928
- return CompressionManager::GetDecompressorForCompressor(compressor);
929
- }
930
1614
 
931
1615
  bool SupportsCompressionType(CompressionType type) const override {
932
1616
  return CompressionTypeSupported(type);
@@ -937,6 +1621,7 @@ class BuiltinCompressionManagerV2 : public CompressionManager {
937
1621
  BuiltinDecompressorV2OptimizeZstd zstd_decompressor_;
938
1622
  BuiltinDecompressorV2SnappyOnly snappy_decompressor_;
939
1623
 
1624
+ public:
940
1625
  inline std::shared_ptr<Decompressor> GetGeneralDecompressor() {
941
1626
  return std::shared_ptr<Decompressor>(shared_from_this(), &decompressor_);
942
1627
  }
@@ -959,6 +1644,16 @@ const std::shared_ptr<BuiltinCompressionManagerV2>
959
1644
  kBuiltinCompressionManagerV2 =
960
1645
  std::make_shared<BuiltinCompressionManagerV2>();
961
1646
 
// Out-of-line definition placed after kBuiltinCompressionManagerV2, which it
// uses: forwards to the built-in manager's GetZstdDecompressor().
1647
+ std::shared_ptr<Decompressor>
1648
+ BuiltinZSTDCompressorV2::GetOptimizedDecompressor() const {
1649
+ return kBuiltinCompressionManagerV2->GetZstdDecompressor();
1650
+ }
1651
+
// Out-of-line definition placed after kBuiltinCompressionManagerV2, which it
// uses: forwards to the built-in manager's GetSnappyDecompressor().
1652
+ std::shared_ptr<Decompressor>
1653
+ BuiltinSnappyCompressorV2::GetOptimizedDecompressor() const {
1654
+ return kBuiltinCompressionManagerV2->GetSnappyDecompressor();
1655
+ }
1656
+
962
1657
  } // namespace
963
1658
 
964
1659
  Status CompressionManager::CreateFromString(