@nxtedition/rocksdb 13.5.9 → 13.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/deps/rocksdb/rocksdb/BUCK +2 -1
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +2 -1
  3. package/deps/rocksdb/rocksdb/Makefile +1 -1
  4. package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +4 -5
  5. package/deps/rocksdb/rocksdb/db/c.cc +13 -0
  6. package/deps/rocksdb/rocksdb/db/c_test.c +0 -12
  7. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +8 -8
  8. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +2 -3
  9. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +5 -4
  10. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +2 -1
  11. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +10 -10
  12. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +11 -6
  13. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +10 -16
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc +2 -4
  15. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +12 -17
  16. package/deps/rocksdb/rocksdb/db/compaction/compaction_service_test.cc +164 -0
  17. package/deps/rocksdb/rocksdb/db/corruption_test.cc +74 -3
  18. package/deps/rocksdb/rocksdb/db/db_encryption_test.cc +39 -4
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +2 -83
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +0 -4
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -11
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_follower.cc +0 -3
  23. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +0 -9
  24. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +16 -54
  25. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +0 -6
  26. package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +186 -0
  27. package/deps/rocksdb/rocksdb/db/db_secondary_test.cc +3 -40
  28. package/deps/rocksdb/rocksdb/db/db_sst_test.cc +0 -54
  29. package/deps/rocksdb/rocksdb/db/db_test.cc +0 -292
  30. package/deps/rocksdb/rocksdb/db/db_test2.cc +0 -1235
  31. package/deps/rocksdb/rocksdb/db/db_test_util.h +8 -0
  32. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +11 -4
  33. package/deps/rocksdb/rocksdb/db/log_reader.cc +11 -11
  34. package/deps/rocksdb/rocksdb/db/merge_helper.h +1 -1
  35. package/deps/rocksdb/rocksdb/db/multi_scan.cc +70 -0
  36. package/deps/rocksdb/rocksdb/db/version_set.cc +15 -8
  37. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +4 -0
  38. package/deps/rocksdb/rocksdb/env/composite_env.cc +4 -0
  39. package/deps/rocksdb/rocksdb/env/env.cc +4 -0
  40. package/deps/rocksdb/rocksdb/env/env_encryption.cc +38 -3
  41. package/deps/rocksdb/rocksdb/env/env_test.cc +36 -1
  42. package/deps/rocksdb/rocksdb/env/fs_posix.cc +20 -4
  43. package/deps/rocksdb/rocksdb/env/io_posix.cc +16 -0
  44. package/deps/rocksdb/rocksdb/env/io_posix.h +3 -0
  45. package/deps/rocksdb/rocksdb/env/mock_env.cc +5 -0
  46. package/deps/rocksdb/rocksdb/file/readahead_raf.cc +4 -0
  47. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -6
  48. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +5 -0
  49. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +25 -1
  50. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +10 -0
  51. package/deps/rocksdb/rocksdb/include/rocksdb/env_encryption.h +9 -0
  52. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +12 -0
  53. package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +12 -8
  54. package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +29 -28
  55. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +26 -6
  56. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +9 -0
  57. package/deps/rocksdb/rocksdb/include/rocksdb/tool_hooks.h +3 -0
  58. package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +142 -0
  59. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h +2 -0
  60. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +2 -2
  61. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +2 -0
  62. package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
  63. package/deps/rocksdb/rocksdb/options/options_helper.h +3 -0
  64. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +2 -0
  65. package/deps/rocksdb/rocksdb/port/win/io_win.cc +20 -0
  66. package/deps/rocksdb/rocksdb/port/win/io_win.h +4 -0
  67. package/deps/rocksdb/rocksdb/src.mk +2 -1
  68. package/deps/rocksdb/rocksdb/table/block_based/block.cc +31 -34
  69. package/deps/rocksdb/rocksdb/table/block_based/block.h +2 -4
  70. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +43 -7
  71. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +6 -0
  72. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +367 -2
  73. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +69 -23
  74. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +54 -6
  75. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +27 -5
  76. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +167 -3
  77. package/deps/rocksdb/rocksdb/table/block_based/block_builder.cc +6 -2
  78. package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +6 -0
  79. package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +12 -0
  80. package/deps/rocksdb/rocksdb/table/block_based/block_type.h +1 -0
  81. package/deps/rocksdb/rocksdb/table/block_based/filter_policy.cc +0 -3
  82. package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +10 -7
  83. package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +244 -0
  84. package/deps/rocksdb/rocksdb/table/external_table.cc +1 -1
  85. package/deps/rocksdb/rocksdb/table/format.cc +51 -33
  86. package/deps/rocksdb/rocksdb/table/format.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +13 -8
  88. package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +1 -3
  89. package/deps/rocksdb/rocksdb/table/sst_file_writer.cc +5 -1
  90. package/deps/rocksdb/rocksdb/table/table_test.cc +629 -1
  91. package/deps/rocksdb/rocksdb/test_util/testutil.cc +0 -1
  92. package/deps/rocksdb/rocksdb/test_util/testutil.h +5 -0
  93. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +183 -94
  94. package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +71 -0
  95. package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +37 -22
  96. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +308 -0
  97. package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +189 -0
  98. package/deps/rocksdb/rocksdb/util/cast_util.h +22 -11
  99. package/deps/rocksdb/rocksdb/util/coding.h +4 -3
  100. package/deps/rocksdb/rocksdb/util/compression.cc +2 -0
  101. package/deps/rocksdb/rocksdb/util/compression.h +16 -6
  102. package/deps/rocksdb/rocksdb/util/compression_test.cc +1679 -15
  103. package/deps/rocksdb/rocksdb/util/stop_watch.h +17 -7
  104. package/deps/rocksdb/rocksdb/util/timer_queue_test.cc +17 -3
  105. package/deps/rocksdb/rocksdb/utilities/env_mirror.cc +10 -0
  106. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.cc +5 -0
  107. package/deps/rocksdb/rocksdb/utilities/fault_injection_env.h +2 -0
  108. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +18 -2
  109. package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +22 -3
  110. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +5 -0
  111. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +22 -2
  112. package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +15 -4
  113. package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +61 -0
  114. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +18 -0
  115. package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h +3 -0
  116. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +3 -0
  117. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc +9 -3
  118. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +9 -0
  119. package/deps/rocksdb/rocksdb.gyp +15 -1
  120. package/package.json +1 -1
  121. package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
  122. package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
  123. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.cc +0 -131
  124. package/deps/rocksdb/rocksdb/util/auto_skip_compressor.h +0 -90
@@ -15,6 +15,7 @@
15
15
  #include "rocksdb/convenience.h"
16
16
  #include "rocksdb/filter_policy.h"
17
17
  #include "rocksdb/sst_dump_tool.h"
18
+ #include "rocksdb/utilities/object_registry.h"
18
19
  #include "table/block_based/block_based_table_factory.h"
19
20
  #include "table/sst_file_dumper.h"
20
21
  #include "table/table_builder.h"
@@ -346,6 +347,76 @@ TEST_F(SSTDumpToolTest, CompressedSizes) {
346
347
  }
347
348
  }
348
349
 
350
+ namespace {
351
+ using Compressor8A = test::CompressorCustomAlg<kCustomCompression8A>;
352
+ class MyManager : public CompressionManager {
353
+ public:
354
+ static constexpr const char* kCompatibilityName = "SSTDumpToolTest:MyManager";
355
+ const char* Name() const override { return kCompatibilityName; }
356
+ const char* CompatibilityName() const override { return kCompatibilityName; }
357
+
358
+ bool SupportsCompressionType(CompressionType type) const override {
359
+ return type == kCustomCompression8A;
360
+ }
361
+
362
+ std::unique_ptr<Compressor> GetCompressor(const CompressionOptions& /*opts*/,
363
+ CompressionType type) override {
364
+ switch (static_cast<unsigned char>(type)) {
365
+ case kCustomCompression8A:
366
+ return std::make_unique<Compressor8A>();
367
+ default:
368
+ return nullptr;
369
+ }
370
+ }
371
+
372
+ std::shared_ptr<Decompressor> GetDecompressor() override {
373
+ return std::make_shared<test::DecompressorCustomAlg>();
374
+ }
375
+ };
376
+ } // namespace
377
+
378
+ TEST_F(SSTDumpToolTest, CompressionManager) {
379
+ if (!Compressor8A::Supported()) {
380
+ fprintf(stderr,
381
+ "Prerequisite compression library not supported. Skipping\n");
382
+ return;
383
+ }
384
+
385
+ // Register in ObjectLibrary to check that sst_dump can use named
386
+ // CompressionManagers with dependency injection
387
+ auto& library = *ObjectLibrary::Default();
388
+ library.AddFactory<CompressionManager>(
389
+ MyManager::kCompatibilityName,
390
+ [](const std::string& /*uri*/, std::unique_ptr<CompressionManager>* guard,
391
+ std::string* /*errmsg*/) {
392
+ *guard = std::make_unique<MyManager>();
393
+ return guard->get();
394
+ });
395
+
396
+ Options opts;
397
+ opts.env = env();
398
+ BlockBasedTableOptions table_opts;
399
+ table_opts.filter_policy.reset(
400
+ ROCKSDB_NAMESPACE::NewBloomFilterPolicy(10, false));
401
+ opts.table_factory.reset(new BlockBasedTableFactory(table_opts));
402
+ std::string file_path = MakeFilePath("rocksdb_sst_test.sst");
403
+ createSST(opts, file_path, 10);
404
+
405
+ char* usage[5];
406
+ PopulateCommandArgs(file_path, "--command=recompress", usage);
407
+ snprintf(usage[3], kOptLength, "--compression_manager=%s",
408
+ MyManager::kCompatibilityName);
409
+ snprintf(usage[4], kOptLength, "--compression_types=kCustomCompression8A");
410
+
411
+ ROCKSDB_NAMESPACE::SSTDumpTool tool;
412
+ ASSERT_TRUE(!tool.Run(5, usage, opts));
413
+
414
+ cleanup(opts, file_path);
415
+ for (int i = 0; i < 5; i++) {
416
+ delete[] usage[i];
417
+ }
418
+ }
419
+
349
420
  TEST_F(SSTDumpToolTest, MemEnv) {
350
421
  std::unique_ptr<Env> mem_env(NewMemEnv(env()));
351
422
  Options opts;
@@ -17,17 +17,6 @@
17
17
 
18
18
  namespace ROCKSDB_NAMESPACE {
19
19
 
20
- static const std::vector<std::pair<CompressionType, const char*>>
21
- kCompressions = {
22
- {CompressionType::kNoCompression, "kNoCompression"},
23
- {CompressionType::kSnappyCompression, "kSnappyCompression"},
24
- {CompressionType::kZlibCompression, "kZlibCompression"},
25
- {CompressionType::kBZip2Compression, "kBZip2Compression"},
26
- {CompressionType::kLZ4Compression, "kLZ4Compression"},
27
- {CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
28
- {CompressionType::kXpressCompression, "kXpressCompression"},
29
- {CompressionType::kZSTD, "kZSTD"}};
30
-
31
20
  namespace {
32
21
 
33
22
  void print_help(bool to_stderr) {
@@ -98,10 +87,15 @@ void print_help(bool to_stderr) {
98
87
  be used when trying different compression algorithms
99
88
 
100
89
  --compression_types=<comma-separated list of CompressionType members, e.g.,
101
- kSnappyCompression>
90
+ kSnappyCompression or kCustomCompressionC4>
102
91
  Can be combined with --command=recompress to run recompression for this
103
92
  list of compression types
104
- Supported compression types: %s
93
+ Supported built-in compression types: %s
94
+
95
+ --compression_manager=<compression manager string>
96
+ Used with --command=recompress to specify a compression manager to use
97
+ instead of the built-in compression manager, which may support a
98
+ different set of compression types.
105
99
 
106
100
  --parse_internal_key=<0xKEY>
107
101
  Convenience option to parse an internal key on the command line. Dumps the
@@ -178,7 +172,8 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
178
172
  std::string compression_level_to_str;
179
173
  size_t block_size = 0;
180
174
  size_t readahead_size = 2 * 1024 * 1024;
181
- std::vector<std::pair<CompressionType, const char*>> compression_types;
175
+ std::vector<CompressionType> compression_types;
176
+ std::shared_ptr<CompressionManager> compression_manager;
182
177
  uint64_t total_num_files = 0;
183
178
  uint64_t total_num_data_blocks = 0;
184
179
  uint64_t total_data_block_size = 0;
@@ -244,19 +239,36 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
244
239
  std::istringstream iss(compression_types_csv);
245
240
  std::string compression_type;
246
241
  has_specified_compression_types = true;
242
+
247
243
  while (std::getline(iss, compression_type, ',')) {
248
- auto iter = std::find_if(
249
- kCompressions.begin(), kCompressions.end(),
250
- [&compression_type](std::pair<CompressionType, const char*> curr) {
251
- return curr.second == compression_type;
252
- });
253
- if (iter == kCompressions.end()) {
244
+ auto iter =
245
+ OptionsHelper::compression_type_string_map.find(compression_type);
246
+ if (iter == OptionsHelper::compression_type_string_map.end()) {
254
247
  fprintf(stderr, "%s is not a valid CompressionType\n",
255
248
  compression_type.c_str());
256
249
  exit(1);
257
250
  }
258
- compression_types.emplace_back(*iter);
251
+ compression_types.emplace_back(iter->second);
252
+ }
253
+ } else if (strncmp(argv[i], "--compression_manager=", 22) == 0) {
254
+ std::string compression_manager_str = argv[i] + 22;
255
+ ConfigOptions config_options;
256
+ config_options.ignore_unsupported_options = false;
257
+ Status s = CompressionManager::CreateFromString(
258
+ config_options, compression_manager_str, &compression_manager);
259
+ if (!s.ok()) {
260
+ fprintf(stderr, "Failed to create compression manager: %s\n",
261
+ s.ToString().c_str());
262
+ exit(1);
263
+ }
264
+ if (compression_manager == nullptr) {
265
+ fprintf(stderr, "No compression manager created: %s\n",
266
+ compression_manager_str.c_str());
267
+ exit(1);
259
268
  }
269
+ options.compression_manager = compression_manager;
270
+ printf("Using compression manager: %s\n",
271
+ compression_manager->GetId().c_str());
260
272
  } else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
261
273
  std::string in_key(argv[i] + 21);
262
274
  try {
@@ -450,9 +462,12 @@ int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
450
462
  }
451
463
 
452
464
  if (command == "recompress") {
465
+ // TODO: consider getting supported compressions from the compression
466
+ // manager
453
467
  st = dumper.ShowAllCompressionSizes(
454
468
  set_block_size ? block_size : 16384,
455
- compression_types.empty() ? kCompressions : compression_types,
469
+ compression_types.empty() ? GetSupportedCompressions()
470
+ : compression_types,
456
471
  compress_level_from, compress_level_to, compression_max_dict_bytes,
457
472
  compression_zstd_max_train_bytes, compression_max_dict_buffer_bytes,
458
473
  !compression_use_zstd_finalize_dict);
@@ -0,0 +1,308 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+ //
6
+
7
+ #include "util/auto_tune_compressor.h"
8
+
9
+ #include "options/options_helper.h"
10
+ #include "rocksdb/advanced_compression.h"
11
+ #include "test_util/sync_point.h"
12
+ #include "util/random.h"
13
+ #include "util/stop_watch.h"
14
+ namespace ROCKSDB_NAMESPACE {
15
+ const std::vector<std::vector<int>> CostAwareCompressor::kCompressionLevels{
16
+ {0}, // kSnappyCompression
17
+ {}, // kZlibCompression
18
+ {}, // kBZip2Compression
19
+ {1, 4, 9}, // kLZ4Compression
20
+ {1, 4, 9}, // kLZ4HCCompression
21
+ {}, // kXpressCompression
22
+ {1, 15, 22} // kZSTD
23
+ };
24
+
25
+ int CompressionRejectionProbabilityPredictor::Predict() const {
26
+ return pred_rejection_prob_percentage_;
27
+ }
28
+
29
+ size_t CompressionRejectionProbabilityPredictor::attempted_compression_count()
30
+ const {
31
+ return rejected_count_ + compressed_count_;
32
+ }
33
+
34
+ bool CompressionRejectionProbabilityPredictor::Record(
35
+ Slice uncompressed_block_data, std::string* compressed_output,
36
+ const CompressionOptions& opts) {
37
+ if (compressed_output->size() >
38
+ (static_cast<uint64_t>(opts.max_compressed_bytes_per_kb) *
39
+ uncompressed_block_data.size()) >>
40
+ 10) {
41
+ rejected_count_++;
42
+ } else {
43
+ compressed_count_++;
44
+ }
45
+ auto attempted = attempted_compression_count();
46
+ if (attempted >= window_size_) {
47
+ pred_rejection_prob_percentage_ =
48
+ static_cast<int>(rejected_count_ * 100 / attempted);
49
+ compressed_count_ = 0;
50
+ rejected_count_ = 0;
51
+ assert(attempted_compression_count() == 0);
52
+ }
53
+ return true;
54
+ }
55
+
56
+ AutoSkipCompressorWrapper::AutoSkipCompressorWrapper(
57
+ std::unique_ptr<Compressor> compressor, const CompressionOptions& opts)
58
+ : CompressorWrapper::CompressorWrapper(std::move(compressor)),
59
+ opts_(opts) {}
60
+
61
+ const char* AutoSkipCompressorWrapper::Name() const {
62
+ return "AutoSkipCompressorWrapper";
63
+ }
64
+
65
+ Status AutoSkipCompressorWrapper::CompressBlock(
66
+ Slice uncompressed_data, std::string* compressed_output,
67
+ CompressionType* out_compression_type, ManagedWorkingArea* wa) {
68
+ // Check if the managed working area is provided or owned by this object.
69
+ // If not, bypass auto-skip logic since the working area lacks a predictor to
70
+ // record or make necessary decisions to compress or bypass compression of the
71
+ // block
72
+ if (wa == nullptr || wa->owner() != this) {
73
+ return wrapped_->CompressBlock(uncompressed_data, compressed_output,
74
+ out_compression_type, wa);
75
+ }
76
+ bool exploration =
77
+ Random::GetTLSInstance()->PercentTrue(kExplorationPercentage);
78
+ TEST_SYNC_POINT_CALLBACK(
79
+ "AutoSkipCompressorWrapper::CompressBlock::exploitOrExplore",
80
+ &exploration);
81
+ auto autoskip_wa = static_cast<AutoSkipWorkingArea*>(wa->get());
82
+ if (exploration) {
83
+ return CompressBlockAndRecord(uncompressed_data, compressed_output,
84
+ out_compression_type, autoskip_wa);
85
+ } else {
86
+ auto predictor_ptr = autoskip_wa->predictor;
87
+ auto prediction = predictor_ptr->Predict();
88
+ if (prediction <= kProbabilityCutOff) {
89
+ // decide to compress
90
+ return CompressBlockAndRecord(uncompressed_data, compressed_output,
91
+ out_compression_type, autoskip_wa);
92
+ } else {
93
+ // decide to bypass compression
94
+ *out_compression_type = kNoCompression;
95
+ return Status::OK();
96
+ }
97
+ }
98
+ return Status::OK();
99
+ }
100
+
101
+ Compressor::ManagedWorkingArea AutoSkipCompressorWrapper::ObtainWorkingArea() {
102
+ auto wrap_wa = wrapped_->ObtainWorkingArea();
103
+ return ManagedWorkingArea(new AutoSkipWorkingArea(std::move(wrap_wa)), this);
104
+ }
105
+ void AutoSkipCompressorWrapper::ReleaseWorkingArea(WorkingArea* wa) {
106
+ delete static_cast<AutoSkipWorkingArea*>(wa);
107
+ }
108
+
109
+ Status AutoSkipCompressorWrapper::CompressBlockAndRecord(
110
+ Slice uncompressed_data, std::string* compressed_output,
111
+ CompressionType* out_compression_type, AutoSkipWorkingArea* wa) {
112
+ Status status = wrapped_->CompressBlock(uncompressed_data, compressed_output,
113
+ out_compression_type, &(wa->wrapped));
114
+ // determine if it was rejected or compressed
115
+ auto predictor_ptr = wa->predictor;
116
+ predictor_ptr->Record(uncompressed_data, compressed_output, opts_);
117
+ return status;
118
+ }
119
+
120
+ const char* AutoSkipCompressorManager::Name() const {
121
+ // should have returned "AutoSkipCompressorManager" but we currently have an
122
+ // error so for now returning name of the wrapped container
123
+ return wrapped_->Name();
124
+ }
125
+
126
+ std::unique_ptr<Compressor> AutoSkipCompressorManager::GetCompressorForSST(
127
+ const FilterBuildingContext& context, const CompressionOptions& opts,
128
+ CompressionType preferred) {
129
+ assert(GetSupportedCompressions().size() > 1);
130
+ assert(preferred != kNoCompression);
131
+ return std::make_unique<AutoSkipCompressorWrapper>(
132
+ wrapped_->GetCompressorForSST(context, opts, preferred), opts);
133
+ }
134
+
135
+ CostAwareCompressor::CostAwareCompressor(const CompressionOptions& opts)
136
+ : opts_(opts) {
137
+ // Creates compressor supporting all the compression types and levels as per
138
+ // the compression levels set in vector kCompressionLevels
139
+ auto builtInManager = GetBuiltinV2CompressionManager();
140
+ const auto& compressions = GetSupportedCompressions();
141
+ for (size_t i = 0; i < kCompressionLevels.size(); i++) {
142
+ CompressionType type = static_cast<CompressionType>(i + 1);
143
+ if (type == kNoCompression) {
144
+ continue;
145
+ }
146
+ if (kCompressionLevels[type - 1].size() == 0) {
147
+ allcompressors_.emplace_back();
148
+ continue;
149
+ } else {
150
+ // if the compression type is not supported, then skip and remove
151
+ // compression levels from the supported compression level list
152
+ if (std::find(compressions.begin(), compressions.end(), type) ==
153
+ compressions.end()) {
154
+ allcompressors_.emplace_back();
155
+ continue;
156
+ }
157
+ std::vector<std::unique_ptr<Compressor>> compressors_diff_levels;
158
+ for (size_t j = 0; j < kCompressionLevels[type - 1].size(); j++) {
159
+ auto level = kCompressionLevels[type - 1][j];
160
+ CompressionOptions new_opts = opts;
161
+ new_opts.level = level;
162
+ compressors_diff_levels.push_back(
163
+ builtInManager->GetCompressor(new_opts, type));
164
+ allcompressors_index_.emplace_back(i, j);
165
+ }
166
+ allcompressors_.push_back(std::move(compressors_diff_levels));
167
+ }
168
+ }
169
+ }
170
+
171
+ const char* CostAwareCompressor::Name() const { return "CostAwareCompressor"; }
172
+ size_t CostAwareCompressor::GetMaxSampleSizeIfWantDict(
173
+ CacheEntryRole block_type) const {
174
+ auto idx = allcompressors_index_.back();
175
+ return allcompressors_[idx.first][idx.second]->GetMaxSampleSizeIfWantDict(
176
+ block_type);
177
+ }
178
+
179
+ Slice CostAwareCompressor::GetSerializedDict() const {
180
+ auto idx = allcompressors_index_.back();
181
+ return allcompressors_[idx.first][idx.second]->GetSerializedDict();
182
+ }
183
+
184
+ CompressionType CostAwareCompressor::GetPreferredCompressionType() const {
185
+ return kZSTD;
186
+ }
187
+ std::unique_ptr<Compressor> CostAwareCompressor::MaybeCloneSpecialized(
188
+ CacheEntryRole block_type, DictSampleArgs&& dict_samples) {
189
+ // TODO: full dictionary compression support. Currently this just falls
190
+ // back on a non-multi compressor when asked to use a dictionary.
191
+ auto idx = allcompressors_index_.back();
192
+ return allcompressors_[idx.first][idx.second]->MaybeCloneSpecialized(
193
+ block_type, std::move(dict_samples));
194
+ }
195
+ Status CostAwareCompressor::CompressBlock(Slice uncompressed_data,
196
+ std::string* compressed_output,
197
+ CompressionType* out_compression_type,
198
+ ManagedWorkingArea* wa) {
199
+ // Check if the managed working area is provided or owned by this object.
200
+ // If not, bypass compressor logic since the working area lacks a predictor
201
+ if (allcompressors_.size() == 0) {
202
+ return Status::NotSupported("No compression type supported");
203
+ }
204
+ if (wa == nullptr || wa->owner() != this) {
205
+ // highest compression level of Zstd
206
+ size_t choosen_compression_type = 6;
207
+ size_t compression_level_ptr = 2;
208
+ return allcompressors_[choosen_compression_type][compression_level_ptr]
209
+ ->CompressBlock(uncompressed_data, compressed_output,
210
+ out_compression_type, wa);
211
+ }
212
+ auto local_wa = static_cast<CostAwareWorkingArea*>(wa->get());
213
+ std::pair<size_t, size_t> choosen_index(6, 2);
214
+ size_t choosen_compression_type = choosen_index.first;
215
+ size_t compresion_level_ptr = choosen_index.second;
216
+ return CompressBlockAndRecord(choosen_compression_type, compresion_level_ptr,
217
+ uncompressed_data, compressed_output,
218
+ out_compression_type, local_wa);
219
+ }
220
+
221
+ Compressor::ManagedWorkingArea CostAwareCompressor::ObtainWorkingArea() {
222
+ auto wrap_wa = allcompressors_.back().back()->ObtainWorkingArea();
223
+ auto wa = new CostAwareWorkingArea(std::move(wrap_wa));
224
+ // Create cost predictors for each compression type and level
225
+ wa->cost_predictors_.reserve(allcompressors_.size());
226
+ for (size_t i = 0; i < allcompressors_.size(); i++) {
227
+ CompressionType type = static_cast<CompressionType>(i + 1);
228
+ if (allcompressors_[type - 1].size() == 0) {
229
+ wa->cost_predictors_.emplace_back();
230
+ continue;
231
+ } else {
232
+ std::vector<IOCPUCostPredictor*> predictors_diff_levels;
233
+ predictors_diff_levels.reserve(kCompressionLevels[type - 1].size());
234
+ for (size_t j = 0; j < kCompressionLevels[type - 1].size(); j++) {
235
+ predictors_diff_levels.emplace_back(new IOCPUCostPredictor(10));
236
+ }
237
+ wa->cost_predictors_.emplace_back(std::move(predictors_diff_levels));
238
+ }
239
+ }
240
+ return ManagedWorkingArea(wa, this);
241
+ }
242
+ void CostAwareCompressor::ReleaseWorkingArea(WorkingArea* wa) {
243
+ // remove all created cost predictors
244
+ for (auto& prdictors_diff_levels :
245
+ static_cast<CostAwareWorkingArea*>(wa)->cost_predictors_) {
246
+ for (auto& predictor : prdictors_diff_levels) {
247
+ delete predictor;
248
+ }
249
+ }
250
+ delete static_cast<CostAwareWorkingArea*>(wa);
251
+ }
252
+
253
+ Status CostAwareCompressor::CompressBlockAndRecord(
254
+ size_t choosen_compression_type, size_t compression_level_ptr,
255
+ Slice uncompressed_data, std::string* compressed_output,
256
+ CompressionType* out_compression_type, CostAwareWorkingArea* wa) {
257
+ assert(choosen_compression_type < allcompressors_.size());
258
+ assert(compression_level_ptr <
259
+ allcompressors_[choosen_compression_type].size());
260
+ assert(choosen_compression_type < wa->cost_predictors_.size());
261
+ assert(compression_level_ptr <
262
+ wa->cost_predictors_[choosen_compression_type].size());
263
+ StopWatchNano<> timer(Env::Default()->GetSystemClock().get(), true);
264
+ Status status =
265
+ allcompressors_[choosen_compression_type][compression_level_ptr]
266
+ ->CompressBlock(uncompressed_data, compressed_output,
267
+ out_compression_type, &(wa->wrapped_));
268
+ std::pair<size_t, size_t> measured_data(timer.ElapsedMicros(),
269
+ compressed_output->size());
270
+ auto predictor =
271
+ wa->cost_predictors_[choosen_compression_type][compression_level_ptr];
272
+ auto output_length = measured_data.second;
273
+ auto cpu_time = measured_data.first;
274
+ predictor->CPUPredictor.Record(cpu_time);
275
+ predictor->IOPredictor.Record(output_length);
276
+ TEST_SYNC_POINT_CALLBACK(
277
+ "CostAwareCompressor::CompressBlockAndRecord::GetPredictor",
278
+ wa->cost_predictors_[choosen_compression_type][compression_level_ptr]);
279
+ return status;
280
+ }
281
+
282
+ std::shared_ptr<CompressionManagerWrapper> CreateAutoSkipCompressionManager(
283
+ std::shared_ptr<CompressionManager> wrapped) {
284
+ return std::make_shared<AutoSkipCompressorManager>(
285
+ wrapped == nullptr ? GetBuiltinV2CompressionManager() : wrapped);
286
+ }
287
+ const char* CostAwareCompressorManager::Name() const {
288
+ // should have returned "CostAwareCompressorManager" but we currently have an
289
+ // error so for now returning name of the wrapped container
290
+ return wrapped_->Name();
291
+ }
292
+
293
+ std::unique_ptr<Compressor> CostAwareCompressorManager::GetCompressorForSST(
294
+ const FilterBuildingContext& context, const CompressionOptions& opts,
295
+ CompressionType preferred) {
296
+ assert(GetSupportedCompressions().size() > 1);
297
+ (void)context;
298
+ (void)preferred;
299
+ return std::make_unique<CostAwareCompressor>(opts);
300
+ }
301
+
302
+ std::shared_ptr<CompressionManagerWrapper> CreateCostAwareCompressionManager(
303
+ std::shared_ptr<CompressionManager> wrapped) {
304
+ return std::make_shared<CostAwareCompressorManager>(
305
+ wrapped == nullptr ? GetBuiltinV2CompressionManager() : wrapped);
306
+ }
307
+
308
+ } // namespace ROCKSDB_NAMESPACE
@@ -0,0 +1,189 @@
1
+ // Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ // This source code is licensed under both the GPLv2 (found in the
3
+ // COPYING file in the root directory) and Apache 2.0 License
4
+ // (found in the LICENSE.Apache file in the root directory).
5
+ //
6
+ // Defines the auto-skip compressor wrapper, which decides whether to bypass
7
+ // compression based on past data.
8
+ // Also defines CostAwareCompressor, which currently tries to predict the CPU
9
+ // and IO cost of compression.
10
+
11
+ #pragma once
12
+ #include <memory>
13
+
14
+ #include "rocksdb/advanced_compression.h"
15
+
16
+ namespace ROCKSDB_NAMESPACE {
17
+ // Auto Skip Compression Components
18
+ // Predict rejection probability using a moving window approach
19
+ class CompressionRejectionProbabilityPredictor {
20
+ public:
21
+ explicit CompressionRejectionProbabilityPredictor(int window_size)
22
+ : pred_rejection_prob_percentage_(0),
23
+ rejected_count_(0),
24
+ compressed_count_(0),
25
+ window_size_(window_size) {}
26
+ int Predict() const;
27
+ bool Record(Slice uncompressed_block_data, std::string* compressed_output,
28
+ const CompressionOptions& opts);
29
+ size_t attempted_compression_count() const;
30
+
31
+ protected:
32
+ int pred_rejection_prob_percentage_;
33
+ size_t rejected_count_;
34
+ size_t compressed_count_;
35
+ size_t window_size_;
36
+ };
37
+
38
+ class AutoSkipWorkingArea : public Compressor::WorkingArea {
39
+ public:
40
+ explicit AutoSkipWorkingArea(Compressor::ManagedWorkingArea&& wa)
41
+ : wrapped(std::move(wa)),
42
+ predictor(
43
+ std::make_shared<CompressionRejectionProbabilityPredictor>(10)) {}
44
+ ~AutoSkipWorkingArea() {}
45
+ AutoSkipWorkingArea(const AutoSkipWorkingArea&) = delete;
46
+ AutoSkipWorkingArea& operator=(const AutoSkipWorkingArea&) = delete;
47
+ AutoSkipWorkingArea(AutoSkipWorkingArea&& other) noexcept
48
+ : wrapped(std::move(other.wrapped)),
49
+ predictor(std::move(other.predictor)) {}
50
+
51
+ AutoSkipWorkingArea& operator=(AutoSkipWorkingArea&& other) noexcept {
52
+ if (this != &other) {
53
+ wrapped = std::move(other.wrapped);
54
+ predictor = std::move(other.predictor);
55
+ }
56
+ return *this;
57
+ }
58
+ Compressor::ManagedWorkingArea wrapped;
59
+ std::shared_ptr<CompressionRejectionProbabilityPredictor> predictor;
60
+ };
61
+ class AutoSkipCompressorWrapper : public CompressorWrapper {
62
+ public:
63
+ const char* Name() const override;
64
+ explicit AutoSkipCompressorWrapper(std::unique_ptr<Compressor> compressor,
65
+ const CompressionOptions& opts);
66
+
67
+ Status CompressBlock(Slice uncompressed_data, std::string* compressed_output,
68
+ CompressionType* out_compression_type,
69
+ ManagedWorkingArea* wa) override;
70
+ ManagedWorkingArea ObtainWorkingArea() override;
71
+ void ReleaseWorkingArea(WorkingArea* wa) override;
72
+
73
+ private:
74
+ Status CompressBlockAndRecord(Slice uncompressed_data,
75
+ std::string* compressed_output,
76
+ CompressionType* out_compression_type,
77
+ AutoSkipWorkingArea* wa);
78
+ static constexpr int kExplorationPercentage = 10;
79
+ static constexpr int kProbabilityCutOff = 50;
80
+ const CompressionOptions opts_;
81
+ };
82
+
83
+ class AutoSkipCompressorManager : public CompressionManagerWrapper {
84
+ using CompressionManagerWrapper::CompressionManagerWrapper;
85
+ const char* Name() const override;
86
+ std::unique_ptr<Compressor> GetCompressorForSST(
87
+ const FilterBuildingContext& context, const CompressionOptions& opts,
88
+ CompressionType preferred) override;
89
+ };
90
+ // Cost Aware Components
91
// Predicts a value as the average of the most recently completed window of
// recorded samples.
//
// Samples are accumulated until `window_size` of them have been recorded; at
// that point the prediction becomes the average of that window and the
// accumulator is reset. Windows therefore do not overlap, and the prediction
// only changes when a window completes. Until the first window completes (or
// SetPrediction() is called) the prediction is 0. With `window_size` <= 1,
// every recorded sample completes a window.
template <typename T>
class WindowAveragePredictor {
 public:
  explicit WindowAveragePredictor(int window_size)
      : sum_(0), prediction_(0), count_(0), kWindowSize(window_size) {}

  // Returns the current prediction. Const so that it can be queried through
  // const references.
  T Predict() const { return prediction_; }

  // Adds `data` to the current window, refreshing the prediction when the
  // window completes. Always returns true.
  bool Record(T data) {
    sum_ += data;
    ++count_;
    if (count_ >= kWindowSize) {
      // count_ is at least 1 here, so the division is well defined.
      prediction_ = sum_ / count_;
      sum_ = 0;
      count_ = 0;
    }
    return true;
  }

  // Overrides the current prediction; samples already accumulated in the
  // current window are left untouched.
  void SetPrediction(T prediction) { prediction_ = prediction; }

 private:
  T sum_;             // Sum of the samples in the current window.
  T prediction_;      // Average of the last completed window.
  int count_;         // Number of samples in the current window.
  const int kWindowSize;
};
115
+
116
+ using IOCostPredictor = WindowAveragePredictor<size_t>;
117
+ using CPUUtilPredictor = WindowAveragePredictor<uint64_t>;
118
+
119
+ struct IOCPUCostPredictor {
120
+ explicit IOCPUCostPredictor(int window_size)
121
+ : IOPredictor(window_size), CPUPredictor(window_size) {}
122
+ IOCostPredictor IOPredictor;
123
+ CPUUtilPredictor CPUPredictor;
124
+ };
125
+ class CostAwareWorkingArea : public Compressor::WorkingArea {
126
+ public:
127
+ explicit CostAwareWorkingArea(Compressor::ManagedWorkingArea&& wa)
128
+ : wrapped_(std::move(wa)) {}
129
+ ~CostAwareWorkingArea() {}
130
+ CostAwareWorkingArea(const CostAwareWorkingArea&) = delete;
131
+ CostAwareWorkingArea& operator=(const CostAwareWorkingArea&) = delete;
132
+ CostAwareWorkingArea(CostAwareWorkingArea&& other) noexcept
133
+ : wrapped_(std::move(other.wrapped_)) {}
134
+
135
+ CostAwareWorkingArea& operator=(CostAwareWorkingArea&& other) noexcept {
136
+ if (this != &other) {
137
+ wrapped_ = std::move(other.wrapped_);
138
+ cost_predictors_ = std::move(other.cost_predictors_);
139
+ }
140
+ return *this;
141
+ }
142
+ Compressor::ManagedWorkingArea wrapped_;
143
+ std::vector<std::vector<IOCPUCostPredictor*>> cost_predictors_;
144
+ };
145
+
146
+ class CostAwareCompressor : public Compressor {
147
+ public:
148
+ explicit CostAwareCompressor(const CompressionOptions& opts);
149
+ const char* Name() const override;
150
+ size_t GetMaxSampleSizeIfWantDict(CacheEntryRole block_type) const override;
151
+ Slice GetSerializedDict() const override;
152
+ CompressionType GetPreferredCompressionType() const override;
153
+ ManagedWorkingArea ObtainWorkingArea() override;
154
+ std::unique_ptr<Compressor> MaybeCloneSpecialized(
155
+ CacheEntryRole block_type, DictSampleArgs&& dict_samples) override;
156
+
157
+ Status CompressBlock(Slice uncompressed_data, std::string* compressed_output,
158
+ CompressionType* out_compression_type,
159
+ ManagedWorkingArea* wa) override;
160
+ void ReleaseWorkingArea(WorkingArea* wa) override;
161
+
162
+ private:
163
+ Status CompressBlockAndRecord(size_t choosen_compression_type,
164
+ size_t compresion_level_ptr,
165
+ Slice uncompressed_data,
166
+ std::string* compressed_output,
167
+ CompressionType* out_compression_type,
168
+ CostAwareWorkingArea* wa);
169
+ static constexpr int kExplorationPercentage = 10;
170
+ static constexpr int kProbabilityCutOff = 50;
171
+ // This is the vector containing the list of compression levels that
172
+ // CostAwareCompressor will use create compressor and predicts the cost
173
+ // The vector contains list of compression level for compression algorithm in
174
+ // the order defined by enum CompressionType
175
+ static const std::vector<std::vector<int>> kCompressionLevels;
176
+ const CompressionOptions opts_;
177
+ std::vector<std::vector<std::unique_ptr<Compressor>>> allcompressors_;
178
+ std::vector<std::pair<size_t, size_t>> allcompressors_index_;
179
+ };
180
+
181
+ class CostAwareCompressorManager : public CompressionManagerWrapper {
182
+ using CompressionManagerWrapper::CompressionManagerWrapper;
183
+ const char* Name() const override;
184
+ std::unique_ptr<Compressor> GetCompressorForSST(
185
+ const FilterBuildingContext& context, const CompressionOptions& opts,
186
+ CompressionType preferred) override;
187
+ };
188
+
189
+ } // namespace ROCKSDB_NAMESPACE