@nxtedition/rocksdb 7.0.12 → 7.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
  2. package/deps/rocksdb/rocksdb/Makefile +3 -0
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
  5. package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
  6. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
  7. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
  8. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
  9. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
  11. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
  13. package/deps/rocksdb/rocksdb/db/c.cc +159 -5
  14. package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
  15. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
  16. package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
  24. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
  25. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
  26. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
  36. package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
  37. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
  38. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
  39. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
  41. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
  42. package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
  43. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  44. package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
  45. package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
  46. package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
  47. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
  49. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  50. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
  51. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
  52. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
  53. package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
  54. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  55. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  56. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
  59. package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
  60. package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
  61. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  62. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
  63. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
  66. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
  68. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  69. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
  70. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
  72. package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
  73. package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
  74. package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
  75. package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
  76. package/deps/rocksdb/rocksdb/options/options.cc +4 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
  78. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  79. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
  80. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
  81. package/deps/rocksdb/rocksdb/src.mk +1 -0
  82. package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
  88. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
  90. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
  91. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
  92. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
  93. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
  94. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
  95. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
  96. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
  97. package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
  98. package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
  99. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
  100. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
  101. package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
  102. package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
  103. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  104. package/package.json +1 -1
  105. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -23,6 +23,7 @@
23
23
  #include "rocksdb/convenience.h"
24
24
  #include "rocksdb/db.h"
25
25
  #include "rocksdb/env.h"
26
+ #include "rocksdb/experimental.h"
26
27
  #include "rocksdb/filter_policy.h"
27
28
  #include "rocksdb/iterator.h"
28
29
  #include "rocksdb/memtablerep.h"
@@ -59,6 +60,7 @@ using ROCKSDB_NAMESPACE::Cache;
59
60
  using ROCKSDB_NAMESPACE::Checkpoint;
60
61
  using ROCKSDB_NAMESPACE::ColumnFamilyDescriptor;
61
62
  using ROCKSDB_NAMESPACE::ColumnFamilyHandle;
63
+ using ROCKSDB_NAMESPACE::ColumnFamilyMetaData;
62
64
  using ROCKSDB_NAMESPACE::ColumnFamilyOptions;
63
65
  using ROCKSDB_NAMESPACE::CompactionFilter;
64
66
  using ROCKSDB_NAMESPACE::CompactionFilterFactory;
@@ -78,6 +80,7 @@ using ROCKSDB_NAMESPACE::FlushOptions;
78
80
  using ROCKSDB_NAMESPACE::InfoLogLevel;
79
81
  using ROCKSDB_NAMESPACE::IngestExternalFileOptions;
80
82
  using ROCKSDB_NAMESPACE::Iterator;
83
+ using ROCKSDB_NAMESPACE::LevelMetaData;
81
84
  using ROCKSDB_NAMESPACE::LiveFileMetaData;
82
85
  using ROCKSDB_NAMESPACE::Logger;
83
86
  using ROCKSDB_NAMESPACE::LRUCacheOptions;
@@ -105,6 +108,7 @@ using ROCKSDB_NAMESPACE::Slice;
105
108
  using ROCKSDB_NAMESPACE::SliceParts;
106
109
  using ROCKSDB_NAMESPACE::SliceTransform;
107
110
  using ROCKSDB_NAMESPACE::Snapshot;
111
+ using ROCKSDB_NAMESPACE::SstFileMetaData;
108
112
  using ROCKSDB_NAMESPACE::SstFileWriter;
109
113
  using ROCKSDB_NAMESPACE::Status;
110
114
  using ROCKSDB_NAMESPACE::TablePropertiesCollectorFactory;
@@ -178,6 +182,15 @@ struct rocksdb_cache_t {
178
182
  };
179
183
  struct rocksdb_livefiles_t { std::vector<LiveFileMetaData> rep; };
180
184
  struct rocksdb_column_family_handle_t { ColumnFamilyHandle* rep; };
185
+ struct rocksdb_column_family_metadata_t {
186
+ ColumnFamilyMetaData rep;
187
+ };
188
+ struct rocksdb_level_metadata_t {
189
+ const LevelMetaData* rep;
190
+ };
191
+ struct rocksdb_sst_file_metadata_t {
192
+ const SstFileMetaData* rep;
193
+ };
181
194
  struct rocksdb_envoptions_t { EnvOptions rep; };
182
195
  struct rocksdb_ingestexternalfileoptions_t { IngestExternalFileOptions rep; };
183
196
  struct rocksdb_sstfilewriter_t { SstFileWriter* rep; };
@@ -1737,6 +1750,29 @@ void rocksdb_compact_range_cf(
1737
1750
  (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr));
1738
1751
  }
1739
1752
 
1753
+ void rocksdb_suggest_compact_range(rocksdb_t* db, const char* start_key,
1754
+ size_t start_key_len, const char* limit_key,
1755
+ size_t limit_key_len, char** errptr) {
1756
+ Slice a, b;
1757
+ Status s = ROCKSDB_NAMESPACE::experimental::SuggestCompactRange(
1758
+ db->rep,
1759
+ (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr),
1760
+ (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr));
1761
+ SaveError(errptr, s);
1762
+ }
1763
+
1764
+ void rocksdb_suggest_compact_range_cf(
1765
+ rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
1766
+ const char* start_key, size_t start_key_len, const char* limit_key,
1767
+ size_t limit_key_len, char** errptr) {
1768
+ Slice a, b;
1769
+ Status s = db->rep->SuggestCompactRange(
1770
+ column_family->rep,
1771
+ (start_key ? (a = Slice(start_key, start_key_len), &a) : nullptr),
1772
+ (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr));
1773
+ SaveError(errptr, s);
1774
+ }
1775
+
1740
1776
  void rocksdb_compact_range_opt(rocksdb_t* db, rocksdb_compactoptions_t* opt,
1741
1777
  const char* start_key, size_t start_key_len,
1742
1778
  const char* limit_key, size_t limit_key_len) {
@@ -3338,11 +3374,6 @@ unsigned char rocksdb_options_get_advise_random_on_open(
3338
3374
  return opt->rep.advise_random_on_open;
3339
3375
  }
3340
3376
 
3341
- void rocksdb_options_set_experimental_mempurge_threshold(rocksdb_options_t* opt,
3342
- double v) {
3343
- opt->rep.experimental_mempurge_threshold = v;
3344
- }
3345
-
3346
3377
  void rocksdb_options_set_access_hint_on_compaction_start(
3347
3378
  rocksdb_options_t* opt, int v) {
3348
3379
  switch(v) {
@@ -3528,6 +3559,16 @@ int rocksdb_options_get_max_background_flushes(rocksdb_options_t* opt) {
3528
3559
  return opt->rep.max_background_flushes;
3529
3560
  }
3530
3561
 
3562
+ void rocksdb_options_set_experimental_mempurge_threshold(rocksdb_options_t* opt,
3563
+ double v) {
3564
+ opt->rep.experimental_mempurge_threshold = v;
3565
+ }
3566
+
3567
+ double rocksdb_options_get_experimental_mempurge_threshold(
3568
+ rocksdb_options_t* opt) {
3569
+ return opt->rep.experimental_mempurge_threshold;
3570
+ }
3571
+
3531
3572
  void rocksdb_options_set_max_log_file_size(rocksdb_options_t* opt, size_t v) {
3532
3573
  opt->rep.max_log_file_size = v;
3533
3574
  }
@@ -5131,6 +5172,119 @@ void rocksdb_delete_file_in_range_cf(
5131
5172
  (limit_key ? (b = Slice(limit_key, limit_key_len), &b) : nullptr)));
5132
5173
  }
5133
5174
 
5175
+ /* MetaData */
5176
+
5177
+ rocksdb_column_family_metadata_t* rocksdb_get_column_family_metadata(
5178
+ rocksdb_t* db) {
5179
+ rocksdb_column_family_metadata_t* meta = new rocksdb_column_family_metadata_t;
5180
+ db->rep->GetColumnFamilyMetaData(&meta->rep);
5181
+ return meta;
5182
+ }
5183
+
5184
+ rocksdb_column_family_metadata_t* rocksdb_get_column_family_metadata_cf(
5185
+ rocksdb_t* db, rocksdb_column_family_handle_t* column_family) {
5186
+ rocksdb_column_family_metadata_t* meta = new rocksdb_column_family_metadata_t;
5187
+ db->rep->GetColumnFamilyMetaData(column_family->rep, &meta->rep);
5188
+ return meta;
5189
+ }
5190
+
5191
+ void rocksdb_column_family_metadata_destroy(
5192
+ rocksdb_column_family_metadata_t* cf_meta) {
5193
+ delete cf_meta;
5194
+ }
5195
+
5196
+ uint64_t rocksdb_column_family_metadata_get_size(
5197
+ rocksdb_column_family_metadata_t* cf_meta) {
5198
+ return cf_meta->rep.size;
5199
+ }
5200
+
5201
+ size_t rocksdb_column_family_metadata_get_file_count(
5202
+ rocksdb_column_family_metadata_t* cf_meta) {
5203
+ return cf_meta->rep.file_count;
5204
+ }
5205
+
5206
+ char* rocksdb_column_family_metadata_get_name(
5207
+ rocksdb_column_family_metadata_t* cf_meta) {
5208
+ return strdup(cf_meta->rep.name.c_str());
5209
+ }
5210
+
5211
+ size_t rocksdb_column_family_metadata_get_level_count(
5212
+ rocksdb_column_family_metadata_t* cf_meta) {
5213
+ return cf_meta->rep.levels.size();
5214
+ }
5215
+
5216
+ rocksdb_level_metadata_t* rocksdb_column_family_metadata_get_level_metadata(
5217
+ rocksdb_column_family_metadata_t* cf_meta, size_t i) {
5218
+ if (i >= cf_meta->rep.levels.size()) {
5219
+ return NULL;
5220
+ }
5221
+ rocksdb_level_metadata_t* level_meta =
5222
+ (rocksdb_level_metadata_t*)malloc(sizeof(rocksdb_level_metadata_t));
5223
+ level_meta->rep = &cf_meta->rep.levels[i];
5224
+
5225
+ return level_meta;
5226
+ }
5227
+
5228
+ void rocksdb_level_metadata_destroy(rocksdb_level_metadata_t* level_meta) {
5229
+ // Only free the base pointer as its parent rocksdb_column_family_metadata_t
5230
+ // has the ownership of its rep.
5231
+ free(level_meta);
5232
+ }
5233
+
5234
+ int rocksdb_level_metadata_get_level(rocksdb_level_metadata_t* level_meta) {
5235
+ return level_meta->rep->level;
5236
+ }
5237
+
5238
+ uint64_t rocksdb_level_metadata_get_size(rocksdb_level_metadata_t* level_meta) {
5239
+ return level_meta->rep->size;
5240
+ }
5241
+
5242
+ size_t rocksdb_level_metadata_get_file_count(
5243
+ rocksdb_level_metadata_t* level_meta) {
5244
+ return level_meta->rep->files.size();
5245
+ }
5246
+
5247
+ rocksdb_sst_file_metadata_t* rocksdb_level_metadata_get_sst_file_metadata(
5248
+ rocksdb_level_metadata_t* level_meta, size_t i) {
5249
+ if (i >= level_meta->rep->files.size()) {
5250
+ return nullptr;
5251
+ }
5252
+ rocksdb_sst_file_metadata_t* file_meta =
5253
+ (rocksdb_sst_file_metadata_t*)malloc(sizeof(rocksdb_sst_file_metadata_t));
5254
+ file_meta->rep = &level_meta->rep->files[i];
5255
+ return file_meta;
5256
+ }
5257
+
5258
+ void rocksdb_sst_file_metadata_destroy(rocksdb_sst_file_metadata_t* file_meta) {
5259
+ // Only free the base pointer as its parent rocksdb_level_metadata_t
5260
+ // has the ownership of its rep.
5261
+ free(file_meta);
5262
+ }
5263
+
5264
+ char* rocksdb_sst_file_metadata_get_relative_filename(
5265
+ rocksdb_sst_file_metadata_t* file_meta) {
5266
+ return strdup(file_meta->rep->relative_filename.c_str());
5267
+ }
5268
+
5269
+ uint64_t rocksdb_sst_file_metadata_get_size(
5270
+ rocksdb_sst_file_metadata_t* file_meta) {
5271
+ return file_meta->rep->size;
5272
+ }
5273
+
5274
+ char* rocksdb_sst_file_metadata_get_smallestkey(
5275
+ rocksdb_sst_file_metadata_t* file_meta, size_t* key_len) {
5276
+ *key_len = file_meta->rep->smallestkey.size();
5277
+ return CopyString(file_meta->rep->smallestkey);
5278
+ }
5279
+
5280
+ char* rocksdb_sst_file_metadata_get_largestkey(
5281
+ rocksdb_sst_file_metadata_t* file_meta, size_t* key_len) {
5282
+ *key_len = file_meta->rep->largestkey.size();
5283
+ return CopyString(file_meta->rep->largestkey);
5284
+ }
5285
+
5286
+ /* Transactions */
5287
+
5134
5288
  rocksdb_transactiondb_options_t* rocksdb_transactiondb_options_create() {
5135
5289
  return new rocksdb_transactiondb_options_t;
5136
5290
  }
@@ -272,6 +272,92 @@ static rocksdb_compactionfilter_t* CFilterCreate(
272
272
  CFilterName);
273
273
  }
274
274
 
275
+ void CheckMetaData(rocksdb_column_family_metadata_t* cf_meta,
276
+ const char* expected_cf_name) {
277
+ char* cf_name = rocksdb_column_family_metadata_get_name(cf_meta);
278
+ assert(strcmp(cf_name, expected_cf_name) == 0);
279
+ rocksdb_free(cf_name);
280
+
281
+ size_t cf_size = rocksdb_column_family_metadata_get_size(cf_meta);
282
+ assert(cf_size > 0);
283
+ size_t cf_file_count = rocksdb_column_family_metadata_get_size(cf_meta);
284
+ assert(cf_file_count > 0);
285
+
286
+ uint64_t total_level_size = 0;
287
+ size_t total_file_count = 0;
288
+ size_t level_count = rocksdb_column_family_metadata_get_level_count(cf_meta);
289
+ assert(level_count > 0);
290
+ for (size_t l = 0; l < level_count; ++l) {
291
+ rocksdb_level_metadata_t* level_meta =
292
+ rocksdb_column_family_metadata_get_level_metadata(cf_meta, l);
293
+ assert(level_meta);
294
+ assert(rocksdb_level_metadata_get_level(level_meta) >= (int)l);
295
+ uint64_t level_size = rocksdb_level_metadata_get_size(level_meta);
296
+ uint64_t file_size_in_level = 0;
297
+
298
+ size_t file_count = rocksdb_level_metadata_get_file_count(level_meta);
299
+ total_file_count += file_count;
300
+ for (size_t f = 0; f < file_count; ++f) {
301
+ rocksdb_sst_file_metadata_t* file_meta =
302
+ rocksdb_level_metadata_get_sst_file_metadata(level_meta, f);
303
+ assert(file_meta);
304
+
305
+ uint64_t file_size = rocksdb_sst_file_metadata_get_size(file_meta);
306
+ assert(file_size > 0);
307
+ file_size_in_level += file_size;
308
+
309
+ char* file_name =
310
+ rocksdb_sst_file_metadata_get_relative_filename(file_meta);
311
+ assert(file_name);
312
+ assert(strlen(file_name) > 0);
313
+ rocksdb_free(file_name);
314
+
315
+ size_t smallest_key_len;
316
+ char* smallest_key = rocksdb_sst_file_metadata_get_smallestkey(
317
+ file_meta, &smallest_key_len);
318
+ assert(smallest_key);
319
+ assert(smallest_key_len > 0);
320
+ size_t largest_key_len;
321
+ char* largest_key =
322
+ rocksdb_sst_file_metadata_get_largestkey(file_meta, &largest_key_len);
323
+ assert(largest_key);
324
+ assert(largest_key_len > 0);
325
+ rocksdb_free(smallest_key);
326
+ rocksdb_free(largest_key);
327
+
328
+ rocksdb_sst_file_metadata_destroy(file_meta);
329
+ }
330
+ assert(level_size == file_size_in_level);
331
+ total_level_size += level_size;
332
+ rocksdb_level_metadata_destroy(level_meta);
333
+ }
334
+ assert(total_file_count > 0);
335
+ assert(cf_size == total_level_size);
336
+ }
337
+
338
+ void GetAndCheckMetaData(rocksdb_t* db) {
339
+ rocksdb_column_family_metadata_t* cf_meta =
340
+ rocksdb_get_column_family_metadata(db);
341
+
342
+ CheckMetaData(cf_meta, "default");
343
+
344
+ rocksdb_column_family_metadata_destroy(cf_meta);
345
+ }
346
+
347
+ void GetAndCheckMetaDataCf(rocksdb_t* db,
348
+ rocksdb_column_family_handle_t* handle,
349
+ const char* cf_name) {
350
+ // Compact to make sure we have at least one sst file to obtain metadata.
351
+ rocksdb_compact_range_cf(db, handle, NULL, 0, NULL, 0);
352
+
353
+ rocksdb_column_family_metadata_t* cf_meta =
354
+ rocksdb_get_column_family_metadata_cf(db, handle);
355
+
356
+ CheckMetaData(cf_meta, cf_name);
357
+
358
+ rocksdb_column_family_metadata_destroy(cf_meta);
359
+ }
360
+
275
361
  static rocksdb_t* CheckCompaction(rocksdb_t* db, rocksdb_options_t* options,
276
362
  rocksdb_readoptions_t* roptions,
277
363
  rocksdb_writeoptions_t* woptions) {
@@ -304,6 +390,11 @@ static rocksdb_t* CheckCompaction(rocksdb_t* db, rocksdb_options_t* options,
304
390
  CheckGet(db, roptions, "foo", "foovalue");
305
391
  CheckGet(db, roptions, "bar", NULL);
306
392
  CheckGet(db, roptions, "baz", "newbazvalue");
393
+
394
+ rocksdb_suggest_compact_range(db, "bar", 3, "foo", 3, &err);
395
+ GetAndCheckMetaData(db);
396
+ CheckNoError(err);
397
+
307
398
  return db;
308
399
  }
309
400
 
@@ -1296,6 +1387,9 @@ int main(int argc, char** argv) {
1296
1387
  CheckNoError(err);
1297
1388
  rocksdb_put_cf(db, woptions, handles[1], "foobar4", 7, "hello4", 6, &err);
1298
1389
  CheckNoError(err);
1390
+ rocksdb_suggest_compact_range_cf(db, handles[1], "foo", 3, "foobar9", 7,
1391
+ &err);
1392
+ CheckNoError(err);
1299
1393
 
1300
1394
  rocksdb_flushoptions_t *flush_options = rocksdb_flushoptions_create();
1301
1395
  rocksdb_flushoptions_set_wait(flush_options, 1);
@@ -1443,6 +1537,8 @@ int main(int argc, char** argv) {
1443
1537
  CheckNoError(err);
1444
1538
  rocksdb_iter_destroy(iter);
1445
1539
 
1540
+ GetAndCheckMetaDataCf(db, handles[1], cf_names[1]);
1541
+
1446
1542
  rocksdb_drop_column_family(db, handles[1], &err);
1447
1543
  CheckNoError(err);
1448
1544
  for (i = 0; i < 2; i++) {
@@ -1831,6 +1927,10 @@ int main(int argc, char** argv) {
1831
1927
  rocksdb_options_set_wal_compression(o, 1);
1832
1928
  CheckCondition(1 == rocksdb_options_get_wal_compression(o));
1833
1929
 
1930
+ rocksdb_options_set_experimental_mempurge_threshold(o, 29.0);
1931
+ CheckCondition(29.0 ==
1932
+ rocksdb_options_get_experimental_mempurge_threshold(o));
1933
+
1834
1934
  /* Blob Options */
1835
1935
  rocksdb_options_set_enable_blob_files(o, 1);
1836
1936
  CheckCondition(1 == rocksdb_options_get_enable_blob_files(o));
@@ -1961,6 +2061,8 @@ int main(int argc, char** argv) {
1961
2061
  CheckCondition(4 == rocksdb_options_get_bottommost_compression(copy));
1962
2062
  CheckCondition(2 == rocksdb_options_get_compaction_style(copy));
1963
2063
  CheckCondition(1 == rocksdb_options_get_atomic_flush(copy));
2064
+ CheckCondition(29.0 ==
2065
+ rocksdb_options_get_experimental_mempurge_threshold(copy));
1964
2066
 
1965
2067
  // Copies should be independent.
1966
2068
  rocksdb_options_set_allow_ingest_behind(copy, 0);
@@ -2309,6 +2411,12 @@ int main(int argc, char** argv) {
2309
2411
  CheckCondition(0 == rocksdb_options_get_atomic_flush(copy));
2310
2412
  CheckCondition(1 == rocksdb_options_get_atomic_flush(o));
2311
2413
 
2414
+ rocksdb_options_set_experimental_mempurge_threshold(copy, 229.0);
2415
+ CheckCondition(229.0 ==
2416
+ rocksdb_options_get_experimental_mempurge_threshold(copy));
2417
+ CheckCondition(29.0 ==
2418
+ rocksdb_options_get_experimental_mempurge_threshold(o));
2419
+
2312
2420
  rocksdb_options_destroy(copy);
2313
2421
  rocksdb_options_destroy(o);
2314
2422
  }
@@ -550,7 +550,8 @@ ColumnFamilyData::ColumnFamilyData(
550
550
  prev_compaction_needed_bytes_(0),
551
551
  allow_2pc_(db_options.allow_2pc),
552
552
  last_memtable_id_(0),
553
- db_paths_registered_(false) {
553
+ db_paths_registered_(false),
554
+ mempurge_used_(false) {
554
555
  if (id_ != kDummyColumnFamilyDataId) {
555
556
  // TODO(cc): RegisterDbPaths can be expensive, consider moving it
556
557
  // outside of this constructor which might be called with db mutex held.
@@ -480,11 +480,8 @@ class ColumnFamilyData {
480
480
  const MutableCFOptions& mutable_cf_options,
481
481
  const ImmutableCFOptions& immutable_cf_options);
482
482
 
483
- // Recalculate some small conditions, which are changed only during
484
- // compaction, adding new memtable and/or
485
- // recalculation of compaction score. These values are used in
486
- // DBImpl::MakeRoomForWrite function to decide, if it need to make
487
- // a write stall
483
+ // Recalculate some stall conditions, which are changed only during
484
+ // compaction, adding new memtable and/or recalculation of compaction score.
488
485
  WriteStallCondition RecalculateWriteStallConditions(
489
486
  const MutableCFOptions& mutable_cf_options);
490
487
 
@@ -529,6 +526,10 @@ class ColumnFamilyData {
529
526
 
530
527
  static const uint32_t kDummyColumnFamilyDataId;
531
528
 
529
+ // Keep track of whether the mempurge feature was ever used.
530
+ void SetMempurgeUsed() { mempurge_used_ = true; }
531
+ bool GetMempurgeUsed() { return mempurge_used_; }
532
+
532
533
  private:
533
534
  friend class ColumnFamilySet;
534
535
  ColumnFamilyData(uint32_t id, const std::string& name,
@@ -629,6 +630,7 @@ class ColumnFamilyData {
629
630
  // For charging memory usage of file metadata created for newly added files to
630
631
  // a Version associated with this CFD
631
632
  std::shared_ptr<CacheReservationManager> file_metadata_cache_res_mgr_;
633
+ bool mempurge_used_;
632
634
  };
633
635
 
634
636
  // ColumnFamilySet has interesting thread-safety requirements
@@ -279,6 +279,28 @@ Compaction::Compaction(
279
279
  }
280
280
 
281
281
  GetBoundaryKeys(vstorage, inputs_, &smallest_user_key_, &largest_user_key_);
282
+
283
+ // Every compaction regardless of any compaction reason may respect the
284
+ // existing compact cursor in the output level to split output files
285
+ InternalKey temp_split_key = InternalKey();
286
+ if (immutable_options_.compaction_style == kCompactionStyleLevel &&
287
+ immutable_options_.compaction_pri == kRoundRobin) {
288
+ const InternalKey cursor =
289
+ input_vstorage_->GetCompactCursors()[output_level_];
290
+ if (cursor.Valid()) {
291
+ const Slice& cursor_user_key = ExtractUserKey(cursor.Encode());
292
+ auto ucmp = vstorage->InternalComparator()->user_comparator();
293
+ // May split output files according to the cursor if it is in the user-key
294
+ // range
295
+ if (ucmp->CompareWithoutTimestamp(cursor_user_key, smallest_user_key_) >
296
+ 0 &&
297
+ ucmp->CompareWithoutTimestamp(cursor_user_key, largest_user_key_) <=
298
+ 0) {
299
+ temp_split_key = cursor;
300
+ }
301
+ }
302
+ }
303
+ output_split_key_ = temp_split_key;
282
304
  }
283
305
 
284
306
  Compaction::~Compaction() {
@@ -177,6 +177,12 @@ class Compaction {
177
177
  // moving a single input file to the next level (no merging or splitting)
178
178
  bool IsTrivialMove() const;
179
179
 
180
+ // The split user key in the output level if this compaction is required to
181
+ // split the output files according to the existing cursor in the output
182
+ // level under round-robin compaction policy. Empty indicates no required
183
+ // splitting key
184
+ const InternalKey GetOutputSplitKey() const { return output_split_key_; }
185
+
180
186
  // If true, then the compaction can be done by simply deleting input files.
181
187
  bool deletion_compaction() const { return deletion_compaction_; }
182
188
 
@@ -379,6 +385,8 @@ class Compaction {
379
385
  Temperature output_temperature_;
380
386
  // If true, then the compaction can be done by simply deleting input files.
381
387
  const bool deletion_compaction_;
388
+ // Key at which to split output files per the compact cursor; invalid if no split.
389
+ InternalKey output_split_key_;
382
390
 
383
391
  // Compaction input files organized by level. Constant after construction
384
392
  const std::vector<CompactionInputFiles> inputs_;
@@ -197,6 +197,7 @@ void CompactionIterator::Next() {
197
197
 
198
198
  bool CompactionIterator::InvokeFilterIfNeeded(bool* need_skip,
199
199
  Slice* skip_until) {
200
+ // TODO: support compaction filter for wide-column entities
200
201
  if (!compaction_filter_ ||
201
202
  (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex)) {
202
203
  return true;
@@ -519,7 +520,8 @@ void CompactionIterator::NextFromInput() {
519
520
  // In the previous iteration we encountered a single delete that we could
520
521
  // not compact out. We will keep this Put, but can drop its data.
521
522
  // (See Optimization 3, below.)
522
- if (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex) {
523
+ if (ikey_.type != kTypeValue && ikey_.type != kTypeBlobIndex &&
524
+ ikey_.type != kTypeWideColumnEntity) {
523
525
  ROCKS_LOG_FATAL(info_log_, "Unexpected key %s for compaction output",
524
526
  ikey_.DebugString(allow_data_in_errors_, true).c_str());
525
527
  assert(false);
@@ -533,7 +535,7 @@ void CompactionIterator::NextFromInput() {
533
535
  assert(false);
534
536
  }
535
537
 
536
- if (ikey_.type == kTypeBlobIndex) {
538
+ if (ikey_.type == kTypeBlobIndex || ikey_.type == kTypeWideColumnEntity) {
537
539
  ikey_.type = kTypeValue;
538
540
  current_key_.UpdateInternalKey(ikey_.sequence, ikey_.type);
539
541
  }
@@ -689,7 +691,8 @@ void CompactionIterator::NextFromInput() {
689
691
  // either way. We will maintain counts of how many mismatches
690
692
  // happened
691
693
  if (next_ikey.type != kTypeValue &&
692
- next_ikey.type != kTypeBlobIndex) {
694
+ next_ikey.type != kTypeBlobIndex &&
695
+ next_ikey.type != kTypeWideColumnEntity) {
693
696
  ++iter_stats_.num_single_del_mismatch;
694
697
  }
695
698
 
@@ -981,6 +981,21 @@ TEST_F(CompactionIteratorWithSnapshotCheckerTest,
981
981
  2 /*earliest_write_conflict_snapshot*/);
982
982
  }
983
983
 
984
+ // Same as above but with a wide-column entity. In addition to the value getting
985
+ // trimmed, the type of the KV is changed to kTypeValue.
986
+ TEST_F(CompactionIteratorWithSnapshotCheckerTest,
987
+ KeepSingleDeletionForWriteConflictChecking_WideColumnEntity) {
988
+ AddSnapshot(2, 0);
989
+ RunTest({test::KeyStr("a", 2, kTypeSingleDeletion),
990
+ test::KeyStr("a", 1, kTypeWideColumnEntity)},
991
+ {"", "fake_entity"},
992
+ {test::KeyStr("a", 2, kTypeSingleDeletion),
993
+ test::KeyStr("a", 1, kTypeValue)},
994
+ {"", ""}, 2 /* last_committed_seq */, nullptr /* merge_operator */,
995
+ nullptr /* compaction_filter */, false /* bottommost_level */,
996
+ 2 /* earliest_write_conflict_snapshot */);
997
+ }
998
+
984
999
  // Compaction filter should keep uncommitted key as-is, and
985
1000
  // * Convert the latest value to deletion, and/or
986
1001
  // * if latest value is a merge, apply filter to all subsequent merges.
@@ -191,7 +191,7 @@ struct CompactionJob::SubcompactionState {
191
191
  // The number of bytes overlapping between the current output and
192
192
  // grandparent files used in ShouldStopBefore().
193
193
  uint64_t overlapped_bytes = 0;
194
- // A flag determine whether the key has been seen in ShouldStopBefore()
194
+ // A flag indicating whether the key has been seen in ShouldStopBefore()
195
195
  bool seen_key = false;
196
196
  // sub compaction job id, which is used to identify different sub-compaction
197
197
  // within the same compaction job.
@@ -201,6 +201,9 @@ struct CompactionJob::SubcompactionState {
201
201
  // sub-compaction begin.
202
202
  bool notify_on_subcompaction_completion = false;
203
203
 
204
+ // A flag indicating whether this subcompaction has been split by the cursor
205
+ bool is_split = false;
206
+
204
207
  SubcompactionState(Compaction* c, Slice* _start, Slice* _end, uint64_t size,
205
208
  uint32_t _sub_job_id)
206
209
  : compaction(c),
@@ -234,6 +237,23 @@ struct CompactionJob::SubcompactionState {
234
237
  &compaction->column_family_data()->internal_comparator();
235
238
  const std::vector<FileMetaData*>& grandparents = compaction->grandparents();
236
239
 
240
+ const InternalKey output_split_key = compaction->GetOutputSplitKey();
241
+ if (output_split_key.Valid() && !is_split) {
242
+ // Invalid output_split_key indicates that we do not need to split
243
+ if ((end == nullptr || icmp->user_comparator()->Compare(
244
+ ExtractUserKey(output_split_key.Encode()),
245
+ ExtractUserKey(*end)) < 0) &&
246
+ (start == nullptr || icmp->user_comparator()->Compare(
247
+ ExtractUserKey(output_split_key.Encode()),
248
+ ExtractUserKey(*start)) > 0)) {
249
+ // We may only split the output when the cursor is in the range. Split
250
+ // occurs when the next key is larger than/equal to the cursor
251
+ if (icmp->Compare(internal_key, output_split_key.Encode()) >= 0) {
252
+ is_split = true;
253
+ return true;
254
+ }
255
+ }
256
+ }
237
257
  bool grandparant_file_switched = false;
238
258
  // Scan to find earliest grandparent file that contains key.
239
259
  while (grandparent_index < grandparents.size() &&
@@ -621,6 +641,16 @@ void CompactionJob::GenSubcompactionBoundaries() {
621
641
  }
622
642
  }
623
643
 
644
+ Slice output_split_user_key;
645
+ const InternalKey output_split_key = c->GetOutputSplitKey();
646
+ if (output_split_key.Valid()) {
647
+ output_split_user_key = ExtractUserKey(output_split_key.Encode());
648
+ bounds.emplace_back(output_split_key.Encode());
649
+ } else {
650
+ // Empty user key indicates that splitting is not required here
651
+ output_split_user_key = Slice();
652
+ }
653
+
624
654
  std::sort(bounds.begin(), bounds.end(),
625
655
  [cfd_comparator](const Slice& a, const Slice& b) -> bool {
626
656
  return cfd_comparator->Compare(ExtractUserKey(a),
@@ -691,7 +721,10 @@ void CompactionJob::GenSubcompactionBoundaries() {
691
721
  // need to put an end boundary
692
722
  continue;
693
723
  }
694
- if (sum >= mean) {
724
+ if (sum >= mean ||
725
+ (!output_split_user_key.empty() &&
726
+ cfd_comparator->Compare(ExtractUserKey(ranges[i].range.limit),
727
+ output_split_user_key) == 0)) {
695
728
  boundaries_.emplace_back(ExtractUserKey(ranges[i].range.limit));
696
729
  sizes_.emplace_back(sum);
697
730
  subcompactions--;
@@ -1349,6 +1349,61 @@ TEST_F(CompactionPickerTest, CompactionPriRoundRobin) {
1349
1349
  }
1350
1350
  }
1351
1351
 
1352
+ TEST_F(CompactionPickerTest, CompactionPriMinOverlappingManyFiles) {
1353
+ NewVersionStorage(6, kCompactionStyleLevel);
1354
+ ioptions_.compaction_pri = kMinOverlappingRatio;
1355
+ mutable_cf_options_.max_bytes_for_level_base = 15000000;
1356
+ mutable_cf_options_.max_bytes_for_level_multiplier = 10;
1357
+
1358
+ // Files 7 and 8 each overlap with one equally sized file, but file 8 itself
1359
+ // is larger, so its overlapping ratio is smaller and it will be picked.
1360
+ Add(2, 13U, "010", "011",
1361
+ 6100U); // Overlaps with a large file. Not picked
1362
+ Add(2, 14U, "020", "021",
1363
+ 6100U); // Overlaps with a large file. Not picked
1364
+ Add(2, 15U, "030", "031",
1365
+ 6100U); // Overlaps with a large file. Not picked
1366
+ Add(2, 16U, "040", "041",
1367
+ 6100U); // Overlaps with a large file. Not picked
1368
+ Add(2, 17U, "050", "051",
1369
+ 6100U); // Overlaps with a large file. Not picked
1370
+ Add(2, 18U, "060", "061",
1371
+ 6100U); // Overlaps with a large file. Not picked
1372
+ Add(2, 19U, "070", "071",
1373
+ 6100U); // Overlaps with a large file. Not picked
1374
+ Add(2, 20U, "080", "081",
1375
+ 6100U); // Overlaps with a large file. Not picked
1376
+
1377
+ Add(2, 6U, "150", "167", 60000000U); // Overlaps with file 26, 27
1378
+ Add(2, 7U, "168", "169", 60000000U); // Overlaps with file 27
1379
+ Add(2, 8U, "201", "300", 61000000U); // Overlaps with file 28, but the file
1380
+ // itself is larger. Should be picked.
1381
+ Add(2, 9U, "610", "611",
1382
+ 6100U); // Overlaps with a large file. Not picked
1383
+ Add(2, 10U, "620", "621",
1384
+ 6100U); // Overlaps with a large file. Not picked
1385
+ Add(2, 11U, "630", "631",
1386
+ 6100U); // Overlaps with a large file. Not picked
1387
+ Add(2, 12U, "640", "641",
1388
+ 6100U); // Overlaps with a large file. Not picked
1389
+
1390
+ Add(3, 31U, "001", "100", 260000000U);
1391
+ Add(3, 26U, "160", "165", 260000000U);
1392
+ Add(3, 27U, "166", "170", 260000000U);
1393
+ Add(3, 28U, "180", "400", 260000000U);
1394
+ Add(3, 29U, "401", "500", 260000000U);
1395
+ Add(3, 30U, "601", "700", 260000000U);
1396
+ UpdateVersionStorageInfo();
1397
+
1398
+ std::unique_ptr<Compaction> compaction(level_compaction_picker.PickCompaction(
1399
+ cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
1400
+ &log_buffer_));
1401
+ ASSERT_TRUE(compaction.get() != nullptr);
1402
+ ASSERT_EQ(1U, compaction->num_input_files(0));
1403
+ // Picking file 8 because its overlapping ratio is the smallest.
1404
+ ASSERT_EQ(8U, compaction->input(0, 0)->fd.GetNumber());
1405
+ }
1406
+
1352
1407
  // This test exhibits the bug where we don't properly reset parent_index in
1353
1408
  // PickCompaction()
1354
1409
  TEST_F(CompactionPickerTest, ParentIndexResetBug) {