@nxtedition/rocksdb 7.0.12 → 7.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
  2. package/deps/rocksdb/rocksdb/Makefile +3 -0
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
  5. package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
  6. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
  7. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
  8. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
  9. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
  11. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
  13. package/deps/rocksdb/rocksdb/db/c.cc +159 -5
  14. package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
  15. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
  16. package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
  24. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
  25. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
  26. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
  36. package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
  37. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
  38. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
  39. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
  41. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
  42. package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
  43. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  44. package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
  45. package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
  46. package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
  47. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
  49. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  50. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
  51. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
  52. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
  53. package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
  54. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  55. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  56. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
  59. package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
  60. package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
  61. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  62. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
  63. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
  66. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
  68. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  69. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
  70. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
  72. package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
  73. package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
  74. package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
  75. package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
  76. package/deps/rocksdb/rocksdb/options/options.cc +4 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
  78. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  79. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
  80. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
  81. package/deps/rocksdb/rocksdb/src.mk +1 -0
  82. package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
  88. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
  90. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
  91. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
  92. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
  93. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
  94. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
  95. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
  96. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
  97. package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
  98. package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
  99. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
  100. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
  101. package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
  102. package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
  103. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  104. package/package.json +1 -1
  105. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -865,23 +865,6 @@ struct DBOptions {
865
865
  // Default: true
866
866
  bool advise_random_on_open = true;
867
867
 
868
- // [experimental]
869
- // Used to activate or deactive the Mempurge feature (memtable garbage
870
- // collection). (deactivated by default). At every flush, the total useful
871
- // payload (total entries minus garbage entries) is estimated as a ratio
872
- // [useful payload bytes]/[size of a memtable (in bytes)]. This ratio is then
873
- // compared to this `threshold` value:
874
- // - if ratio<threshold: the flush is replaced by a mempurge operation
875
- // - else: a regular flush operation takes place.
876
- // Threshold values:
877
- // 0.0: mempurge deactivated (default).
878
- // 1.0: recommended threshold value.
879
- // >1.0 : aggressive mempurge.
880
- // 0 < threshold < 1.0: mempurge triggered only for very low useful payload
881
- // ratios.
882
- // [experimental]
883
- double experimental_mempurge_threshold = 0.0;
884
-
885
868
  // Amount of data to build up in memtables across all column
886
869
  // families before writing to disk.
887
870
  //
@@ -1274,13 +1257,30 @@ struct DBOptions {
1274
1257
  // Default: nullptr
1275
1258
  std::shared_ptr<FileChecksumGenFactory> file_checksum_gen_factory = nullptr;
1276
1259
 
1277
- // By default, RocksDB recovery fails if any table file referenced in
1278
- // MANIFEST are missing after scanning the MANIFEST.
1279
- // Best-efforts recovery is another recovery mode that
1280
- // tries to restore the database to the most recent point in time without
1281
- // missing file.
1282
- // Currently not compatible with atomic flush. Furthermore, WAL files will
1260
+ // By default, RocksDB recovery fails if any table/blob file referenced in
1261
+ // MANIFEST are missing after scanning the MANIFEST pointed to by the
1262
+ // CURRENT file.
1263
+ // Best-efforts recovery is another recovery mode that tolerates missing or
1264
+ // corrupted table or blob files.
1265
+ // Best-efforts recovery does not need a valid CURRENT file, and tries to
1266
+ // recover the database using one of the available MANIFEST files in the db
1267
+ // directory.
1268
+ // Best-efforts recovery recovers database to a state in which the database
1269
+ // includes only table and blob files whose actual sizes match the
1270
+ // information in the chosen MANIFEST without holes in the history.
1271
+ // Best-efforts recovery tries the available MANIFEST files from high file
1272
+ // numbers (newer) to low file numbers (older), and stops after finding the
1273
+ // first MANIFEST file from which the db can be recovered to a state without
1274
+ // invalid (missing/file-mismatch) table and blob files.
1275
+ // It is possible that the database can be restored to an empty state with no
1276
+ // table or blob files.
1277
+ // Regardless of this option, the IDENTITY file is updated if needed during
1278
+ // recovery to match the DB ID in the MANIFEST (if previously using
1279
+ // write_dbid_to_manifest) or to be in some valid state (non-empty DB ID).
1280
+ // Currently, not compatible with atomic flush. Furthermore, WAL files will
1283
1281
  // not be used for recovery if best_efforts_recovery is true.
1282
+ // Also requires either 1) LOCK file exists or 2) underlying env's LockFile()
1283
+ // call returns ok even for non-existing LOCK file.
1284
1284
  // Default: false
1285
1285
  bool best_efforts_recovery = false;
1286
1286
 
@@ -85,6 +85,13 @@ class StackableDB : public DB {
85
85
  return db_->Put(options, column_family, key, ts, val);
86
86
  }
87
87
 
88
+ using DB::PutEntity;
89
+ Status PutEntity(const WriteOptions& options,
90
+ ColumnFamilyHandle* column_family, const Slice& key,
91
+ const WideColumns& columns) override {
92
+ return db_->PutEntity(options, column_family, key, columns);
93
+ }
94
+
88
95
  using DB::Get;
89
96
  virtual Status Get(const ReadOptions& options,
90
97
  ColumnFamilyHandle* column_family, const Slice& key,
@@ -113,6 +113,17 @@ class WriteBatchWithIndex : public WriteBatchBase {
113
113
  Status Put(ColumnFamilyHandle* column_family, const Slice& key,
114
114
  const Slice& ts, const Slice& value) override;
115
115
 
116
+ Status PutEntity(ColumnFamilyHandle* column_family, const Slice& /* key */,
117
+ const WideColumns& /* columns */) override {
118
+ if (!column_family) {
119
+ return Status::InvalidArgument(
120
+ "Cannot call this method without a column family handle");
121
+ }
122
+
123
+ return Status::NotSupported(
124
+ "PutEntity not supported by WriteBatchWithIndex");
125
+ }
126
+
116
127
  using WriteBatchBase::Merge;
117
128
  Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
118
129
  const Slice& value) override;
@@ -100,6 +100,11 @@ class WriteBatch : public WriteBatchBase {
100
100
  return Put(nullptr, key, value);
101
101
  }
102
102
 
103
+ // UNDER CONSTRUCTION -- DO NOT USE
104
+ using WriteBatchBase::PutEntity;
105
+ Status PutEntity(ColumnFamilyHandle* column_family, const Slice& key,
106
+ const WideColumns& columns) override;
107
+
103
108
  using WriteBatchBase::Delete;
104
109
  // If the database contains a mapping for "key", erase it. Else do nothing.
105
110
  // The following Delete(..., const Slice& key) can be used when user-defined
@@ -240,6 +245,12 @@ class WriteBatch : public WriteBatchBase {
240
245
  }
241
246
  virtual void Put(const Slice& /*key*/, const Slice& /*value*/) {}
242
247
 
248
+ virtual Status PutEntityCF(uint32_t /* column_family_id */,
249
+ const Slice& /* key */,
250
+ const Slice& /* entity */) {
251
+ return Status::NotSupported("PutEntityCF not implemented");
252
+ }
253
+
243
254
  virtual Status DeleteCF(uint32_t column_family_id, const Slice& key) {
244
255
  if (column_family_id == 0) {
245
256
  Delete(key);
@@ -346,6 +357,9 @@ class WriteBatch : public WriteBatchBase {
346
357
  // Returns true if PutCF will be called during Iterate
347
358
  bool HasPut() const;
348
359
 
360
+ // Returns true if PutEntityCF will be called during Iterate
361
+ bool HasPutEntity() const;
362
+
349
363
  // Returns true if DeleteCF will be called during Iterate
350
364
  bool HasDelete() const;
351
365
 
@@ -11,6 +11,7 @@
11
11
  #include <cstddef>
12
12
 
13
13
  #include "rocksdb/rocksdb_namespace.h"
14
+ #include "rocksdb/wide_columns.h"
14
15
 
15
16
  namespace ROCKSDB_NAMESPACE {
16
17
 
@@ -41,6 +42,10 @@ class WriteBatchBase {
41
42
  const SliceParts& value);
42
43
  virtual Status Put(const SliceParts& key, const SliceParts& value);
43
44
 
45
+ // UNDER CONSTRUCTION -- DO NOT USE
46
+ virtual Status PutEntity(ColumnFamilyHandle* column_family, const Slice& key,
47
+ const WideColumns& columns) = 0;
48
+
44
49
  // Merge "value" with the existing value of "key" in the database.
45
50
  // "key->merge(existing, value)"
46
51
  virtual Status Merge(ColumnFamilyHandle* column_family, const Slice& key,
@@ -464,6 +464,10 @@ static std::unordered_map<std::string, OptionTypeInfo>
464
464
  offsetof(struct MutableCFOptions, compression_per_level),
465
465
  OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
466
466
  {0, OptionType::kCompressionType})},
467
+ {"experimental_mempurge_threshold",
468
+ {offsetof(struct MutableCFOptions, experimental_mempurge_threshold),
469
+ OptionType::kDouble, OptionVerificationType::kNormal,
470
+ OptionTypeFlags::kMutable}},
467
471
  {kOptNameCompOpts,
468
472
  OptionTypeInfo::Struct(
469
473
  kOptNameCompOpts, &compression_options_type_info,
@@ -1037,6 +1041,9 @@ void MutableCFOptions::Dump(Logger* log) const {
1037
1041
  report_bg_io_stats);
1038
1042
  ROCKS_LOG_INFO(log, " compression: %d",
1039
1043
  static_cast<int>(compression));
1044
+ ROCKS_LOG_INFO(log,
1045
+ " experimental_mempurge_threshold: %f",
1046
+ experimental_mempurge_threshold);
1040
1047
 
1041
1048
  // Universal Compaction Options
1042
1049
  ROCKS_LOG_INFO(log, "compaction_options_universal.size_ratio : %d",
@@ -112,6 +112,8 @@ struct MutableCFOptions {
112
112
  max_successive_merges(options.max_successive_merges),
113
113
  inplace_update_num_locks(options.inplace_update_num_locks),
114
114
  prefix_extractor(options.prefix_extractor),
115
+ experimental_mempurge_threshold(
116
+ options.experimental_mempurge_threshold),
115
117
  disable_auto_compactions(options.disable_auto_compactions),
116
118
  soft_pending_compaction_bytes_limit(
117
119
  options.soft_pending_compaction_bytes_limit),
@@ -170,6 +172,7 @@ struct MutableCFOptions {
170
172
  max_successive_merges(0),
171
173
  inplace_update_num_locks(0),
172
174
  prefix_extractor(nullptr),
175
+ experimental_mempurge_threshold(0.0),
173
176
  disable_auto_compactions(false),
174
177
  soft_pending_compaction_bytes_limit(0),
175
178
  hard_pending_compaction_bytes_limit(0),
@@ -231,6 +234,22 @@ struct MutableCFOptions {
231
234
  size_t max_successive_merges;
232
235
  size_t inplace_update_num_locks;
233
236
  std::shared_ptr<const SliceTransform> prefix_extractor;
237
+ // [experimental]
238
+ // Used to activate or deactive the Mempurge feature (memtable garbage
239
+ // collection). (deactivated by default). At every flush, the total useful
240
+ // payload (total entries minus garbage entries) is estimated as a ratio
241
+ // [useful payload bytes]/[size of a memtable (in bytes)]. This ratio is then
242
+ // compared to this `threshold` value:
243
+ // - if ratio<threshold: the flush is replaced by a mempurge operation
244
+ // - else: a regular flush operation takes place.
245
+ // Threshold values:
246
+ // 0.0: mempurge deactivated (default).
247
+ // 1.0: recommended threshold value.
248
+ // >1.0 : aggressive mempurge.
249
+ // 0 < threshold < 1.0: mempurge triggered only for very low useful payload
250
+ // ratios.
251
+ // [experimental]
252
+ double experimental_mempurge_threshold;
234
253
 
235
254
  // Compaction related options
236
255
  bool disable_auto_compactions;
@@ -208,8 +208,7 @@ static std::unordered_map<std::string, OptionTypeInfo>
208
208
  {0, OptionType::kString, OptionVerificationType::kDeprecated,
209
209
  OptionTypeFlags::kNone}},
210
210
  {"experimental_mempurge_threshold",
211
- {offsetof(struct ImmutableDBOptions, experimental_mempurge_threshold),
212
- OptionType::kDouble, OptionVerificationType::kNormal,
211
+ {0, OptionType::kDouble, OptionVerificationType::kDeprecated,
213
212
  OptionTypeFlags::kNone}},
214
213
  {"is_fd_close_on_exec",
215
214
  {offsetof(struct ImmutableDBOptions, is_fd_close_on_exec),
@@ -716,7 +715,6 @@ ImmutableDBOptions::ImmutableDBOptions(const DBOptions& options)
716
715
  allow_fallocate(options.allow_fallocate),
717
716
  is_fd_close_on_exec(options.is_fd_close_on_exec),
718
717
  advise_random_on_open(options.advise_random_on_open),
719
- experimental_mempurge_threshold(options.experimental_mempurge_threshold),
720
718
  db_write_buffer_size(options.db_write_buffer_size),
721
719
  write_buffer_manager(options.write_buffer_manager),
722
720
  access_hint_on_compaction_start(options.access_hint_on_compaction_start),
@@ -847,9 +845,6 @@ void ImmutableDBOptions::Dump(Logger* log) const {
847
845
  is_fd_close_on_exec);
848
846
  ROCKS_LOG_HEADER(log, " Options.advise_random_on_open: %d",
849
847
  advise_random_on_open);
850
- ROCKS_LOG_HEADER(
851
- log, " Options.experimental_mempurge_threshold: %f",
852
- experimental_mempurge_threshold);
853
848
  ROCKS_LOG_HEADER(
854
849
  log, " Options.db_write_buffer_size: %" ROCKSDB_PRIszt,
855
850
  db_write_buffer_size);
@@ -58,7 +58,6 @@ struct ImmutableDBOptions {
58
58
  bool allow_fallocate;
59
59
  bool is_fd_close_on_exec;
60
60
  bool advise_random_on_open;
61
- double experimental_mempurge_threshold;
62
61
  size_t db_write_buffer_size;
63
62
  std::shared_ptr<WriteBufferManager> write_buffer_manager;
64
63
  DBOptions::AccessHint access_hint_on_compaction_start;
@@ -49,6 +49,7 @@ AdvancedColumnFamilyOptions::AdvancedColumnFamilyOptions(const Options& options)
49
49
  options.max_write_buffer_size_to_maintain),
50
50
  inplace_update_support(options.inplace_update_support),
51
51
  inplace_update_num_locks(options.inplace_update_num_locks),
52
+ experimental_mempurge_threshold(options.experimental_mempurge_threshold),
52
53
  inplace_callback(options.inplace_callback),
53
54
  memtable_prefix_bloom_size_ratio(
54
55
  options.memtable_prefix_bloom_size_ratio),
@@ -424,12 +425,14 @@ void ColumnFamilyOptions::Dump(Logger* log) const {
424
425
  ROCKS_LOG_HEADER(log, " blob_cache options: %s",
425
426
  blob_cache->GetPrintableOptions().c_str());
426
427
  }
428
+ ROCKS_LOG_HEADER(log, "Options.experimental_mempurge_threshold: %f",
429
+ experimental_mempurge_threshold);
427
430
  } // ColumnFamilyOptions::Dump
428
431
 
429
432
  void Options::Dump(Logger* log) const {
430
433
  DBOptions::Dump(log);
431
434
  ColumnFamilyOptions::Dump(log);
432
- } // Options::Dump
435
+ } // Options::Dump
433
436
 
434
437
  void Options::DumpCFOptions(Logger* log) const {
435
438
  ColumnFamilyOptions::Dump(log);
@@ -212,6 +212,8 @@ void UpdateColumnFamilyOptions(const MutableCFOptions& moptions,
212
212
  cf_opts->max_successive_merges = moptions.max_successive_merges;
213
213
  cf_opts->inplace_update_num_locks = moptions.inplace_update_num_locks;
214
214
  cf_opts->prefix_extractor = moptions.prefix_extractor;
215
+ cf_opts->experimental_mempurge_threshold =
216
+ moptions.experimental_mempurge_threshold;
215
217
 
216
218
  // Compaction related options
217
219
  cf_opts->disable_auto_compactions = moptions.disable_auto_compactions;
@@ -503,6 +503,7 @@ TEST_F(OptionsSettableTest, ColumnFamilyOptionsAllFieldsSettable) {
503
503
  "paranoid_file_checks=true;"
504
504
  "force_consistency_checks=true;"
505
505
  "inplace_update_num_locks=7429;"
506
+ "experimental_mempurge_threshold=0.0001;"
506
507
  "optimize_filters_for_hits=false;"
507
508
  "level_compaction_dynamic_level_bytes=false;"
508
509
  "inplace_update_support=false;"
@@ -116,6 +116,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
116
116
  {"max_successive_merges", "30"},
117
117
  {"min_partial_merge_operands", "31"},
118
118
  {"prefix_extractor", "fixed:31"},
119
+ {"experimental_mempurge_threshold", "0.003"},
119
120
  {"optimize_filters_for_hits", "true"},
120
121
  {"enable_blob_files", "true"},
121
122
  {"min_blob_size", "1K"},
@@ -164,7 +165,6 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
164
165
  {"persist_stats_to_disk", "false"},
165
166
  {"stats_history_buffer_size", "69"},
166
167
  {"advise_random_on_open", "true"},
167
- {"experimental_mempurge_threshold", "0.0"},
168
168
  {"use_adaptive_mutex", "false"},
169
169
  {"compaction_readahead_size", "100"},
170
170
  {"random_access_max_buffer_size", "3145728"},
@@ -256,6 +256,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
256
256
  ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr);
257
257
  ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true);
258
258
  ASSERT_EQ(new_cf_opt.prefix_extractor->AsString(), "rocksdb.FixedPrefix.31");
259
+ ASSERT_EQ(new_cf_opt.experimental_mempurge_threshold, 0.003);
259
260
  ASSERT_EQ(new_cf_opt.enable_blob_files, true);
260
261
  ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10);
261
262
  ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30);
@@ -329,7 +330,6 @@ TEST_F(OptionsTest, GetOptionsFromMapTest) {
329
330
  ASSERT_EQ(new_db_opt.persist_stats_to_disk, false);
330
331
  ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U);
331
332
  ASSERT_EQ(new_db_opt.advise_random_on_open, true);
332
- ASSERT_EQ(new_db_opt.experimental_mempurge_threshold, 0.0);
333
333
  ASSERT_EQ(new_db_opt.use_adaptive_mutex, false);
334
334
  ASSERT_EQ(new_db_opt.compaction_readahead_size, 100);
335
335
  ASSERT_EQ(new_db_opt.random_access_max_buffer_size, 3145728);
@@ -2345,6 +2345,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) {
2345
2345
  {"max_successive_merges", "30"},
2346
2346
  {"min_partial_merge_operands", "31"},
2347
2347
  {"prefix_extractor", "fixed:31"},
2348
+ {"experimental_mempurge_threshold", "0.003"},
2348
2349
  {"optimize_filters_for_hits", "true"},
2349
2350
  {"enable_blob_files", "true"},
2350
2351
  {"min_blob_size", "1K"},
@@ -2393,7 +2394,6 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) {
2393
2394
  {"persist_stats_to_disk", "false"},
2394
2395
  {"stats_history_buffer_size", "69"},
2395
2396
  {"advise_random_on_open", "true"},
2396
- {"experimental_mempurge_threshold", "0.0"},
2397
2397
  {"use_adaptive_mutex", "false"},
2398
2398
  {"compaction_readahead_size", "100"},
2399
2399
  {"random_access_max_buffer_size", "3145728"},
@@ -2479,6 +2479,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) {
2479
2479
  ASSERT_TRUE(new_cf_opt.prefix_extractor != nullptr);
2480
2480
  ASSERT_EQ(new_cf_opt.optimize_filters_for_hits, true);
2481
2481
  ASSERT_EQ(new_cf_opt.prefix_extractor->AsString(), "rocksdb.FixedPrefix.31");
2482
+ ASSERT_EQ(new_cf_opt.experimental_mempurge_threshold, 0.003);
2482
2483
  ASSERT_EQ(new_cf_opt.enable_blob_files, true);
2483
2484
  ASSERT_EQ(new_cf_opt.min_blob_size, 1ULL << 10);
2484
2485
  ASSERT_EQ(new_cf_opt.blob_file_size, 1ULL << 30);
@@ -2553,7 +2554,6 @@ TEST_F(OptionsOldApiTest, GetOptionsFromMapTest) {
2553
2554
  ASSERT_EQ(new_db_opt.persist_stats_to_disk, false);
2554
2555
  ASSERT_EQ(new_db_opt.stats_history_buffer_size, 69U);
2555
2556
  ASSERT_EQ(new_db_opt.advise_random_on_open, true);
2556
- ASSERT_EQ(new_db_opt.experimental_mempurge_threshold, 0.0);
2557
2557
  ASSERT_EQ(new_db_opt.use_adaptive_mutex, false);
2558
2558
  ASSERT_EQ(new_db_opt.compaction_readahead_size, 100);
2559
2559
  ASSERT_EQ(new_db_opt.random_access_max_buffer_size, 3145728);
@@ -410,7 +410,7 @@ IOStatus WinFileSystem::OpenWritableFile(
410
410
  if (INVALID_HANDLE_VALUE == hFile) {
411
411
  auto lastError = GetLastError();
412
412
  return IOErrorFromWindowsError(
413
- "Failed to create a NewWriteableFile: " + fname, lastError);
413
+ "Failed to create a NewWritableFile: " + fname, lastError);
414
414
  }
415
415
 
416
416
  // We will start writing at the end, appending
@@ -503,6 +503,7 @@ TEST_MAIN_SOURCES = \
503
503
  db/version_edit_test.cc \
504
504
  db/version_set_test.cc \
505
505
  db/wal_manager_test.cc \
506
+ db/wide/db_wide_basic_test.cc \
506
507
  db/wide/wide_column_serialization_test.cc \
507
508
  db/write_batch_test.cc \
508
509
  db/write_callback_test.cc \
@@ -307,10 +307,11 @@ void MetaBlockIter::SeekImpl(const Slice& target) {
307
307
  // target = "seek_user_key @ type | seqno".
308
308
  //
309
309
  // For any type other than kTypeValue, kTypeDeletion, kTypeSingleDeletion,
310
- // or kTypeBlobIndex, this function behaves identically as Seek().
310
+ // kTypeBlobIndex, or kTypeWideColumnEntity, this function behaves identically
311
+ // to Seek().
311
312
  //
312
313
  // For any type in kTypeValue, kTypeDeletion, kTypeSingleDeletion,
313
- // or kTypeBlobIndex:
314
+ // kTypeBlobIndex, or kTypeWideColumnEntity:
314
315
  //
315
316
  // If the return value is FALSE, iter location is undefined, and it means:
316
317
  // 1) there is no key in this block falling into the range:
@@ -412,7 +413,8 @@ bool DataBlockIter::SeekForGetImpl(const Slice& target) {
412
413
  if (value_type != ValueType::kTypeValue &&
413
414
  value_type != ValueType::kTypeDeletion &&
414
415
  value_type != ValueType::kTypeSingleDeletion &&
415
- value_type != ValueType::kTypeBlobIndex) {
416
+ value_type != ValueType::kTypeBlobIndex &&
417
+ value_type != ValueType::kTypeWideColumnEntity) {
416
418
  SeekImpl(target);
417
419
  return true;
418
420
  }
@@ -387,7 +387,7 @@ static std::unordered_map<std::string, OptionTypeInfo>
387
387
  {offsetof(struct BlockBasedTableOptions, block_cache),
388
388
  OptionType::kUnknown, OptionVerificationType::kNormal,
389
389
  (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize),
390
- // Parses the input vsalue as a Cache
390
+ // Parses the input value as a Cache
391
391
  [](const ConfigOptions& opts, const std::string&,
392
392
  const std::string& value, void* addr) {
393
393
  auto* cache = static_cast<std::shared_ptr<Cache>*>(addr);
@@ -397,7 +397,7 @@ static std::unordered_map<std::string, OptionTypeInfo>
397
397
  {offsetof(struct BlockBasedTableOptions, block_cache_compressed),
398
398
  OptionType::kUnknown, OptionVerificationType::kNormal,
399
399
  (OptionTypeFlags::kCompareNever | OptionTypeFlags::kDontSerialize),
400
- // Parses the input vsalue as a Cache
400
+ // Parses the input value as a Cache
401
401
  [](const ConfigOptions& opts, const std::string&,
402
402
  const std::string& value, void* addr) {
403
403
  auto* cache = static_cast<std::shared_ptr<Cache>*>(addr);
@@ -1902,7 +1902,8 @@ bool BlockBasedTable::PrefixRangeMayMatch(
1902
1902
  may_match = filter->RangeMayExist(
1903
1903
  read_options.iterate_upper_bound, user_key_without_ts, prefix_extractor,
1904
1904
  rep_->internal_comparator.user_comparator(), const_ikey_ptr,
1905
- &filter_checked, need_upper_bound_check, no_io, lookup_context);
1905
+ &filter_checked, need_upper_bound_check, no_io, lookup_context,
1906
+ read_options.rate_limiter_priority);
1906
1907
  }
1907
1908
 
1908
1909
  if (filter_checked) {
@@ -1974,7 +1975,8 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator(
1974
1975
  bool BlockBasedTable::FullFilterKeyMayMatch(
1975
1976
  FilterBlockReader* filter, const Slice& internal_key, const bool no_io,
1976
1977
  const SliceTransform* prefix_extractor, GetContext* get_context,
1977
- BlockCacheLookupContext* lookup_context) const {
1978
+ BlockCacheLookupContext* lookup_context,
1979
+ Env::IOPriority rate_limiter_priority) const {
1978
1980
  if (filter == nullptr) {
1979
1981
  return true;
1980
1982
  }
@@ -1984,13 +1986,15 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
1984
1986
  size_t ts_sz = rep_->internal_comparator.user_comparator()->timestamp_size();
1985
1987
  Slice user_key_without_ts = StripTimestampFromUserKey(user_key, ts_sz);
1986
1988
  if (rep_->whole_key_filtering) {
1987
- may_match = filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr,
1988
- get_context, lookup_context);
1989
+ may_match =
1990
+ filter->KeyMayMatch(user_key_without_ts, no_io, const_ikey_ptr,
1991
+ get_context, lookup_context, rate_limiter_priority);
1989
1992
  } else if (!PrefixExtractorChanged(prefix_extractor) &&
1990
1993
  prefix_extractor->InDomain(user_key_without_ts) &&
1991
1994
  !filter->PrefixMayMatch(
1992
1995
  prefix_extractor->Transform(user_key_without_ts), no_io,
1993
- const_ikey_ptr, get_context, lookup_context)) {
1996
+ const_ikey_ptr, get_context, lookup_context,
1997
+ rate_limiter_priority)) {
1994
1998
  // FIXME ^^^: there should be no reason for Get() to depend on current
1995
1999
  // prefix_extractor at all. It should always use table_prefix_extractor.
1996
2000
  may_match = false;
@@ -2005,14 +2009,15 @@ bool BlockBasedTable::FullFilterKeyMayMatch(
2005
2009
  void BlockBasedTable::FullFilterKeysMayMatch(
2006
2010
  FilterBlockReader* filter, MultiGetRange* range, const bool no_io,
2007
2011
  const SliceTransform* prefix_extractor,
2008
- BlockCacheLookupContext* lookup_context) const {
2012
+ BlockCacheLookupContext* lookup_context,
2013
+ Env::IOPriority rate_limiter_priority) const {
2009
2014
  if (filter == nullptr) {
2010
2015
  return;
2011
2016
  }
2012
2017
  uint64_t before_keys = range->KeysLeft();
2013
2018
  assert(before_keys > 0); // Caller should ensure
2014
2019
  if (rep_->whole_key_filtering) {
2015
- filter->KeysMayMatch(range, no_io, lookup_context);
2020
+ filter->KeysMayMatch(range, no_io, lookup_context, rate_limiter_priority);
2016
2021
  uint64_t after_keys = range->KeysLeft();
2017
2022
  if (after_keys) {
2018
2023
  RecordTick(rep_->ioptions.stats, BLOOM_FILTER_FULL_POSITIVE, after_keys);
@@ -2028,7 +2033,8 @@ void BlockBasedTable::FullFilterKeysMayMatch(
2028
2033
  } else if (!PrefixExtractorChanged(prefix_extractor)) {
2029
2034
  // FIXME ^^^: there should be no reason for MultiGet() to depend on current
2030
2035
  // prefix_extractor at all. It should always use table_prefix_extractor.
2031
- filter->PrefixesMayMatch(range, prefix_extractor, false, lookup_context);
2036
+ filter->PrefixesMayMatch(range, prefix_extractor, false, lookup_context,
2037
+ rate_limiter_priority);
2032
2038
  RecordTick(rep_->ioptions.stats, BLOOM_FILTER_PREFIX_CHECKED, before_keys);
2033
2039
  uint64_t after_keys = range->KeysLeft();
2034
2040
  uint64_t filtered_keys = before_keys - after_keys;
@@ -2065,7 +2071,8 @@ Status BlockBasedTable::Get(const ReadOptions& read_options, const Slice& key,
2065
2071
  }
2066
2072
  TEST_SYNC_POINT("BlockBasedTable::Get:BeforeFilterMatch");
2067
2073
  const bool may_match = FullFilterKeyMayMatch(
2068
- filter, key, no_io, prefix_extractor, get_context, &lookup_context);
2074
+ filter, key, no_io, prefix_extractor, get_context, &lookup_context,
2075
+ read_options.rate_limiter_priority);
2069
2076
  TEST_SYNC_POINT("BlockBasedTable::Get:AfterFilterMatch");
2070
2077
  if (!may_match) {
2071
2078
  RecordTick(rep_->ioptions.stats, BLOOM_FILTER_USEFUL);
@@ -454,12 +454,14 @@ class BlockBasedTable : public TableReader {
454
454
  const bool no_io,
455
455
  const SliceTransform* prefix_extractor,
456
456
  GetContext* get_context,
457
- BlockCacheLookupContext* lookup_context) const;
457
+ BlockCacheLookupContext* lookup_context,
458
+ Env::IOPriority rate_limiter_priority) const;
458
459
 
459
460
  void FullFilterKeysMayMatch(FilterBlockReader* filter, MultiGetRange* range,
460
461
  const bool no_io,
461
462
  const SliceTransform* prefix_extractor,
462
- BlockCacheLookupContext* lookup_context) const;
463
+ BlockCacheLookupContext* lookup_context,
464
+ Env::IOPriority rate_limiter_priority) const;
463
465
 
464
466
  // If force_direct_prefetch is true, always prefetching to RocksDB
465
467
  // buffer, rather than calling RandomAccessFile::Prefetch().
@@ -335,7 +335,7 @@ DEFINE_SYNC_AND_ASYNC(void, BlockBasedTable::MultiGet)
335
335
  TableReaderCaller::kUserMultiGet, tracing_mget_id,
336
336
  /*_get_from_user_specified_snapshot=*/read_options.snapshot != nullptr};
337
337
  FullFilterKeysMayMatch(filter, &sst_file_range, no_io, prefix_extractor,
338
- &lookup_context);
338
+ &lookup_context, read_options.rate_limiter_priority);
339
339
 
340
340
  if (!sst_file_range.empty()) {
341
341
  IndexBlockIter iiter_on_stack;
@@ -112,16 +112,18 @@ class FilterBlockReader {
112
112
  virtual bool KeyMayMatch(const Slice& key, const bool no_io,
113
113
  const Slice* const const_ikey_ptr,
114
114
  GetContext* get_context,
115
- BlockCacheLookupContext* lookup_context) = 0;
115
+ BlockCacheLookupContext* lookup_context,
116
+ Env::IOPriority rate_limiter_priority) = 0;
116
117
 
117
118
  virtual void KeysMayMatch(MultiGetRange* range, const bool no_io,
118
- BlockCacheLookupContext* lookup_context) {
119
+ BlockCacheLookupContext* lookup_context,
120
+ Env::IOPriority rate_limiter_priority) {
119
121
  for (auto iter = range->begin(); iter != range->end(); ++iter) {
120
122
  const Slice ukey_without_ts = iter->ukey_without_ts;
121
123
  const Slice ikey = iter->ikey;
122
124
  GetContext* const get_context = iter->get_context;
123
125
  if (!KeyMayMatch(ukey_without_ts, no_io, &ikey, get_context,
124
- lookup_context)) {
126
+ lookup_context, rate_limiter_priority)) {
125
127
  range->SkipKey(iter);
126
128
  }
127
129
  }
@@ -133,19 +135,22 @@ class FilterBlockReader {
133
135
  virtual bool PrefixMayMatch(const Slice& prefix, const bool no_io,
134
136
  const Slice* const const_ikey_ptr,
135
137
  GetContext* get_context,
136
- BlockCacheLookupContext* lookup_context) = 0;
138
+ BlockCacheLookupContext* lookup_context,
139
+ Env::IOPriority rate_limiter_priority) = 0;
137
140
 
138
141
  virtual void PrefixesMayMatch(MultiGetRange* range,
139
142
  const SliceTransform* prefix_extractor,
140
143
  const bool no_io,
141
- BlockCacheLookupContext* lookup_context) {
144
+ BlockCacheLookupContext* lookup_context,
145
+ Env::IOPriority rate_limiter_priority) {
142
146
  for (auto iter = range->begin(); iter != range->end(); ++iter) {
143
147
  const Slice ukey_without_ts = iter->ukey_without_ts;
144
148
  const Slice ikey = iter->ikey;
145
149
  GetContext* const get_context = iter->get_context;
146
150
  if (prefix_extractor->InDomain(ukey_without_ts) &&
147
151
  !PrefixMayMatch(prefix_extractor->Transform(ukey_without_ts), no_io,
148
- &ikey, get_context, lookup_context)) {
152
+ &ikey, get_context, lookup_context,
153
+ rate_limiter_priority)) {
149
154
  range->SkipKey(iter);
150
155
  }
151
156
  }
@@ -170,7 +175,8 @@ class FilterBlockReader {
170
175
  const Slice* const const_ikey_ptr,
171
176
  bool* filter_checked, bool need_upper_bound_check,
172
177
  bool no_io,
173
- BlockCacheLookupContext* lookup_context) = 0;
178
+ BlockCacheLookupContext* lookup_context,
179
+ Env::IOPriority rate_limiter_priority) = 0;
174
180
  };
175
181
 
176
182
  } // namespace ROCKSDB_NAMESPACE
@@ -67,7 +67,8 @@ template <typename TBlocklike>
67
67
  Status FilterBlockReaderCommon<TBlocklike>::GetOrReadFilterBlock(
68
68
  bool no_io, GetContext* get_context,
69
69
  BlockCacheLookupContext* lookup_context,
70
- CachableEntry<TBlocklike>* filter_block, BlockType block_type) const {
70
+ CachableEntry<TBlocklike>* filter_block, BlockType block_type,
71
+ Env::IOPriority rate_limiter_priority) const {
71
72
  assert(filter_block);
72
73
 
73
74
  if (!filter_block_.IsEmpty()) {
@@ -76,6 +77,7 @@ Status FilterBlockReaderCommon<TBlocklike>::GetOrReadFilterBlock(
76
77
  }
77
78
 
78
79
  ReadOptions read_options;
80
+ read_options.rate_limiter_priority = rate_limiter_priority;
79
81
  if (no_io) {
80
82
  read_options.read_tier = kBlockCacheTier;
81
83
  }
@@ -100,7 +102,8 @@ bool FilterBlockReaderCommon<TBlocklike>::RangeMayExist(
100
102
  const SliceTransform* prefix_extractor, const Comparator* comparator,
101
103
  const Slice* const const_ikey_ptr, bool* filter_checked,
102
104
  bool need_upper_bound_check, bool no_io,
103
- BlockCacheLookupContext* lookup_context) {
105
+ BlockCacheLookupContext* lookup_context,
106
+ Env::IOPriority rate_limiter_priority) {
104
107
  if (!prefix_extractor || !prefix_extractor->InDomain(user_key_without_ts)) {
105
108
  *filter_checked = false;
106
109
  return true;
@@ -113,7 +116,8 @@ bool FilterBlockReaderCommon<TBlocklike>::RangeMayExist(
113
116
  } else {
114
117
  *filter_checked = true;
115
118
  return PrefixMayMatch(prefix, no_io, const_ikey_ptr,
116
- /* get_context */ nullptr, lookup_context);
119
+ /* get_context */ nullptr, lookup_context,
120
+ rate_limiter_priority);
117
121
  }
118
122
  }
119
123
 
@@ -40,7 +40,8 @@ class FilterBlockReaderCommon : public FilterBlockReader {
40
40
  const Comparator* comparator,
41
41
  const Slice* const const_ikey_ptr, bool* filter_checked,
42
42
  bool need_upper_bound_check, bool no_io,
43
- BlockCacheLookupContext* lookup_context) override;
43
+ BlockCacheLookupContext* lookup_context,
44
+ Env::IOPriority rate_limiter_priority) override;
44
45
 
45
46
  protected:
46
47
  static Status ReadFilterBlock(const BlockBasedTable* table,
@@ -59,7 +60,8 @@ class FilterBlockReaderCommon : public FilterBlockReader {
59
60
  Status GetOrReadFilterBlock(bool no_io, GetContext* get_context,
60
61
  BlockCacheLookupContext* lookup_context,
61
62
  CachableEntry<TBlocklike>* filter_block,
62
- BlockType block_type) const;
63
+ BlockType block_type,
64
+ Env::IOPriority rate_limiter_priority) const;
63
65
 
64
66
  size_t ApproximateFilterBlockMemoryUsage() const;
65
67