@nxtedition/rocksdb 7.0.12 → 7.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -0
  2. package/deps/rocksdb/rocksdb/Makefile +3 -0
  3. package/deps/rocksdb/rocksdb/TARGETS +6 -0
  4. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +12 -7
  5. package/deps/rocksdb/rocksdb/cache/cache_key.h +2 -0
  6. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +18 -6
  7. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +13 -5
  8. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +89 -0
  9. package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +18 -28
  10. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +147 -2
  11. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +30 -0
  12. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +403 -30
  13. package/deps/rocksdb/rocksdb/db/c.cc +159 -5
  14. package/deps/rocksdb/rocksdb/db/c_test.c +108 -0
  15. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -1
  16. package/deps/rocksdb/rocksdb/db/column_family.h +7 -5
  17. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +22 -0
  18. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +8 -0
  19. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +6 -3
  20. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +15 -0
  21. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +35 -2
  22. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +55 -0
  23. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +21 -19
  24. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +60 -1
  25. package/deps/rocksdb/rocksdb/db/db_flush_test.cc +247 -6
  26. package/deps/rocksdb/rocksdb/db/db_impl/compacted_db_impl.h +10 -0
  27. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -33
  28. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -2
  29. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +7 -15
  30. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +17 -3
  31. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +10 -4
  32. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h +9 -0
  33. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h +8 -0
  34. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +32 -0
  35. package/deps/rocksdb/rocksdb/db/db_inplace_update_test.cc +54 -0
  36. package/deps/rocksdb/rocksdb/db/db_iter.cc +50 -2
  37. package/deps/rocksdb/rocksdb/db/db_iter.h +2 -0
  38. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +56 -25
  39. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -1
  40. package/deps/rocksdb/rocksdb/db/db_test.cc +9 -0
  41. package/deps/rocksdb/rocksdb/db/flush_job.cc +17 -8
  42. package/deps/rocksdb/rocksdb/db/flush_job.h +1 -1
  43. package/deps/rocksdb/rocksdb/db/log_writer.h +1 -1
  44. package/deps/rocksdb/rocksdb/db/memtable.cc +103 -93
  45. package/deps/rocksdb/rocksdb/db/memtable.h +3 -3
  46. package/deps/rocksdb/rocksdb/db/merge_helper.cc +7 -2
  47. package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
  48. package/deps/rocksdb/rocksdb/db/version_set.cc +13 -5
  49. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +5 -0
  50. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +213 -0
  51. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +6 -7
  52. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization_test.cc +16 -0
  53. package/deps/rocksdb/rocksdb/db/write_batch.cc +154 -2
  54. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +3 -0
  55. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +4 -0
  56. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_filter.h +8 -3
  57. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +21 -1
  58. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +31 -4
  59. package/deps/rocksdb/rocksdb/env/env_test.cc +2 -2
  60. package/deps/rocksdb/rocksdb/env/fs_remap.cc +4 -0
  61. package/deps/rocksdb/rocksdb/file/random_access_file_reader.h +1 -1
  62. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +17 -0
  63. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +136 -0
  64. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +6 -0
  65. package/deps/rocksdb/rocksdb/include/rocksdb/env.h +1 -1
  66. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  67. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +23 -23
  68. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +7 -0
  69. package/deps/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h +11 -0
  70. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +14 -0
  71. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch_base.h +5 -0
  72. package/deps/rocksdb/rocksdb/options/cf_options.cc +7 -0
  73. package/deps/rocksdb/rocksdb/options/cf_options.h +19 -0
  74. package/deps/rocksdb/rocksdb/options/db_options.cc +1 -6
  75. package/deps/rocksdb/rocksdb/options/db_options.h +0 -1
  76. package/deps/rocksdb/rocksdb/options/options.cc +4 -1
  77. package/deps/rocksdb/rocksdb/options/options_helper.cc +2 -0
  78. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +1 -0
  79. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -4
  80. package/deps/rocksdb/rocksdb/port/win/env_win.cc +1 -1
  81. package/deps/rocksdb/rocksdb/src.mk +1 -0
  82. package/deps/rocksdb/rocksdb/table/block_based/block.cc +5 -3
  83. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +2 -2
  84. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +16 -9
  85. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +4 -2
  86. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +1 -1
  87. package/deps/rocksdb/rocksdb/table/block_based/filter_block.h +13 -7
  88. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +7 -3
  89. package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +4 -2
  90. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +28 -17
  91. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.h +15 -9
  92. package/deps/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc +32 -16
  93. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +28 -18
  94. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +15 -6
  95. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +16 -7
  96. package/deps/rocksdb/rocksdb/table/block_fetcher.cc +1 -1
  97. package/deps/rocksdb/rocksdb/table/get_context.cc +27 -6
  98. package/deps/rocksdb/rocksdb/table/get_context.h +2 -0
  99. package/deps/rocksdb/rocksdb/table/table_test.cc +5 -5
  100. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +46 -0
  101. package/deps/rocksdb/rocksdb/util/filter_bench.cc +3 -1
  102. package/deps/rocksdb/rocksdb/util/mutexlock.h +1 -1
  103. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +1 -1
  104. package/package.json +1 -1
  105. package/prebuilds/linux-x64/node.napi.node +0 -0
@@ -29,6 +29,8 @@
29
29
  // kTypeRollbackXID varstring
30
30
  // kTypeBeginPersistedPrepareXID
31
31
  // kTypeBeginUnprepareXID
32
+ // kTypeWideColumnEntity varstring varstring
33
+ // kTypeColumnFamilyWideColumnEntity varint32 varstring varstring
32
34
  // kTypeNoop
33
35
  // varstring :=
34
36
  // len: varint32
@@ -36,6 +38,8 @@
36
38
 
37
39
  #include "rocksdb/write_batch.h"
38
40
 
41
+ #include <algorithm>
42
+ #include <limits>
39
43
  #include <map>
40
44
  #include <stack>
41
45
  #include <stdexcept>
@@ -52,6 +56,7 @@
52
56
  #include "db/merge_context.h"
53
57
  #include "db/snapshot_impl.h"
54
58
  #include "db/trim_history_scheduler.h"
59
+ #include "db/wide/wide_column_serialization.h"
55
60
  #include "db/write_batch_internal.h"
56
61
  #include "monitoring/perf_context_imp.h"
57
62
  #include "monitoring/statistics.h"
@@ -82,6 +87,7 @@ enum ContentFlags : uint32_t {
82
87
  HAS_DELETE_RANGE = 1 << 9,
83
88
  HAS_BLOB_INDEX = 1 << 10,
84
89
  HAS_BEGIN_UNPREPARE = 1 << 11,
90
+ HAS_PUT_ENTITY = 1 << 12,
85
91
  };
86
92
 
87
93
  struct BatchContentClassifier : public WriteBatch::Handler {
@@ -92,6 +98,12 @@ struct BatchContentClassifier : public WriteBatch::Handler {
92
98
  return Status::OK();
93
99
  }
94
100
 
101
+ Status PutEntityCF(uint32_t /* column_family_id */, const Slice& /* key */,
102
+ const Slice& /* entity */) override {
103
+ content_flags |= ContentFlags::HAS_PUT_ENTITY;
104
+ return Status::OK();
105
+ }
106
+
95
107
  Status DeleteCF(uint32_t, const Slice&) override {
96
108
  content_flags |= ContentFlags::HAS_DELETE;
97
109
  return Status::OK();
@@ -287,6 +299,10 @@ bool WriteBatch::HasPut() const {
287
299
  return (ComputeContentFlags() & ContentFlags::HAS_PUT) != 0;
288
300
  }
289
301
 
302
+ bool WriteBatch::HasPutEntity() const {
303
+ return (ComputeContentFlags() & ContentFlags::HAS_PUT_ENTITY) != 0;
304
+ }
305
+
290
306
  bool WriteBatch::HasDelete() const {
291
307
  return (ComputeContentFlags() & ContentFlags::HAS_DELETE) != 0;
292
308
  }
@@ -435,6 +451,17 @@ Status ReadRecordFromWriteBatch(Slice* input, char* tag,
435
451
  return Status::Corruption("bad Rollback XID");
436
452
  }
437
453
  break;
454
+ case kTypeColumnFamilyWideColumnEntity:
455
+ if (!GetVarint32(input, column_family)) {
456
+ return Status::Corruption("bad WriteBatch PutEntity");
457
+ }
458
+ FALLTHROUGH_INTENDED;
459
+ case kTypeWideColumnEntity:
460
+ if (!GetLengthPrefixedSlice(input, key) ||
461
+ !GetLengthPrefixedSlice(input, value)) {
462
+ return Status::Corruption("bad WriteBatch PutEntity");
463
+ }
464
+ break;
438
465
  default:
439
466
  return Status::Corruption("unknown WriteBatch tag");
440
467
  }
@@ -462,6 +489,7 @@ Status WriteBatchInternal::Iterate(const WriteBatch* wb,
462
489
  (begin == WriteBatchInternal::kHeader) && (end == wb->rep_.size());
463
490
 
464
491
  Slice key, value, blob, xid;
492
+
465
493
  // Sometimes a sub-batch starts with a Noop. We want to exclude such Noops as
466
494
  // the batch boundary symbols otherwise we would mis-count the number of
467
495
  // batches. We do that by checking whether the accumulated batch is empty
@@ -661,6 +689,16 @@ Status WriteBatchInternal::Iterate(const WriteBatch* wb,
661
689
  assert(s.ok());
662
690
  empty_batch = true;
663
691
  break;
692
+ case kTypeWideColumnEntity:
693
+ case kTypeColumnFamilyWideColumnEntity:
694
+ assert(wb->content_flags_.load(std::memory_order_relaxed) &
695
+ (ContentFlags::DEFERRED | ContentFlags::HAS_PUT_ENTITY));
696
+ s = handler->PutEntityCF(column_family, key, value);
697
+ if (LIKELY(s.ok())) {
698
+ empty_batch = false;
699
+ ++found;
700
+ }
701
+ break;
664
702
  default:
665
703
  return Status::Corruption("unknown WriteBatch tag");
666
704
  }
@@ -891,6 +929,86 @@ Status WriteBatch::Put(ColumnFamilyHandle* column_family, const SliceParts& key,
891
929
  "Cannot call this method on column family enabling timestamp");
892
930
  }
893
931
 
932
+ Status WriteBatchInternal::PutEntity(WriteBatch* b, uint32_t column_family_id,
933
+ const Slice& key,
934
+ const WideColumns& columns) {
935
+ assert(b);
936
+
937
+ if (key.size() > size_t{std::numeric_limits<uint32_t>::max()}) {
938
+ return Status::InvalidArgument("key is too large");
939
+ }
940
+
941
+ WideColumns sorted_columns(columns);
942
+ std::sort(sorted_columns.begin(), sorted_columns.end(),
943
+ [](const WideColumn& lhs, const WideColumn& rhs) {
944
+ return lhs.name().compare(rhs.name()) < 0;
945
+ });
946
+
947
+ std::string entity;
948
+ const Status s = WideColumnSerialization::Serialize(sorted_columns, entity);
949
+ if (!s.ok()) {
950
+ return s;
951
+ }
952
+
953
+ if (entity.size() > size_t{std::numeric_limits<uint32_t>::max()}) {
954
+ return Status::InvalidArgument("wide column entity is too large");
955
+ }
956
+
957
+ LocalSavePoint save(b);
958
+
959
+ WriteBatchInternal::SetCount(b, WriteBatchInternal::Count(b) + 1);
960
+
961
+ if (column_family_id == 0) {
962
+ b->rep_.push_back(static_cast<char>(kTypeWideColumnEntity));
963
+ } else {
964
+ b->rep_.push_back(static_cast<char>(kTypeColumnFamilyWideColumnEntity));
965
+ PutVarint32(&b->rep_, column_family_id);
966
+ }
967
+
968
+ PutLengthPrefixedSlice(&b->rep_, key);
969
+ PutLengthPrefixedSlice(&b->rep_, entity);
970
+
971
+ b->content_flags_.store(b->content_flags_.load(std::memory_order_relaxed) |
972
+ ContentFlags::HAS_PUT_ENTITY,
973
+ std::memory_order_relaxed);
974
+
975
+ if (b->prot_info_ != nullptr) {
976
+ b->prot_info_->entries_.emplace_back(
977
+ ProtectionInfo64()
978
+ .ProtectKVO(key, entity, kTypeWideColumnEntity)
979
+ .ProtectC(column_family_id));
980
+ }
981
+
982
+ return save.commit();
983
+ }
984
+
985
+ Status WriteBatch::PutEntity(ColumnFamilyHandle* column_family,
986
+ const Slice& key, const WideColumns& columns) {
987
+ if (!column_family) {
988
+ return Status::InvalidArgument(
989
+ "Cannot call this method without a column family handle");
990
+ }
991
+
992
+ Status s;
993
+ uint32_t cf_id = 0;
994
+ size_t ts_sz = 0;
995
+
996
+ std::tie(s, cf_id, ts_sz) =
997
+ WriteBatchInternal::GetColumnFamilyIdAndTimestampSize(this,
998
+ column_family);
999
+
1000
+ if (!s.ok()) {
1001
+ return s;
1002
+ }
1003
+
1004
+ if (ts_sz) {
1005
+ return Status::InvalidArgument(
1006
+ "Cannot call this method on column family enabling timestamp");
1007
+ }
1008
+
1009
+ return WriteBatchInternal::PutEntity(this, cf_id, key, columns);
1010
+ }
1011
+
894
1012
  Status WriteBatchInternal::InsertNoop(WriteBatch* b) {
895
1013
  b->rep_.push_back(static_cast<char>(kTypeNoop));
896
1014
  return Status::OK();
@@ -1556,6 +1674,10 @@ Status WriteBatch::VerifyChecksum() const {
1556
1674
  case kTypeCommitXIDAndTimestamp:
1557
1675
  checksum_protected = false;
1558
1676
  break;
1677
+ case kTypeColumnFamilyWideColumnEntity:
1678
+ case kTypeWideColumnEntity:
1679
+ tag = kTypeWideColumnEntity;
1680
+ break;
1559
1681
  default:
1560
1682
  return Status::Corruption(
1561
1683
  "unknown WriteBatch tag",
@@ -1865,11 +1987,13 @@ class MemTableInserter : public WriteBatch::Handler {
1865
1987
  mem->Add(sequence_, value_type, key, value, kv_prot_info,
1866
1988
  concurrent_memtable_writes_, get_post_process_info(mem),
1867
1989
  hint_per_batch_ ? &GetHintMap()[mem] : nullptr);
1868
- } else if (moptions->inplace_callback == nullptr) {
1990
+ } else if (moptions->inplace_callback == nullptr ||
1991
+ value_type != kTypeValue) {
1869
1992
  assert(!concurrent_memtable_writes_);
1870
- ret_status = mem->Update(sequence_, key, value, kv_prot_info);
1993
+ ret_status = mem->Update(sequence_, value_type, key, value, kv_prot_info);
1871
1994
  } else {
1872
1995
  assert(!concurrent_memtable_writes_);
1996
+ assert(value_type == kTypeValue);
1873
1997
  ret_status = mem->UpdateCallback(sequence_, key, value, kv_prot_info);
1874
1998
  if (ret_status.IsNotFound()) {
1875
1999
  // key not found in memtable. Do sst get, update, add
@@ -1994,6 +2118,29 @@ class MemTableInserter : public WriteBatch::Handler {
1994
2118
  return ret_status;
1995
2119
  }
1996
2120
 
2121
+ Status PutEntityCF(uint32_t column_family_id, const Slice& key,
2122
+ const Slice& value) override {
2123
+ const auto* kv_prot_info = NextProtectionInfo();
2124
+
2125
+ Status s;
2126
+ if (kv_prot_info) {
2127
+ // Memtable needs seqno, doesn't need CF ID
2128
+ auto mem_kv_prot_info =
2129
+ kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
2130
+ s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity,
2131
+ &mem_kv_prot_info);
2132
+ } else {
2133
+ s = PutCFImpl(column_family_id, key, value, kTypeWideColumnEntity,
2134
+ /* kv_prot_info */ nullptr);
2135
+ }
2136
+
2137
+ if (UNLIKELY(s.IsTryAgain())) {
2138
+ DecrementProtectionInfoIdxForTryAgain();
2139
+ }
2140
+
2141
+ return s;
2142
+ }
2143
+
1997
2144
  Status DeleteImpl(uint32_t /*column_family_id*/, const Slice& key,
1998
2145
  const Slice& value, ValueType delete_type,
1999
2146
  const ProtectionInfoKVOS64* kv_prot_info) {
@@ -2780,6 +2927,11 @@ class ProtectionInfoUpdater : public WriteBatch::Handler {
2780
2927
  return UpdateProtInfo(cf, key, val, kTypeValue);
2781
2928
  }
2782
2929
 
2930
+ Status PutEntityCF(uint32_t cf, const Slice& key,
2931
+ const Slice& entity) override {
2932
+ return UpdateProtInfo(cf, key, entity, kTypeWideColumnEntity);
2933
+ }
2934
+
2783
2935
  Status DeleteCF(uint32_t cf, const Slice& key) override {
2784
2936
  return UpdateProtInfo(cf, key, "", kTypeDeletion);
2785
2937
  }
@@ -88,6 +88,9 @@ class WriteBatchInternal {
88
88
  static Status Put(WriteBatch* batch, uint32_t column_family_id,
89
89
  const SliceParts& key, const SliceParts& value);
90
90
 
91
+ static Status PutEntity(WriteBatch* batch, uint32_t column_family_id,
92
+ const Slice& key, const WideColumns& columns);
93
+
91
94
  static Status Delete(WriteBatch* batch, uint32_t column_family_id,
92
95
  const SliceParts& key);
93
96
 
@@ -267,6 +267,10 @@ DECLARE_double(blob_garbage_collection_age_cutoff);
267
267
  DECLARE_double(blob_garbage_collection_force_threshold);
268
268
  DECLARE_uint64(blob_compaction_readahead_size);
269
269
  DECLARE_int32(blob_file_starting_level);
270
+ DECLARE_bool(use_blob_cache);
271
+ DECLARE_bool(use_shared_block_and_blob_cache);
272
+ DECLARE_uint64(blob_cache_size);
273
+ DECLARE_int32(blob_cache_numshardbits);
270
274
 
271
275
  DECLARE_int32(approximate_size_one_in);
272
276
  DECLARE_bool(sync_fault_injection);
@@ -36,9 +36,14 @@ class DbStressCompactionFilter : public CompactionFilter {
36
36
  return Decision::kKeep;
37
37
  }
38
38
  uint64_t key_num = 0;
39
- bool ok = GetIntVal(key.ToString(), &key_num);
40
- assert(ok);
41
- (void)ok;
39
+ {
40
+ Slice ukey_without_ts = key;
41
+ assert(ukey_without_ts.size() >= FLAGS_user_timestamp_size);
42
+ ukey_without_ts.remove_suffix(FLAGS_user_timestamp_size);
43
+ [[maybe_unused]] bool ok =
44
+ GetIntVal(ukey_without_ts.ToString(), &key_num);
45
+ assert(ok);
46
+ }
42
47
  port::Mutex* key_mutex = state_->GetMutexForKey(cf_id_, key_num);
43
48
  if (!key_mutex->TryLock()) {
44
49
  return Decision::kKeep;
@@ -305,7 +305,7 @@ DEFINE_int64(cache_size, 2LL * KB * KB * KB,
305
305
  DEFINE_int32(cache_numshardbits, 6,
306
306
  "Number of shards for the block cache"
307
307
  " is 2 ** cache_numshardbits. Negative means use default settings."
308
- " This is applied only if FLAGS_cache_size is non-negative.");
308
+ " This is applied only if FLAGS_cache_size is greater than 0.");
309
309
 
310
310
  DEFINE_bool(cache_index_and_filter_blocks, false,
311
311
  "True if indexes/filters should be cached in block cache.");
@@ -450,6 +450,26 @@ DEFINE_int32(
450
450
  "[Integrated BlobDB] Enable writing blob files during flushes and "
451
451
  "compactions starting from the specified level.");
452
452
 
453
+ DEFINE_bool(use_blob_cache, false, "[Integrated BlobDB] Enable blob cache.");
454
+
455
+ DEFINE_bool(
456
+ use_shared_block_and_blob_cache, true,
457
+ "[Integrated BlobDB] Use a shared backing cache for both block "
458
+ "cache and blob cache. It only takes effect if use_blob_cache is enabled.");
459
+
460
+ DEFINE_uint64(
461
+ blob_cache_size, 2LL * KB * KB * KB,
462
+ "[Integrated BlobDB] Number of bytes to use as a cache of blobs. It only "
463
+ "takes effect if the block and blob caches are different "
464
+ "(use_shared_block_and_blob_cache = false).");
465
+
466
+ DEFINE_int32(blob_cache_numshardbits, 6,
467
+ "[Integrated BlobDB] Number of shards for the blob cache is 2 ** "
468
+ "blob_cache_numshardbits. Negative means use default settings. "
469
+ "It only takes effect if blob_cache_size is greater than 0, and "
470
+ "the block and blob caches are different "
471
+ "(use_shared_block_and_blob_cache = false).");
472
+
453
473
  static const bool FLAGS_subcompactions_dummy __attribute__((__unused__)) =
454
474
  RegisterFlagValidator(&FLAGS_subcompactions, &ValidateUint32Range);
455
475
 
@@ -193,6 +193,8 @@ bool StressTest::BuildOptionsTable() {
193
193
  {"memtable_huge_page_size", {"0", std::to_string(2 * 1024 * 1024)}},
194
194
  {"max_successive_merges", {"0", "2", "4"}},
195
195
  {"inplace_update_num_locks", {"100", "200", "300"}},
196
+ // TODO: re-enable once internal task T124324915 is fixed.
197
+ // {"experimental_mempurge_threshold", {"0.0", "1.0"}},
196
198
  // TODO(ljin): enable test for this option
197
199
  // {"disable_auto_compactions", {"100", "200", "300"}},
198
200
  {"level0_file_num_compaction_trigger",
@@ -2334,6 +2336,17 @@ void StressTest::Open(SharedState* shared) {
2334
2336
  options_.blob_compaction_readahead_size,
2335
2337
  options_.blob_file_starting_level);
2336
2338
 
2339
+ if (FLAGS_use_blob_cache) {
2340
+ fprintf(stdout,
2341
+ "Integrated BlobDB: blob cache enabled, block and blob caches "
2342
+ "shared: %d, blob cache size %" PRIu64
2343
+ ", blob cache num shard bits: %d\n",
2344
+ FLAGS_use_shared_block_and_blob_cache, FLAGS_blob_cache_size,
2345
+ FLAGS_blob_cache_numshardbits);
2346
+ } else {
2347
+ fprintf(stdout, "Integrated BlobDB: blob cache disabled\n");
2348
+ }
2349
+
2337
2350
  fprintf(stdout, "DB path: [%s]\n", FLAGS_db.c_str());
2338
2351
 
2339
2352
  Status s;
@@ -2684,10 +2697,6 @@ void CheckAndSetOptionsForUserTimestamp(Options& options) {
2684
2697
  exit(1);
2685
2698
  }
2686
2699
  #endif // !ROCKSDB_LITE
2687
- if (FLAGS_enable_compaction_filter) {
2688
- fprintf(stderr, "CompactionFilter not supported with timestamp.\n");
2689
- exit(1);
2690
- }
2691
2700
  if (FLAGS_test_cf_consistency || FLAGS_test_batches_snapshots) {
2692
2701
  fprintf(stderr,
2693
2702
  "Due to per-key ts-seq ordering constraint, only the (default) "
@@ -2886,6 +2895,24 @@ void InitializeOptionsFromFlags(
2886
2895
  options.blob_compaction_readahead_size = FLAGS_blob_compaction_readahead_size;
2887
2896
  options.blob_file_starting_level = FLAGS_blob_file_starting_level;
2888
2897
 
2898
+ if (FLAGS_use_blob_cache) {
2899
+ if (FLAGS_use_shared_block_and_blob_cache) {
2900
+ options.blob_cache = cache;
2901
+ } else {
2902
+ if (FLAGS_blob_cache_size > 0) {
2903
+ LRUCacheOptions co;
2904
+ co.capacity = FLAGS_blob_cache_size;
2905
+ co.num_shard_bits = FLAGS_blob_cache_numshardbits;
2906
+ options.blob_cache = NewLRUCache(co);
2907
+ } else {
2908
+ fprintf(stderr,
2909
+ "Unable to create a standalone blob cache if blob_cache_size "
2910
+ "<= 0.\n");
2911
+ exit(1);
2912
+ }
2913
+ }
2914
+ }
2915
+
2889
2916
  options.wal_compression =
2890
2917
  StringToCompressionType(FLAGS_wal_compression.c_str());
2891
2918
 
@@ -1980,7 +1980,7 @@ TEST_P(EnvPosixTestWithParam, PosixRandomRWFile) {
1980
1980
  // Cannot open non-existing file.
1981
1981
  ASSERT_NOK(env_->NewRandomRWFile(path, &file, EnvOptions()));
1982
1982
 
1983
- // Create the file using WriteableFile
1983
+ // Create the file using WritableFile
1984
1984
  {
1985
1985
  std::unique_ptr<WritableFile> wf;
1986
1986
  ASSERT_OK(env_->NewWritableFile(path, &wf, EnvOptions()));
@@ -2109,7 +2109,7 @@ TEST_P(EnvPosixTestWithParam, PosixRandomRWFileRandomized) {
2109
2109
  ASSERT_NOK(env_->NewRandomRWFile(path, &file, EnvOptions()));
2110
2110
  #endif
2111
2111
 
2112
- // Create the file using WriteableFile
2112
+ // Create the file using WritableFile
2113
2113
  {
2114
2114
  std::unique_ptr<WritableFile> wf;
2115
2115
  ASSERT_OK(env_->NewWritableFile(path, &wf, EnvOptions()));
@@ -268,6 +268,10 @@ IOStatus RemapFileSystem::RenameFile(const std::string& src,
268
268
  IODebugContext* dbg) {
269
269
  auto status_and_src_enc_path = EncodePath(src);
270
270
  if (!status_and_src_enc_path.first.ok()) {
271
+ if (status_and_src_enc_path.first.IsNotFound()) {
272
+ const IOStatus& s = status_and_src_enc_path.first;
273
+ status_and_src_enc_path.first = IOStatus::PathNotFound(s.ToString());
274
+ }
271
275
  return status_and_src_enc_path.first;
272
276
  }
273
277
  auto status_and_dest_enc_path = EncodePathWithNewBasename(dest);
@@ -38,7 +38,7 @@ FSReadRequest Align(const FSReadRequest& r, size_t alignment);
38
38
  // Otherwise, do nothing and return false.
39
39
  bool TryMerge(FSReadRequest* dest, const FSReadRequest& src);
40
40
 
41
- // RandomAccessFileReader is a wrapper on top of Env::RandomAccessFile. It is
41
+ // RandomAccessFileReader is a wrapper on top of FSRandomAccessFile. It is
42
42
  // responsible for:
43
43
  // - Handling Buffered and Direct reads appropriately.
44
44
  // - Rate limiting compaction reads.
@@ -345,6 +345,23 @@ struct AdvancedColumnFamilyOptions {
345
345
  // Dynamically changeable through SetOptions() API
346
346
  size_t inplace_update_num_locks = 10000;
347
347
 
348
+ // [experimental]
349
+ // Used to activate or deactivate the Mempurge feature (memtable garbage
350
+ // collection). (deactivated by default). At every flush, the total useful
351
+ // payload (total entries minus garbage entries) is estimated as a ratio
352
+ // [useful payload bytes]/[size of a memtable (in bytes)]. This ratio is then
353
+ // compared to this `threshold` value:
354
+ // - if ratio<threshold: the flush is replaced by a mempurge operation
355
+ // - else: a regular flush operation takes place.
356
+ // Threshold values:
357
+ // 0.0: mempurge deactivated (default).
358
+ // 1.0: recommended threshold value.
359
+ // >1.0 : aggressive mempurge.
360
+ // 0 < threshold < 1.0: mempurge triggered only for very low useful payload
361
+ // ratios.
362
+ // [experimental]
363
+ double experimental_mempurge_threshold = 0.0;
364
+
348
365
  // existing_value - pointer to previous value (from both memtable and sst).
349
366
  // nullptr if key doesn't exist
350
367
  // existing_value_size - pointer to size of existing_value).
@@ -110,6 +110,10 @@ typedef struct rocksdb_writeoptions_t rocksdb_writeoptions_t;
110
110
  typedef struct rocksdb_universal_compaction_options_t rocksdb_universal_compaction_options_t;
111
111
  typedef struct rocksdb_livefiles_t rocksdb_livefiles_t;
112
112
  typedef struct rocksdb_column_family_handle_t rocksdb_column_family_handle_t;
113
+ typedef struct rocksdb_column_family_metadata_t
114
+ rocksdb_column_family_metadata_t;
115
+ typedef struct rocksdb_level_metadata_t rocksdb_level_metadata_t;
116
+ typedef struct rocksdb_sst_file_metadata_t rocksdb_sst_file_metadata_t;
113
117
  typedef struct rocksdb_envoptions_t rocksdb_envoptions_t;
114
118
  typedef struct rocksdb_ingestexternalfileoptions_t rocksdb_ingestexternalfileoptions_t;
115
119
  typedef struct rocksdb_sstfilewriter_t rocksdb_sstfilewriter_t;
@@ -626,6 +630,15 @@ extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_cf(
626
630
  const char* start_key, size_t start_key_len, const char* limit_key,
627
631
  size_t limit_key_len);
628
632
 
633
+ extern ROCKSDB_LIBRARY_API void rocksdb_suggest_compact_range(
634
+ rocksdb_t* db, const char* start_key, size_t start_key_len,
635
+ const char* limit_key, size_t limit_key_len, char** errptr);
636
+
637
+ extern ROCKSDB_LIBRARY_API void rocksdb_suggest_compact_range_cf(
638
+ rocksdb_t* db, rocksdb_column_family_handle_t* column_family,
639
+ const char* start_key, size_t start_key_len, const char* limit_key,
640
+ size_t limit_key_len, char** errptr);
641
+
629
642
  extern ROCKSDB_LIBRARY_API void rocksdb_compact_range_opt(
630
643
  rocksdb_t* db, rocksdb_compactoptions_t* opt, const char* start_key,
631
644
  size_t start_key_len, const char* limit_key, size_t limit_key_len);
@@ -1498,6 +1511,11 @@ extern ROCKSDB_LIBRARY_API void rocksdb_options_set_report_bg_io_stats(
1498
1511
  extern ROCKSDB_LIBRARY_API unsigned char rocksdb_options_get_report_bg_io_stats(
1499
1512
  rocksdb_options_t*);
1500
1513
 
1514
+ extern ROCKSDB_LIBRARY_API void
1515
+ rocksdb_options_set_experimental_mempurge_threshold(rocksdb_options_t*, double);
1516
+ extern ROCKSDB_LIBRARY_API double
1517
+ rocksdb_options_get_experimental_mempurge_threshold(rocksdb_options_t*);
1518
+
1501
1519
  enum {
1502
1520
  rocksdb_tolerate_corrupted_tail_records_recovery = 0,
1503
1521
  rocksdb_absolute_consistency_recovery = 1,
@@ -2165,6 +2183,124 @@ extern ROCKSDB_LIBRARY_API void rocksdb_delete_file_in_range_cf(
2165
2183
  const char* start_key, size_t start_key_len, const char* limit_key,
2166
2184
  size_t limit_key_len, char** errptr);
2167
2185
 
2186
+ /* MetaData */
2187
+
2188
+ extern ROCKSDB_LIBRARY_API rocksdb_column_family_metadata_t*
2189
+ rocksdb_get_column_family_metadata(rocksdb_t* db);
2190
+
2191
+ /**
2192
+ * Returns the rocksdb_column_family_metadata_t of the specified
2193
+ * column family.
2194
+ *
2195
+ * Note that the caller is responsible to release the returned memory
2196
+ * using rocksdb_column_family_metadata_destroy.
2197
+ */
2198
+ extern ROCKSDB_LIBRARY_API rocksdb_column_family_metadata_t*
2199
+ rocksdb_get_column_family_metadata_cf(
2200
+ rocksdb_t* db, rocksdb_column_family_handle_t* column_family);
2201
+
2202
+ extern ROCKSDB_LIBRARY_API void rocksdb_column_family_metadata_destroy(
2203
+ rocksdb_column_family_metadata_t* cf_meta);
2204
+
2205
+ extern ROCKSDB_LIBRARY_API uint64_t rocksdb_column_family_metadata_get_size(
2206
+ rocksdb_column_family_metadata_t* cf_meta);
2207
+
2208
+ extern ROCKSDB_LIBRARY_API size_t rocksdb_column_family_metadata_get_file_count(
2209
+ rocksdb_column_family_metadata_t* cf_meta);
2210
+
2211
+ extern ROCKSDB_LIBRARY_API char* rocksdb_column_family_metadata_get_name(
2212
+ rocksdb_column_family_metadata_t* cf_meta);
2213
+
2214
+ extern ROCKSDB_LIBRARY_API size_t
2215
+ rocksdb_column_family_metadata_get_level_count(
2216
+ rocksdb_column_family_metadata_t* cf_meta);
2217
+
2218
+ /**
2219
+ * Returns the rocksdb_level_metadata_t of the ith level from the specified
2220
+ * column family metadata.
2221
+ *
2222
+ * If the specified i is greater than or equal to the number of levels
2223
+ * in the specified column family, then NULL will be returned.
2224
+ *
2225
+ * Note that the caller is responsible to release the returned memory
2226
+ * using rocksdb_level_metadata_destroy before releasing its parent
2227
+ * rocksdb_column_family_metadata_t.
2228
+ */
2229
+ extern ROCKSDB_LIBRARY_API rocksdb_level_metadata_t*
2230
+ rocksdb_column_family_metadata_get_level_metadata(
2231
+ rocksdb_column_family_metadata_t* cf_meta, size_t i);
2232
+
2233
+ /**
2234
+ * Releases the specified rocksdb_level_metadata_t.
2235
+ *
2236
+ * Note that the specified rocksdb_level_metadata_t must be released
2237
+ * before the release of its parent rocksdb_column_family_metadata_t.
2238
+ */
2239
+ extern ROCKSDB_LIBRARY_API void rocksdb_level_metadata_destroy(
2240
+ rocksdb_level_metadata_t* level_meta);
2241
+
2242
+ extern ROCKSDB_LIBRARY_API int rocksdb_level_metadata_get_level(
2243
+ rocksdb_level_metadata_t* level_meta);
2244
+
2245
+ extern ROCKSDB_LIBRARY_API uint64_t
2246
+ rocksdb_level_metadata_get_size(rocksdb_level_metadata_t* level_meta);
2247
+
2248
+ extern ROCKSDB_LIBRARY_API size_t
2249
+ rocksdb_level_metadata_get_file_count(rocksdb_level_metadata_t* level_meta);
2250
+
2251
+ /**
2252
+ * Returns the sst_file_metadata_t of the ith file from the specified level
2253
+ * metadata.
2254
+ *
2255
+ * If the specified i is greater than or equal to the number of files
2256
+ * in the specified level, then NULL will be returned.
2257
+ *
2258
+ * Note that the caller is responsible to release the returned memory
2259
+ * using rocksdb_sst_file_metadata_destroy before releasing its
2260
+ * parent rocksdb_level_metadata_t.
2261
+ */
2262
+ extern ROCKSDB_LIBRARY_API rocksdb_sst_file_metadata_t*
2263
+ rocksdb_level_metadata_get_sst_file_metadata(
2264
+ rocksdb_level_metadata_t* level_meta, size_t i);
2265
+
2266
+ /**
2267
+ * Releases the specified rocksdb_sst_file_metadata_t.
2268
+ *
2269
+ * Note that the specified rocksdb_sst_file_metadata_t must be released
2270
+ * before the release of its parent rocksdb_level_metadata_t.
2271
+ */
2272
+ extern ROCKSDB_LIBRARY_API void rocksdb_sst_file_metadata_destroy(
2273
+ rocksdb_sst_file_metadata_t* file_meta);
2274
+
2275
+ extern ROCKSDB_LIBRARY_API char*
2276
+ rocksdb_sst_file_metadata_get_relative_filename(
2277
+ rocksdb_sst_file_metadata_t* file_meta);
2278
+
2279
+ extern ROCKSDB_LIBRARY_API uint64_t
2280
+ rocksdb_sst_file_metadata_get_size(rocksdb_sst_file_metadata_t* file_meta);
2281
+
2282
+ /**
2283
+ * Returns the smallest key of the specified sst file.
2284
+ * The caller is responsible for releasing the returned memory.
2285
+ *
2286
+ * @param file_meta the metadata of an SST file to obtain its smallest key.
2287
+ * @param len the out value which will contain the length of the returned key
2288
+ * after the function call.
2289
+ */
2290
+ extern ROCKSDB_LIBRARY_API char* rocksdb_sst_file_metadata_get_smallestkey(
2291
+ rocksdb_sst_file_metadata_t* file_meta, size_t* len);
2292
+
2293
+ /**
2294
+ * Returns the smallest key of the specified sst file.
2295
+ * The caller is responsible for releasing the returned memory.
2296
+ *
2297
+ * @param file_meta the metadata of an SST file to obtain its smallest key.
2298
+ * @param len the out value which will contain the length of the returned key
2299
+ * after the function call.
2300
+ */
2301
+ extern ROCKSDB_LIBRARY_API char* rocksdb_sst_file_metadata_get_largestkey(
2302
+ rocksdb_sst_file_metadata_t* file_meta, size_t* len);
2303
+
2168
2304
  /* Transactions */
2169
2305
 
2170
2306
  extern ROCKSDB_LIBRARY_API rocksdb_column_family_handle_t*
@@ -27,6 +27,7 @@
27
27
  #include "rocksdb/transaction_log.h"
28
28
  #include "rocksdb/types.h"
29
29
  #include "rocksdb/version.h"
30
+ #include "rocksdb/wide_columns.h"
30
31
 
31
32
  #ifdef _WIN32
32
33
  // Windows API macro interference
@@ -406,6 +407,11 @@ class DB {
406
407
  return Put(options, DefaultColumnFamily(), key, ts, value);
407
408
  }
408
409
 
410
+ // UNDER CONSTRUCTION -- DO NOT USE
411
+ virtual Status PutEntity(const WriteOptions& options,
412
+ ColumnFamilyHandle* column_family, const Slice& key,
413
+ const WideColumns& columns) = 0;
414
+
409
415
  // Remove the database entry (if any) for "key". Returns OK on
410
416
  // success, and a non-OK status on error. It is not an error if "key"
411
417
  // did not exist in the database.
@@ -876,7 +876,7 @@ class WritableFile {
876
876
  virtual ~WritableFile();
877
877
 
878
878
  // Append data to the end of the file
879
- // Note: A WriteableFile object must support either Append or
879
+ // Note: A WritableFile object must support either Append or
880
880
  // PositionedAppend, so the users cannot mix the two.
881
881
  virtual Status Append(const Slice& data) = 0;
882
882
 
@@ -951,7 +951,7 @@ class FSWritableFile {
951
951
  virtual ~FSWritableFile() {}
952
952
 
953
953
  // Append data to the end of the file
954
- // Note: A WriteableFile object must support either Append or
954
+ // Note: A WritableFile object must support either Append or
955
955
  // PositionedAppend, so the users cannot mix the two.
956
956
  virtual IOStatus Append(const Slice& data, const IOOptions& options,
957
957
  IODebugContext* dbg) = 0;