@nxtedition/rocksdb 7.0.0-alpha.8 → 7.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/binding.cc +44 -46
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -1
  3. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +3 -1
  4. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +28 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +5 -2
  8. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +48 -60
  9. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +18 -20
  10. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
  11. package/deps/rocksdb/rocksdb/db/c.cc +5 -0
  12. package/deps/rocksdb/rocksdb/db/column_family.cc +20 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.h +9 -0
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +44 -26
  15. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +32 -14
  16. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -44
  17. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +3 -1
  18. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -1
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -5
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +47 -35
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -1
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +54 -32
  23. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +426 -61
  24. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -0
  25. package/deps/rocksdb/rocksdb/db/db_test.cc +102 -24
  26. package/deps/rocksdb/rocksdb/db/db_test2.cc +159 -30
  27. package/deps/rocksdb/rocksdb/db/db_test_util.cc +1 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +1 -1
  29. package/deps/rocksdb/rocksdb/db/version_builder.cc +39 -10
  30. package/deps/rocksdb/rocksdb/db/version_builder.h +4 -1
  31. package/deps/rocksdb/rocksdb/db/version_edit.h +20 -0
  32. package/deps/rocksdb/rocksdb/db/version_set.cc +2 -1
  33. package/deps/rocksdb/rocksdb/db/version_set.h +17 -2
  34. package/deps/rocksdb/rocksdb/db/version_set_test.cc +119 -0
  35. package/deps/rocksdb/rocksdb/db/write_batch.cc +96 -0
  36. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -0
  37. package/deps/rocksdb/rocksdb/db/write_thread.cc +1 -0
  38. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -0
  39. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  40. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +18 -2
  41. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -0
  42. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +12 -0
  43. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +1 -1
  44. package/deps/rocksdb/rocksdb/env/fs_posix.cc +96 -6
  45. package/deps/rocksdb/rocksdb/env/io_posix.cc +51 -18
  46. package/deps/rocksdb/rocksdb/env/io_posix.h +2 -0
  47. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +12 -5
  48. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +22 -6
  49. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +99 -8
  50. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +9 -1
  51. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +3 -0
  52. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -0
  53. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +4 -0
  54. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  55. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +7 -0
  56. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +11 -1
  57. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +4 -1
  58. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +14 -1
  59. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +6 -0
  60. package/deps/rocksdb/rocksdb/options/cf_options.cc +12 -1
  61. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  62. package/deps/rocksdb/rocksdb/options/options.cc +8 -1
  63. package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
  64. package/deps/rocksdb/rocksdb/options/options_parser.cc +2 -1
  65. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +7 -2
  66. package/deps/rocksdb/rocksdb/options/options_test.cc +52 -0
  67. package/deps/rocksdb/rocksdb/port/port_posix.h +10 -1
  68. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -1
  69. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +1 -1
  70. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  71. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -5
  72. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -10
  73. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +1 -1
  74. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +1 -1
  75. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  76. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  77. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +39 -12
  78. package/deps/rocksdb/rocksdb/util/comparator.cc +10 -0
  79. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1 -1
  80. package/deps/rocksdb/rocksdb/util/xxhash.h +2 -1
  81. package/index.js +4 -1
  82. package/package.json +1 -1
  83. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  84. package/prebuilds/darwin-x64/node.napi.node +0 -0
  85. package/prebuilds/linux-x64/node.napi.node +0 -0
package/binding.cc CHANGED
@@ -40,13 +40,13 @@ struct Updates;
40
40
  } \
41
41
  }
42
42
 
43
- #define ROCKS_STATUS_THROWS(call) \
44
- { \
45
- const auto status = (call); \
46
- if (!status.ok()) { \
43
+ #define ROCKS_STATUS_THROWS(call) \
44
+ { \
45
+ const auto status = (call); \
46
+ if (!status.ok()) { \
47
47
  napi_throw(env, ToError(env, status)); \
48
- return NULL; \
49
- } \
48
+ return NULL; \
49
+ } \
50
50
  }
51
51
 
52
52
  static bool IsString(napi_env env, napi_value value) {
@@ -224,7 +224,7 @@ static void Finalize(napi_env env, void* data, void* hint) {
224
224
  }
225
225
  }
226
226
 
227
- napi_status Convert(napi_env env, rocksdb::PinnableSlice&& s, bool asBuffer, napi_value& result) {
227
+ napi_status Convert(napi_env env, rocksdb::PinnableSlice& s, bool asBuffer, napi_value& result) {
228
228
  if (asBuffer) {
229
229
  auto ptr = new rocksdb::PinnableSlice(std::move(s));
230
230
  return napi_create_external_buffer(env, ptr->size(), const_cast<char*>(ptr->data()),
@@ -234,7 +234,7 @@ napi_status Convert(napi_env env, rocksdb::PinnableSlice&& s, bool asBuffer, nap
234
234
  }
235
235
  }
236
236
 
237
- napi_status Convert(napi_env env, std::optional<std::string>&& s, bool asBuffer, napi_value& result) {
237
+ napi_status Convert(napi_env env, std::optional<std::string>& s, bool asBuffer, napi_value& result) {
238
238
  if (!s) {
239
239
  return napi_get_null(env, &result);
240
240
  } else if (asBuffer) {
@@ -526,7 +526,7 @@ struct Iterator final : public BaseIterator {
526
526
  const bool fillCache,
527
527
  const bool keyAsBuffer,
528
528
  const bool valueAsBuffer,
529
- const uint32_t highWaterMarkBytes,
529
+ const int32_t highWaterMarkBytes,
530
530
  std::shared_ptr<const rocksdb::Snapshot> snapshot)
531
531
  : BaseIterator(database, column, reverse, lt, lte, gt, gte, limit, fillCache, snapshot),
532
532
  keys_(keys),
@@ -551,7 +551,7 @@ struct Iterator final : public BaseIterator {
551
551
  const bool values_;
552
552
  const bool keyAsBuffer_;
553
553
  const bool valueAsBuffer_;
554
- const uint32_t highWaterMarkBytes_;
554
+ const int32_t highWaterMarkBytes_;
555
555
  bool first_ = true;
556
556
 
557
557
  private:
@@ -663,17 +663,17 @@ struct OpenWorker final : public Worker {
663
663
  rocksdb::Status Execute(Database& database) override {
664
664
  rocksdb::DB* db = nullptr;
665
665
  const auto status = column_families_.empty()
666
- ? rocksdb::DB::Open(options_, location_, &db)
667
- : rocksdb::DB::Open(options_, location_, column_families_, &database.columns_, &db);
666
+ ? rocksdb::DB::Open(options_, location_, &db)
667
+ : rocksdb::DB::Open(options_, location_, column_families_, &database.columns_, &db);
668
668
  database.db_.reset(db);
669
669
  return status;
670
670
  }
671
671
 
672
- napi_status OnOk(napi_env env, napi_value callback) override {
672
+ napi_status OnOk(napi_env env, napi_value callback) override {
673
673
  const auto size = database_->columns_.size();
674
674
  napi_value result;
675
675
  NAPI_STATUS_RETURN(napi_create_object(env, &result));
676
-
676
+
677
677
  for (size_t n = 0; n < size; ++n) {
678
678
  napi_value column;
679
679
  NAPI_STATUS_RETURN(napi_create_external(env, database_->columns_[n], nullptr, nullptr, &column));
@@ -691,7 +691,8 @@ struct OpenWorker final : public Worker {
691
691
  std::vector<rocksdb::ColumnFamilyDescriptor> column_families_;
692
692
  };
693
693
 
694
- napi_status InitOptions(napi_env env, auto& columnOptions, auto options) {
694
+ template <typename T, typename U>
695
+ napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
695
696
  const auto memtable_memory_budget = Uint32Property(env, options, "memtableMemoryBudget").value_or(256 * 1024 * 1024);
696
697
 
697
698
  const auto compaction = StringProperty(env, options, "compaction").value_or("level");
@@ -786,16 +787,16 @@ NAPI_METHOD(db_open) {
786
787
 
787
788
  rocksdb::Options dbOptions;
788
789
 
789
- dbOptions.IncreaseParallelism(
790
- Uint32Property(env, options, "parallelism").value_or(std::thread::hardware_concurrency() / 2));
790
+ dbOptions.IncreaseParallelism(Uint32Property(env, options, "parallelism")
791
+ .value_or(std::max<uint32_t>(1, std::thread::hardware_concurrency() / 2)));
791
792
 
792
793
  dbOptions.create_if_missing = BooleanProperty(env, options, "createIfMissing").value_or(true);
793
794
  dbOptions.error_if_exists = BooleanProperty(env, options, "errorIfExists").value_or(false);
794
- dbOptions.avoid_unnecessary_blocking_io = BooleanProperty(env, options, "avoidUnnecessaryBlockingIO").value_or(true);
795
- dbOptions.use_adaptive_mutex = BooleanProperty(env, options, "useAdaptiveMutex").value_or(true);
796
- dbOptions.enable_pipelined_write = BooleanProperty(env, options, "enablePipelinedWrite").value_or(true);
797
- dbOptions.max_background_jobs =
798
- Uint32Property(env, options, "maxBackgroundJobs").value_or(std::thread::hardware_concurrency() / 4);
795
+ dbOptions.avoid_unnecessary_blocking_io = true;
796
+ dbOptions.use_adaptive_mutex = true;
797
+ dbOptions.enable_pipelined_write = false;
798
+ dbOptions.max_background_jobs = Uint32Property(env, options, "maxBackgroundJobs")
799
+ .value_or(std::max<uint32_t>(2, std::thread::hardware_concurrency() / 8));
799
800
  dbOptions.WAL_ttl_seconds = Uint32Property(env, options, "walTTL").value_or(0) / 1e3;
800
801
  dbOptions.WAL_size_limit_MB = Uint32Property(env, options, "walSizeLimit").value_or(0) / 1e6;
801
802
  dbOptions.create_missing_column_families = true;
@@ -831,7 +832,7 @@ NAPI_METHOD(db_open) {
831
832
  NAPI_STATUS_THROWS(InitOptions(env, dbOptions, options));
832
833
 
833
834
  std::vector<rocksdb::ColumnFamilyDescriptor> column_families;
834
-
835
+
835
836
  if (HasProperty(env, options, "columns")) {
836
837
  napi_value columns;
837
838
  NAPI_STATUS_THROWS(napi_get_named_property(env, options, "columns", &columns));
@@ -845,7 +846,7 @@ NAPI_METHOD(db_open) {
845
846
  for (uint32_t n = 0; n < len; ++n) {
846
847
  napi_value key;
847
848
  NAPI_STATUS_THROWS(napi_get_element(env, keys, n, &key));
848
-
849
+
849
850
  napi_value column;
850
851
  NAPI_STATUS_THROWS(napi_get_property(env, columns, key, &column));
851
852
 
@@ -866,12 +867,12 @@ struct CloseWorker final : public Worker {
866
867
  CloseWorker(napi_env env, Database* database, napi_value callback)
867
868
  : Worker(env, database, callback, "leveldown.db.close") {}
868
869
 
869
- rocksdb::Status Execute(Database& database) override {
870
+ rocksdb::Status Execute(Database& database) override {
870
871
  for (auto it : database.columns_) {
871
872
  database.db_->DestroyColumnFamilyHandle(it);
872
873
  }
873
874
 
874
- return database.db_->Close();
875
+ return database.db_->Close();
875
876
  }
876
877
  };
877
878
 
@@ -943,8 +944,8 @@ struct UpdateNextWorker final : public rocksdb::WriteBatch::Handler, public Work
943
944
  napi_value key;
944
945
  napi_value val;
945
946
 
946
- NAPI_STATUS_RETURN(Convert(env, std::move(cache_[idx + 0]), updates_->keyAsBuffer_, key));
947
- NAPI_STATUS_RETURN(Convert(env, std::move(cache_[idx + 1]), updates_->valueAsBuffer_, val));
947
+ NAPI_STATUS_RETURN(Convert(env, cache_[idx + 0], updates_->keyAsBuffer_, key));
948
+ NAPI_STATUS_RETURN(Convert(env, cache_[idx + 1], updates_->valueAsBuffer_, val));
948
949
 
949
950
  NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(idx + 0), key));
950
951
  NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(idx + 1), val));
@@ -1084,7 +1085,7 @@ struct GetWorker final : public Worker {
1084
1085
  napi_status OnOk(napi_env env, napi_value callback) override {
1085
1086
  napi_value argv[2];
1086
1087
  NAPI_STATUS_RETURN(napi_get_null(env, &argv[0]));
1087
- NAPI_STATUS_RETURN(Convert(env, std::move(value_), asBuffer_, argv[1]));
1088
+ NAPI_STATUS_RETURN(Convert(env, value_, asBuffer_, argv[1]));
1088
1089
  return CallFunction(env, callback, 2, argv);
1089
1090
  }
1090
1091
 
@@ -1144,23 +1145,23 @@ struct GetManyWorker final : public Worker {
1144
1145
  readOptions.snapshot = snapshot_.get();
1145
1146
  readOptions.async_io = true;
1146
1147
 
1147
- const auto numKeys = keys_.size();
1148
-
1149
1148
  std::vector<rocksdb::Slice> keys;
1150
1149
  keys.reserve(keys_.size());
1151
1150
  for (const auto& key : keys_) {
1152
1151
  keys.emplace_back(key);
1153
1152
  }
1154
1153
 
1155
- statuses_.resize(numKeys);
1156
- values_.resize(numKeys);
1154
+ statuses_.resize(keys.size());
1155
+ values_.resize(keys.size());
1157
1156
 
1158
- database.db_->MultiGet(readOptions, column_, numKeys, keys.data(), values_.data(), statuses_.data());
1157
+ // database.db_->MultiGet(readOptions, column_, keys.size(), keys.data(), values_.data(), statuses_.data());
1159
1158
 
1160
- keys_.clear();
1161
- snapshot_ = nullptr;
1159
+ // TODO (fix): Use MultiGet once https://github.com/facebook/rocksdb/issues/10186 is resolved.
1160
+ for (auto n = 0; n < keys.size(); ++n) {
1161
+ statuses_[n] = database.db_->Get(readOptions, column_, keys[n], &values_[n]);
1162
+ }
1162
1163
 
1163
- for (auto status : statuses_) {
1164
+ for (const auto& status : statuses_) {
1164
1165
  if (!status.ok() && !status.IsNotFound()) {
1165
1166
  return status;
1166
1167
  }
@@ -1178,16 +1179,13 @@ struct GetManyWorker final : public Worker {
1178
1179
  for (size_t idx = 0; idx < size; idx++) {
1179
1180
  napi_value element;
1180
1181
  if (statuses_[idx].ok()) {
1181
- NAPI_STATUS_RETURN(Convert(env, std::move(values_[idx]), valueAsBuffer_, element));
1182
+ NAPI_STATUS_RETURN(Convert(env, values_[idx], valueAsBuffer_, element));
1182
1183
  } else {
1183
1184
  NAPI_STATUS_RETURN(napi_get_undefined(env, &element));
1184
1185
  }
1185
1186
  NAPI_STATUS_RETURN(napi_set_element(env, array, static_cast<uint32_t>(idx), element));
1186
1187
  }
1187
1188
 
1188
- values_.clear();
1189
- statuses_.clear();
1190
-
1191
1189
  napi_value argv[2];
1192
1190
  NAPI_STATUS_RETURN(napi_get_null(env, &argv[0]));
1193
1191
  argv[1] = array;
@@ -1375,7 +1373,7 @@ NAPI_METHOD(iterator_init) {
1375
1373
  const bool keyAsBuffer = EncodingIsBuffer(env, options, "keyEncoding");
1376
1374
  const bool valueAsBuffer = EncodingIsBuffer(env, options, "valueEncoding");
1377
1375
  const auto limit = Int32Property(env, options, "limit").value_or(-1);
1378
- const auto highWaterMarkBytes = Uint32Property(env, options, "highWaterMarkBytes").value_or(16 * 1024);
1376
+ const auto highWaterMarkBytes = Int32Property(env, options, "highWaterMarkBytes").value_or(16 * 1024);
1379
1377
 
1380
1378
  const auto lt = StringProperty(env, options, "lt");
1381
1379
  const auto lte = StringProperty(env, options, "lte");
@@ -1480,7 +1478,8 @@ struct NextWorker final : public Worker {
1480
1478
  cache_.push_back(v.ToString());
1481
1479
  }
1482
1480
 
1483
- if ((iterator_->highWaterMarkBytes_ != -1 && bytesRead > iterator_->highWaterMarkBytes_) || cache_.size() / 2 >= size_) {
1481
+ if ((iterator_->highWaterMarkBytes_ != -1 && bytesRead > static_cast<size_t>(iterator_->highWaterMarkBytes_)) ||
1482
+ cache_.size() / 2 >= size_) {
1484
1483
  finished_ = false;
1485
1484
  return rocksdb::Status::OK();
1486
1485
  }
@@ -1500,8 +1499,8 @@ struct NextWorker final : public Worker {
1500
1499
  napi_value key;
1501
1500
  napi_value val;
1502
1501
 
1503
- NAPI_STATUS_RETURN(Convert(env, std::move(cache_[n + 0]), iterator_->keyAsBuffer_, key));
1504
- NAPI_STATUS_RETURN(Convert(env, std::move(cache_[n + 1]), iterator_->valueAsBuffer_, val));
1502
+ NAPI_STATUS_RETURN(Convert(env, cache_[n + 0], iterator_->keyAsBuffer_, key));
1503
+ NAPI_STATUS_RETURN(Convert(env, cache_[n + 1], iterator_->valueAsBuffer_, val));
1505
1504
 
1506
1505
  NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(n + 0), key));
1507
1506
  NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(n + 1), val));
@@ -1547,7 +1546,6 @@ NAPI_METHOD(batch_do) {
1547
1546
  NAPI_STATUS_THROWS(napi_get_value_external(env, argv[0], (void**)&database));
1548
1547
 
1549
1548
  const auto operations = argv[1];
1550
- const auto options = argv[2];
1551
1549
 
1552
1550
  rocksdb::WriteBatch batch;
1553
1551
 
@@ -194,7 +194,7 @@ else()
194
194
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wstrict-prototypes")
195
195
  endif()
196
196
  if(MINGW)
197
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format -fno-asynchronous-unwind-tables")
197
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format")
198
198
  add_definitions(-D_POSIX_C_SOURCE=1)
199
199
  endif()
200
200
  if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -287,7 +287,9 @@ class CacheBench {
287
287
  exit(1);
288
288
  }
289
289
  } else if (FLAGS_cache_type == "fast_lru_cache") {
290
- cache_ = NewFastLRUCache(FLAGS_cache_size, FLAGS_num_shard_bits);
290
+ cache_ = NewFastLRUCache(
291
+ FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,
292
+ false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
291
293
  } else if (FLAGS_cache_type == "lru_cache") {
292
294
  LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, false, 0.5);
293
295
  #ifndef ROCKSDB_LITE
@@ -22,6 +22,7 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
22
22
  "CompressionDictionaryBuildingBuffer",
23
23
  "FilterConstruction",
24
24
  "BlockBasedTableReader",
25
+ "FileMetadata",
25
26
  "Misc",
26
27
  }};
27
28
 
@@ -36,6 +37,7 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
36
37
  "compression-dictionary-building-buffer",
37
38
  "filter-construction",
38
39
  "block-based-table-reader",
40
+ "file-metadata",
39
41
  "misc",
40
42
  }};
41
43
 
@@ -180,4 +180,5 @@ template class CacheReservationManagerImpl<
180
180
  template class CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>;
181
181
  template class CacheReservationManagerImpl<CacheEntryRole::kMisc>;
182
182
  template class CacheReservationManagerImpl<CacheEntryRole::kWriteBuffer>;
183
+ template class CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>;
183
184
  } // namespace ROCKSDB_NAMESPACE
@@ -36,6 +36,12 @@ class CacheReservationManager {
36
36
  };
37
37
  virtual ~CacheReservationManager() {}
38
38
  virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
39
+ // TODO(hx235): replace the usage of
40
+ // `UpdateCacheReservation(memory_used_delta, increase)` with
41
+ // `UpdateCacheReservation(new_memory_used)` so that we only have one
42
+ // `UpdateCacheReservation` function
43
+ virtual Status UpdateCacheReservation(std::size_t memory_used_delta,
44
+ bool increase) = 0;
39
45
  virtual Status MakeCacheReservation(
40
46
  std::size_t incremental_memory_used,
41
47
  std::unique_ptr<CacheReservationManager::CacheReservationHandle>
@@ -128,6 +134,11 @@ class CacheReservationManagerImpl
128
134
  // On keeping dummy entries the same, it always returns Status::OK().
129
135
  Status UpdateCacheReservation(std::size_t new_memory_used) override;
130
136
 
137
+ Status UpdateCacheReservation(std::size_t /* memory_used_delta */,
138
+ bool /* increase */) override {
139
+ return Status::NotSupported();
140
+ }
141
+
131
142
  // One of the two ways of reserving cache space and releasing is done through
132
143
  // destruction of CacheReservationHandle.
133
144
  // See UpdateCacheReservation() for the other way.
@@ -254,6 +265,23 @@ class ConcurrentCacheReservationManager
254
265
  std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
255
266
  return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
256
267
  }
268
+
269
+ inline Status UpdateCacheReservation(std::size_t memory_used_delta,
270
+ bool increase) override {
271
+ std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
272
+ std::size_t total_mem_used = cache_res_mgr_->GetTotalMemoryUsed();
273
+ Status s;
274
+ if (!increase) {
275
+ assert(total_mem_used >= memory_used_delta);
276
+ s = cache_res_mgr_->UpdateCacheReservation(total_mem_used -
277
+ memory_used_delta);
278
+ } else {
279
+ s = cache_res_mgr_->UpdateCacheReservation(total_mem_used +
280
+ memory_used_delta);
281
+ }
282
+ return s;
283
+ }
284
+
257
285
  inline Status MakeCacheReservation(
258
286
  std::size_t incremental_memory_used,
259
287
  std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
@@ -114,7 +114,9 @@ class CacheTest : public testing::TestWithParam<std::string> {
114
114
  return NewClockCache(capacity);
115
115
  }
116
116
  if (type == kFast) {
117
- return NewFastLRUCache(capacity);
117
+ return NewFastLRUCache(
118
+ capacity, 1 /*estimated_value_size*/, -1 /*num_shard_bits*/,
119
+ false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
118
120
  }
119
121
  return nullptr;
120
122
  }
@@ -137,7 +139,8 @@ class CacheTest : public testing::TestWithParam<std::string> {
137
139
  charge_policy);
138
140
  }
139
141
  if (type == kFast) {
140
- return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
142
+ return NewFastLRUCache(capacity, 1 /*estimated_value_size*/,
143
+ num_shard_bits, strict_capacity_limit,
141
144
  charge_policy);
142
145
  }
143
146
  return nullptr;
@@ -18,15 +18,17 @@
18
18
  #include "port/lang.h"
19
19
  #include "util/mutexlock.h"
20
20
 
21
+ #define KEY_LENGTH \
22
+ 16 // TODO(guido) Make use of this symbol in other parts of the source code
23
+ // (e.g., cache_key.h, cache_test.cc, etc.)
24
+
21
25
  namespace ROCKSDB_NAMESPACE {
22
26
 
23
27
  namespace fast_lru_cache {
24
28
 
25
- LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
26
- : length_bits_(/* historical starting size*/ 4),
27
- list_(new LRUHandle* [size_t{1} << length_bits_] {}),
28
- elems_(0),
29
- max_length_bits_(max_upper_hash_bits) {}
29
+ LRUHandleTable::LRUHandleTable(int hash_bits)
30
+ : length_bits_(hash_bits),
31
+ list_(new LRUHandle* [size_t{1} << length_bits_] {}) {}
30
32
 
31
33
  LRUHandleTable::~LRUHandleTable() {
32
34
  ApplyToEntriesRange(
@@ -42,19 +44,15 @@ LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
42
44
  return *FindPointer(key, hash);
43
45
  }
44
46
 
47
+ inline LRUHandle** LRUHandleTable::Head(uint32_t hash) {
48
+ return &list_[hash >> (32 - length_bits_)];
49
+ }
50
+
45
51
  LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
46
52
  LRUHandle** ptr = FindPointer(h->key(), h->hash);
47
53
  LRUHandle* old = *ptr;
48
54
  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
49
55
  *ptr = h;
50
- if (old == nullptr) {
51
- ++elems_;
52
- if ((elems_ >> length_bits_) > 0) { // elems_ >= length
53
- // Since each cache entry is fairly large, we aim for a small
54
- // average linked list length (<= 1).
55
- Resize();
56
- }
57
- }
58
56
  return old;
59
57
  }
60
58
 
@@ -63,7 +61,6 @@ LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
63
61
  LRUHandle* result = *ptr;
64
62
  if (result != nullptr) {
65
63
  *ptr = result->next_hash;
66
- --elems_;
67
64
  }
68
65
  return result;
69
66
  }
@@ -76,46 +73,13 @@ LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
76
73
  return ptr;
77
74
  }
78
75
 
79
- void LRUHandleTable::Resize() {
80
- if (length_bits_ >= max_length_bits_) {
81
- // Due to reaching limit of hash information, if we made the table bigger,
82
- // we would allocate more addresses but only the same number would be used.
83
- return;
84
- }
85
- if (length_bits_ >= 31) {
86
- // Avoid undefined behavior shifting uint32_t by 32.
87
- return;
88
- }
89
-
90
- uint32_t old_length = uint32_t{1} << length_bits_;
91
- int new_length_bits = length_bits_ + 1;
92
- std::unique_ptr<LRUHandle* []> new_list {
93
- new LRUHandle* [size_t{1} << new_length_bits] {}
94
- };
95
- uint32_t count = 0;
96
- for (uint32_t i = 0; i < old_length; i++) {
97
- LRUHandle* h = list_[i];
98
- while (h != nullptr) {
99
- LRUHandle* next = h->next_hash;
100
- uint32_t hash = h->hash;
101
- LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
102
- h->next_hash = *ptr;
103
- *ptr = h;
104
- h = next;
105
- count++;
106
- }
107
- }
108
- assert(elems_ == count);
109
- list_ = std::move(new_list);
110
- length_bits_ = new_length_bits;
111
- }
112
-
113
- LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
114
- CacheMetadataChargePolicy metadata_charge_policy,
115
- int max_upper_hash_bits)
76
+ LRUCacheShard::LRUCacheShard(size_t capacity, size_t estimated_value_size,
77
+ bool strict_capacity_limit,
78
+ CacheMetadataChargePolicy metadata_charge_policy)
116
79
  : capacity_(0),
117
80
  strict_capacity_limit_(strict_capacity_limit),
118
- table_(max_upper_hash_bits),
81
+ table_(
82
+ GetHashBits(capacity, estimated_value_size, metadata_charge_policy)),
119
83
  usage_(0),
120
84
  lru_usage_(0) {
121
85
  set_metadata_charge_policy(metadata_charge_policy);
@@ -220,6 +184,27 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
220
184
  }
221
185
  }
222
186
 
187
+ int LRUCacheShard::GetHashBits(
188
+ size_t capacity, size_t estimated_value_size,
189
+ CacheMetadataChargePolicy metadata_charge_policy) {
190
+ LRUHandle* e = reinterpret_cast<LRUHandle*>(
191
+ new char[sizeof(LRUHandle) - 1 + KEY_LENGTH]);
192
+ e->key_length = KEY_LENGTH;
193
+ e->deleter = nullptr;
194
+ e->refs = 0;
195
+ e->flags = 0;
196
+ e->refs = 0;
197
+
198
+ e->CalcTotalCharge(estimated_value_size, metadata_charge_policy);
199
+ size_t num_entries = capacity / e->total_charge;
200
+ e->Free();
201
+ int num_hash_bits = 0;
202
+ while (num_entries >>= 1) {
203
+ ++num_hash_bits;
204
+ }
205
+ return num_hash_bits;
206
+ }
207
+
223
208
  void LRUCacheShard::SetCapacity(size_t capacity) {
224
209
  autovector<LRUHandle*> last_reference_list;
225
210
  {
@@ -368,8 +353,9 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
368
353
  size_t charge, Cache::DeleterFn deleter,
369
354
  Cache::Handle** handle,
370
355
  Cache::Priority /*priority*/) {
371
- if (key.size() != 16) {
372
- return Status::NotSupported("FastLRUCache only supports key size 16B.");
356
+ if (key.size() != KEY_LENGTH) {
357
+ return Status::NotSupported("FastLRUCache only supports key size " +
358
+ std::to_string(KEY_LENGTH) + "B");
373
359
  }
374
360
 
375
361
  // Allocate the memory here outside of the mutex.
@@ -431,8 +417,8 @@ size_t LRUCacheShard::GetPinnedUsage() const {
431
417
 
432
418
  std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; }
433
419
 
434
- LRUCache::LRUCache(size_t capacity, int num_shard_bits,
435
- bool strict_capacity_limit,
420
+ LRUCache::LRUCache(size_t capacity, size_t estimated_value_size,
421
+ int num_shard_bits, bool strict_capacity_limit,
436
422
  CacheMetadataChargePolicy metadata_charge_policy)
437
423
  : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
438
424
  num_shards_ = 1 << num_shard_bits;
@@ -441,8 +427,8 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits,
441
427
  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
442
428
  for (int i = 0; i < num_shards_; i++) {
443
429
  new (&shards_[i])
444
- LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy,
445
- /* max_upper_hash_bits */ 32 - num_shard_bits);
430
+ LRUCacheShard(per_shard, estimated_value_size, strict_capacity_limit,
431
+ metadata_charge_policy);
446
432
  }
447
433
  }
448
434
 
@@ -497,7 +483,8 @@ void LRUCache::DisownData() {
497
483
  } // namespace fast_lru_cache
498
484
 
499
485
  std::shared_ptr<Cache> NewFastLRUCache(
500
- size_t capacity, int num_shard_bits, bool strict_capacity_limit,
486
+ size_t capacity, size_t estimated_value_size, int num_shard_bits,
487
+ bool strict_capacity_limit,
501
488
  CacheMetadataChargePolicy metadata_charge_policy) {
502
489
  if (num_shard_bits >= 20) {
503
490
  return nullptr; // The cache cannot be sharded into too many fine pieces.
@@ -506,7 +493,8 @@ std::shared_ptr<Cache> NewFastLRUCache(
506
493
  num_shard_bits = GetDefaultCacheShardBits(capacity);
507
494
  }
508
495
  return std::make_shared<fast_lru_cache::LRUCache>(
509
- capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
496
+ capacity, estimated_value_size, num_shard_bits, strict_capacity_limit,
497
+ metadata_charge_policy);
510
498
  }
511
499
 
512
500
  } // namespace ROCKSDB_NAMESPACE
@@ -114,10 +114,7 @@ struct LRUHandle {
114
114
  // 4.4.3's builtin hashtable.
115
115
  class LRUHandleTable {
116
116
  public:
117
- // If the table uses more hash bits than `max_upper_hash_bits`,
118
- // it will eat into the bits used for sharding, which are constant
119
- // for a given LRUHandleTable.
120
- explicit LRUHandleTable(int max_upper_hash_bits);
117
+ explicit LRUHandleTable(int hash_bits);
121
118
  ~LRUHandleTable();
122
119
 
123
120
  LRUHandle* Lookup(const Slice& key, uint32_t hash);
@@ -139,14 +136,16 @@ class LRUHandleTable {
139
136
 
140
137
  int GetLengthBits() const { return length_bits_; }
141
138
 
139
+ // Return the address of the head of the chain in the bucket given
140
+ // by the hash.
141
+ inline LRUHandle** Head(uint32_t hash);
142
+
142
143
  private:
143
144
  // Return a pointer to slot that points to a cache entry that
144
145
  // matches key/hash. If there is no such cache entry, return a
145
146
  // pointer to the trailing slot in the corresponding linked list.
146
147
  LRUHandle** FindPointer(const Slice& key, uint32_t hash);
147
148
 
148
- void Resize();
149
-
150
149
  // Number of hash bits (upper because lower bits used for sharding)
151
150
  // used for table index. Length == 1 << length_bits_
152
151
  int length_bits_;
@@ -154,20 +153,14 @@ class LRUHandleTable {
154
153
  // The table consists of an array of buckets where each bucket is
155
154
  // a linked list of cache entries that hash into the bucket.
156
155
  std::unique_ptr<LRUHandle*[]> list_;
157
-
158
- // Number of elements currently in the table.
159
- uint32_t elems_;
160
-
161
- // Set from max_upper_hash_bits (see constructor).
162
- const int max_length_bits_;
163
156
  };
164
157
 
165
158
  // A single shard of sharded cache.
166
159
  class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
167
160
  public:
168
- LRUCacheShard(size_t capacity, bool strict_capacity_limit,
169
- CacheMetadataChargePolicy metadata_charge_policy,
170
- int max_upper_hash_bits);
161
+ LRUCacheShard(size_t capacity, size_t estimated_value_size,
162
+ bool strict_capacity_limit,
163
+ CacheMetadataChargePolicy metadata_charge_policy);
171
164
  ~LRUCacheShard() override = default;
172
165
 
173
166
  // Separate from constructor so caller can easily make an array of LRUCache
@@ -239,6 +232,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
239
232
  // holding the mutex_.
240
233
  void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
241
234
 
235
+ // Returns the number of bits used to hash an element in the per-shard
236
+ // table.
237
+ static int GetHashBits(size_t capacity, size_t estimated_value_size,
238
+ CacheMetadataChargePolicy metadata_charge_policy);
239
+
242
240
  // Initialized before use.
243
241
  size_t capacity_;
244
242
 
@@ -284,7 +282,8 @@ class LRUCache
284
282
  #endif
285
283
  : public ShardedCache {
286
284
  public:
287
- LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
285
+ LRUCache(size_t capacity, size_t estimated_value_size, int num_shard_bits,
286
+ bool strict_capacity_limit,
288
287
  CacheMetadataChargePolicy metadata_charge_policy =
289
288
  kDontChargeCacheMetadata);
290
289
  ~LRUCache() override;
@@ -304,9 +303,8 @@ class LRUCache
304
303
  } // namespace fast_lru_cache
305
304
 
306
305
  std::shared_ptr<Cache> NewFastLRUCache(
307
- size_t capacity, int num_shard_bits = -1,
308
- bool strict_capacity_limit = false,
309
- CacheMetadataChargePolicy metadata_charge_policy =
310
- kDefaultCacheMetadataChargePolicy);
306
+ size_t capacity, size_t estimated_value_size, int num_shard_bits,
307
+ bool strict_capacity_limit,
308
+ CacheMetadataChargePolicy metadata_charge_policy);
311
309
 
312
310
  } // namespace ROCKSDB_NAMESPACE
@@ -226,8 +226,8 @@ class FastLRUCacheTest : public testing::Test {
226
226
  cache_ = reinterpret_cast<fast_lru_cache::LRUCacheShard*>(
227
227
  port::cacheline_aligned_alloc(sizeof(fast_lru_cache::LRUCacheShard)));
228
228
  new (cache_) fast_lru_cache::LRUCacheShard(
229
- capacity, false /*strict_capcity_limit*/, kDontChargeCacheMetadata,
230
- 24 /*max_upper_hash_bits*/);
229
+ capacity, 1 /*estimated_value_size*/, false /*strict_capacity_limit*/,
230
+ kDontChargeCacheMetadata);
231
231
  }
232
232
 
233
233
  Status Insert(const std::string& key) {
@@ -3048,6 +3048,11 @@ int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
3048
3048
  return opt->rep.blob_file_starting_level;
3049
3049
  }
3050
3050
 
3051
+ void rocksdb_options_set_blob_cache(rocksdb_options_t* opt,
3052
+ rocksdb_cache_t* blob_cache) {
3053
+ opt->rep.blob_cache = blob_cache->rep;
3054
+ }
3055
+
3051
3056
  void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
3052
3057
  opt->rep.num_levels = n;
3053
3058
  }