@nxtedition/rocksdb 7.0.0 → 7.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/binding.cc +38 -40
  2. package/deps/rocksdb/rocksdb/CMakeLists.txt +1 -1
  3. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +3 -1
  4. package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +2 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +1 -0
  6. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +28 -0
  7. package/deps/rocksdb/rocksdb/cache/cache_test.cc +5 -2
  8. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +48 -60
  9. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h +18 -20
  10. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +2 -2
  11. package/deps/rocksdb/rocksdb/db/c.cc +5 -0
  12. package/deps/rocksdb/rocksdb/db/column_family.cc +20 -0
  13. package/deps/rocksdb/rocksdb/db/column_family.h +9 -0
  14. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +44 -26
  15. package/deps/rocksdb/rocksdb/db/comparator_db_test.cc +32 -14
  16. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -44
  17. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +3 -1
  18. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +6 -1
  19. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +10 -5
  20. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_files.cc +47 -35
  21. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +2 -1
  22. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +54 -32
  23. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +426 -61
  24. package/deps/rocksdb/rocksdb/db/db_options_test.cc +1 -0
  25. package/deps/rocksdb/rocksdb/db/db_test.cc +102 -24
  26. package/deps/rocksdb/rocksdb/db/db_test2.cc +159 -30
  27. package/deps/rocksdb/rocksdb/db/db_test_util.cc +1 -0
  28. package/deps/rocksdb/rocksdb/db/dbformat.h +1 -1
  29. package/deps/rocksdb/rocksdb/db/version_builder.cc +39 -10
  30. package/deps/rocksdb/rocksdb/db/version_builder.h +4 -1
  31. package/deps/rocksdb/rocksdb/db/version_edit.h +20 -0
  32. package/deps/rocksdb/rocksdb/db/version_set.cc +2 -1
  33. package/deps/rocksdb/rocksdb/db/version_set.h +17 -2
  34. package/deps/rocksdb/rocksdb/db/version_set_test.cc +119 -0
  35. package/deps/rocksdb/rocksdb/db/write_batch.cc +96 -0
  36. package/deps/rocksdb/rocksdb/db/write_batch_internal.h +4 -0
  37. package/deps/rocksdb/rocksdb/db/write_thread.cc +1 -0
  38. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +3 -0
  39. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +9 -0
  40. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +18 -2
  41. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +4 -0
  42. package/deps/rocksdb/rocksdb/db_stress_tool/multi_ops_txns_stress.cc +12 -0
  43. package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +1 -1
  44. package/deps/rocksdb/rocksdb/env/fs_posix.cc +96 -6
  45. package/deps/rocksdb/rocksdb/env/io_posix.cc +51 -18
  46. package/deps/rocksdb/rocksdb/env/io_posix.h +2 -0
  47. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +12 -5
  48. package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.h +22 -6
  49. package/deps/rocksdb/rocksdb/file/prefetch_test.cc +99 -8
  50. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +9 -1
  51. package/deps/rocksdb/rocksdb/include/rocksdb/c.h +3 -0
  52. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +3 -0
  53. package/deps/rocksdb/rocksdb/include/rocksdb/comparator.h +4 -0
  54. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  55. package/deps/rocksdb/rocksdb/include/rocksdb/io_status.h +7 -0
  56. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +11 -1
  57. package/deps/rocksdb/rocksdb/include/rocksdb/slice_transform.h +4 -1
  58. package/deps/rocksdb/rocksdb/include/rocksdb/table.h +14 -1
  59. package/deps/rocksdb/rocksdb/include/rocksdb/write_batch.h +6 -0
  60. package/deps/rocksdb/rocksdb/options/cf_options.cc +12 -1
  61. package/deps/rocksdb/rocksdb/options/cf_options.h +2 -0
  62. package/deps/rocksdb/rocksdb/options/options.cc +8 -1
  63. package/deps/rocksdb/rocksdb/options/options_helper.cc +1 -0
  64. package/deps/rocksdb/rocksdb/options/options_parser.cc +2 -1
  65. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +7 -2
  66. package/deps/rocksdb/rocksdb/options/options_test.cc +52 -0
  67. package/deps/rocksdb/rocksdb/port/port_posix.h +10 -1
  68. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -1
  69. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +1 -1
  70. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +1 -1
  71. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -5
  72. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.cc +16 -10
  73. package/deps/rocksdb/rocksdb/table/block_based/block_prefetcher.h +1 -1
  74. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +1 -1
  75. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_iterator.cc +4 -4
  76. package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +1 -1
  77. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +39 -12
  78. package/deps/rocksdb/rocksdb/util/comparator.cc +10 -0
  79. package/deps/rocksdb/rocksdb/util/ribbon_alg.h +1 -1
  80. package/deps/rocksdb/rocksdb/util/xxhash.h +2 -1
  81. package/index.js +2 -2
  82. package/package.json +1 -1
  83. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  84. package/prebuilds/linux-x64/node.napi.node +0 -0
package/binding.cc CHANGED
@@ -40,13 +40,13 @@ struct Updates;
   } \
 }
 
-#define ROCKS_STATUS_THROWS(call) \
-  { \
-    const auto status = (call); \
-    if (!status.ok()) { \
+#define ROCKS_STATUS_THROWS(call) \
+  { \
+    const auto status = (call); \
+    if (!status.ok()) { \
       napi_throw(env, ToError(env, status)); \
-      return NULL; \
-    } \
+      return NULL; \
+    } \
   }
 
 static bool IsString(napi_env env, napi_value value) {
@@ -224,7 +224,7 @@ static void Finalize(napi_env env, void* data, void* hint) {
   }
 }
 
-napi_status Convert(napi_env env, rocksdb::PinnableSlice&& s, bool asBuffer, napi_value& result) {
+napi_status Convert(napi_env env, rocksdb::PinnableSlice& s, bool asBuffer, napi_value& result) {
   if (asBuffer) {
     auto ptr = new rocksdb::PinnableSlice(std::move(s));
     return napi_create_external_buffer(env, ptr->size(), const_cast<char*>(ptr->data()),
@@ -234,7 +234,7 @@ napi_status Convert(napi_env env, rocksdb::PinnableSlice&& s, bool asBuffer, nap
   }
 }
 
-napi_status Convert(napi_env env, std::optional<std::string>&& s, bool asBuffer, napi_value& result) {
+napi_status Convert(napi_env env, std::optional<std::string>& s, bool asBuffer, napi_value& result) {
   if (!s) {
     return napi_get_null(env, &result);
   } else if (asBuffer) {
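
Note: the two Convert() hunks above change the value parameter from an rvalue reference to an lvalue reference, and later hunks drop the matching std::move() at every call site. A standalone sketch of the pattern, with illustrative names (Consume is not part of the binding):

    #include <string>
    #include <utility>

    // The callee binds an lvalue reference and performs the move itself, so
    // ownership transfer happens in one place instead of at every call site.
    void Consume(std::string& s, std::string& out) {
      out = std::move(s);  // leaves `s` valid but unspecified
    }

    int main() {
      std::string src = "value";
      std::string dst;
      Consume(src, dst);  // no std::move() at the call site
      return 0;
    }
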
@@ -663,17 +663,17 @@ struct OpenWorker final : public Worker {
   rocksdb::Status Execute(Database& database) override {
     rocksdb::DB* db = nullptr;
     const auto status = column_families_.empty()
-      ? rocksdb::DB::Open(options_, location_, &db)
-      : rocksdb::DB::Open(options_, location_, column_families_, &database.columns_, &db);
+      ? rocksdb::DB::Open(options_, location_, &db)
+      : rocksdb::DB::Open(options_, location_, column_families_, &database.columns_, &db);
     database.db_.reset(db);
     return status;
   }
 
-  napi_status OnOk(napi_env env, napi_value callback) override {
+  napi_status OnOk(napi_env env, napi_value callback) override {
     const auto size = database_->columns_.size();
     napi_value result;
     NAPI_STATUS_RETURN(napi_create_object(env, &result));
-
+
     for (size_t n = 0; n < size; ++n) {
       napi_value column;
       NAPI_STATUS_RETURN(napi_create_external(env, database_->columns_[n], nullptr, nullptr, &column));
@@ -691,8 +691,8 @@ struct OpenWorker final : public Worker {
   std::vector<rocksdb::ColumnFamilyDescriptor> column_families_;
 };
 
-template <typename T>
-napi_status InitOptions(napi_env env, T& columnOptions, auto options) {
+template <typename T, typename U>
+napi_status InitOptions(napi_env env, T& columnOptions, const U& options) {
   const auto memtable_memory_budget = Uint32Property(env, options, "memtableMemoryBudget").value_or(256 * 1024 * 1024);
 
   const auto compaction = StringProperty(env, options, "compaction").value_or("level");
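
Note: the InitOptions hunk replaces a C++20 abbreviated function template (the `auto options` parameter) with an explicit second template parameter, which also compiles under C++14/17. A minimal sketch of the equivalence, with illustrative names:

    #include <iostream>
    #include <string>

    // Portable equivalent of `void Init(T& target, auto options)`: the `auto`
    // parameter form is C++20-only, the explicit form works on older standards.
    template <typename T, typename U>
    void Init(T& target, const U& options) {
      target = options;
    }

    int main() {
      std::string s;
      Init(s, "compaction=level");  // U deduced from the argument
      std::cout << s << "\n";
      return 0;
    }
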
@@ -787,16 +787,16 @@ NAPI_METHOD(db_open) {
 
   rocksdb::Options dbOptions;
 
-  dbOptions.IncreaseParallelism(
-      Uint32Property(env, options, "parallelism").value_or(std::thread::hardware_concurrency() / 2));
+  dbOptions.IncreaseParallelism(Uint32Property(env, options, "parallelism")
+      .value_or(std::max<uint32_t>(1, std::thread::hardware_concurrency() / 2)));
 
   dbOptions.create_if_missing = BooleanProperty(env, options, "createIfMissing").value_or(true);
   dbOptions.error_if_exists = BooleanProperty(env, options, "errorIfExists").value_or(false);
   dbOptions.avoid_unnecessary_blocking_io = true;
   dbOptions.use_adaptive_mutex = true;
   dbOptions.enable_pipelined_write = false;
-  dbOptions.max_background_jobs =
-      Uint32Property(env, options, "maxBackgroundJobs").value_or(std::thread::hardware_concurrency() / 4);
+  dbOptions.max_background_jobs = Uint32Property(env, options, "maxBackgroundJobs")
+      .value_or(std::max<uint32_t>(2, std::thread::hardware_concurrency() / 8));
   dbOptions.WAL_ttl_seconds = Uint32Property(env, options, "walTTL").value_or(0) / 1e3;
   dbOptions.WAL_size_limit_MB = Uint32Property(env, options, "walSizeLimit").value_or(0) / 1e6;
   dbOptions.create_missing_column_families = true;
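
Note: besides lowering the background-job divisor, the hunk above clamps both derived thread counts. std::thread::hardware_concurrency() is allowed to return 0 when the value is not computable, so the old `hw / 2` and `hw / 4` defaults could configure zero threads; the new defaults guarantee at least 1 for parallelism and 2 for background jobs. A standalone sketch:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <thread>

    int main() {
      // hardware_concurrency() may return 0 when the value is not computable.
      const uint32_t hw = std::thread::hardware_concurrency();
      const uint32_t parallelism = std::max<uint32_t>(1, hw / 2);
      const uint32_t max_background_jobs = std::max<uint32_t>(2, hw / 8);
      std::cout << parallelism << " " << max_background_jobs << "\n";
      return 0;
    }
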
@@ -832,7 +832,7 @@ NAPI_METHOD(db_open) {
   NAPI_STATUS_THROWS(InitOptions(env, dbOptions, options));
 
   std::vector<rocksdb::ColumnFamilyDescriptor> column_families;
-
+
   if (HasProperty(env, options, "columns")) {
     napi_value columns;
     NAPI_STATUS_THROWS(napi_get_named_property(env, options, "columns", &columns));
@@ -846,7 +846,7 @@ NAPI_METHOD(db_open) {
   for (uint32_t n = 0; n < len; ++n) {
     napi_value key;
     NAPI_STATUS_THROWS(napi_get_element(env, keys, n, &key));
-
+
     napi_value column;
     NAPI_STATUS_THROWS(napi_get_property(env, columns, key, &column));
 
@@ -867,12 +867,12 @@ struct CloseWorker final : public Worker {
   CloseWorker(napi_env env, Database* database, napi_value callback)
       : Worker(env, database, callback, "leveldown.db.close") {}
 
-  rocksdb::Status Execute(Database& database) override {
+  rocksdb::Status Execute(Database& database) override {
     for (auto it : database.columns_) {
      database.db_->DestroyColumnFamilyHandle(it);
     }
 
-    return database.db_->Close();
+    return database.db_->Close();
   }
 };
 
@@ -944,8 +944,8 @@ struct UpdateNextWorker final : public rocksdb::WriteBatch::Handler, public Work
   napi_value key;
   napi_value val;
 
-  NAPI_STATUS_RETURN(Convert(env, std::move(cache_[idx + 0]), updates_->keyAsBuffer_, key));
-  NAPI_STATUS_RETURN(Convert(env, std::move(cache_[idx + 1]), updates_->valueAsBuffer_, val));
+  NAPI_STATUS_RETURN(Convert(env, cache_[idx + 0], updates_->keyAsBuffer_, key));
+  NAPI_STATUS_RETURN(Convert(env, cache_[idx + 1], updates_->valueAsBuffer_, val));
 
   NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(idx + 0), key));
   NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(idx + 1), val));
@@ -1085,7 +1085,7 @@ struct GetWorker final : public Worker {
   napi_status OnOk(napi_env env, napi_value callback) override {
     napi_value argv[2];
     NAPI_STATUS_RETURN(napi_get_null(env, &argv[0]));
-    NAPI_STATUS_RETURN(Convert(env, std::move(value_), asBuffer_, argv[1]));
+    NAPI_STATUS_RETURN(Convert(env, value_, asBuffer_, argv[1]));
     return CallFunction(env, callback, 2, argv);
   }
 
@@ -1145,23 +1145,23 @@ struct GetManyWorker final : public Worker {
     readOptions.snapshot = snapshot_.get();
     readOptions.async_io = true;
 
-    const auto numKeys = keys_.size();
-
     std::vector<rocksdb::Slice> keys;
     keys.reserve(keys_.size());
     for (const auto& key : keys_) {
       keys.emplace_back(key);
     }
 
-    statuses_.resize(numKeys);
-    values_.resize(numKeys);
+    statuses_.resize(keys.size());
+    values_.resize(keys.size());
 
-    database.db_->MultiGet(readOptions, column_, numKeys, keys.data(), values_.data(), statuses_.data());
+    // database.db_->MultiGet(readOptions, column_, keys.size(), keys.data(), values_.data(), statuses_.data());
 
-    keys_.clear();
-    snapshot_ = nullptr;
+    // TODO (fix): Use MultiGet once https://github.com/facebook/rocksdb/issues/10186 is resolved.
+    for (auto n = 0; n < keys.size(); ++n) {
+      statuses_[n] = database.db_->Get(readOptions, column_, keys[n], &values_[n]);
+    }
 
-    for (auto status : statuses_) {
+    for (const auto& status : statuses_) {
      if (!status.ok() && !status.IsNotFound()) {
        return status;
      }
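
Note: the hunk above swaps the batched MultiGet() for one Get() per key, pending the upstream fix tracked in the linked issue. A hedged sketch of the same workaround outside the worker class; GetMany and its parameters are illustrative, not part of the binding:

    #include <string>
    #include <vector>

    #include "rocksdb/db.h"

    // One point lookup per key; slower than MultiGet() but sidesteps the
    // MultiGet bug the TODO references.
    std::vector<rocksdb::Status> GetMany(
        rocksdb::DB* db, rocksdb::ColumnFamilyHandle* column,
        const rocksdb::ReadOptions& readOptions,
        const std::vector<rocksdb::Slice>& keys,
        std::vector<std::string>* values) {
      std::vector<rocksdb::Status> statuses(keys.size());
      values->resize(keys.size());
      for (size_t n = 0; n < keys.size(); ++n) {
        statuses[n] = db->Get(readOptions, column, keys[n], &(*values)[n]);
      }
      return statuses;
    }
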
@@ -1179,16 +1179,13 @@ struct GetManyWorker final : public Worker {
   for (size_t idx = 0; idx < size; idx++) {
     napi_value element;
     if (statuses_[idx].ok()) {
-      NAPI_STATUS_RETURN(Convert(env, std::move(values_[idx]), valueAsBuffer_, element));
+      NAPI_STATUS_RETURN(Convert(env, values_[idx], valueAsBuffer_, element));
     } else {
       NAPI_STATUS_RETURN(napi_get_undefined(env, &element));
     }
     NAPI_STATUS_RETURN(napi_set_element(env, array, static_cast<uint32_t>(idx), element));
   }
 
-  values_.clear();
-  statuses_.clear();
-
   napi_value argv[2];
   NAPI_STATUS_RETURN(napi_get_null(env, &argv[0]));
   argv[1] = array;
@@ -1481,7 +1478,8 @@ struct NextWorker final : public Worker {
       cache_.push_back(v.ToString());
     }
 
-    if ((iterator_->highWaterMarkBytes_ != -1 && bytesRead > static_cast<size_t>(iterator_->highWaterMarkBytes_)) || cache_.size() / 2 >= size_) {
+    if ((iterator_->highWaterMarkBytes_ != -1 && bytesRead > static_cast<size_t>(iterator_->highWaterMarkBytes_)) ||
+        cache_.size() / 2 >= size_) {
       finished_ = false;
       return rocksdb::Status::OK();
     }
@@ -1501,8 +1499,8 @@ struct NextWorker final : public Worker {
   napi_value key;
   napi_value val;
 
-  NAPI_STATUS_RETURN(Convert(env, std::move(cache_[n + 0]), iterator_->keyAsBuffer_, key));
-  NAPI_STATUS_RETURN(Convert(env, std::move(cache_[n + 1]), iterator_->valueAsBuffer_, val));
+  NAPI_STATUS_RETURN(Convert(env, cache_[n + 0], iterator_->keyAsBuffer_, key));
+  NAPI_STATUS_RETURN(Convert(env, cache_[n + 1], iterator_->valueAsBuffer_, val));
 
   NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(n + 0), key));
   NAPI_STATUS_RETURN(napi_set_element(env, result, static_cast<int>(n + 1), val));

package/deps/rocksdb/rocksdb/CMakeLists.txt CHANGED
@@ -194,7 +194,7 @@ else()
     set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wstrict-prototypes")
   endif()
   if(MINGW)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format -fno-asynchronous-unwind-tables")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-format")
     add_definitions(-D_POSIX_C_SOURCE=1)
   endif()
   if(NOT CMAKE_BUILD_TYPE STREQUAL "Debug")

package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc CHANGED
@@ -287,7 +287,9 @@ class CacheBench {
       exit(1);
     }
   } else if (FLAGS_cache_type == "fast_lru_cache") {
-    cache_ = NewFastLRUCache(FLAGS_cache_size, FLAGS_num_shard_bits);
+    cache_ = NewFastLRUCache(
+        FLAGS_cache_size, FLAGS_value_bytes, FLAGS_num_shard_bits,
+        false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
   } else if (FLAGS_cache_type == "lru_cache") {
     LRUCacheOptions opts(FLAGS_cache_size, FLAGS_num_shard_bits, false, 0.5);
 #ifndef ROCKSDB_LITE

package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc CHANGED
@@ -22,6 +22,7 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToCamelString{{
     "CompressionDictionaryBuildingBuffer",
     "FilterConstruction",
     "BlockBasedTableReader",
+    "FileMetadata",
     "Misc",
 }};
 
@@ -36,6 +37,7 @@ std::array<std::string, kNumCacheEntryRoles> kCacheEntryRoleToHyphenString{{
     "compression-dictionary-building-buffer",
     "filter-construction",
     "block-based-table-reader",
+    "file-metadata",
     "misc",
 }};
 

package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc CHANGED
@@ -180,4 +180,5 @@ template class CacheReservationManagerImpl<
 template class CacheReservationManagerImpl<CacheEntryRole::kFilterConstruction>;
 template class CacheReservationManagerImpl<CacheEntryRole::kMisc>;
 template class CacheReservationManagerImpl<CacheEntryRole::kWriteBuffer>;
+template class CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>;
 }  // namespace ROCKSDB_NAMESPACE

package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h CHANGED
@@ -36,6 +36,12 @@ class CacheReservationManager {
   };
   virtual ~CacheReservationManager() {}
   virtual Status UpdateCacheReservation(std::size_t new_memory_used) = 0;
+  // TODO(hx235): replace the usage of
+  // `UpdateCacheReservation(memory_used_delta, increase)` with
+  // `UpdateCacheReservation(new_memory_used)` so that we only have one
+  // `UpdateCacheReservation` function
+  virtual Status UpdateCacheReservation(std::size_t memory_used_delta,
+                                        bool increase) = 0;
   virtual Status MakeCacheReservation(
       std::size_t incremental_memory_used,
       std::unique_ptr<CacheReservationManager::CacheReservationHandle>
@@ -128,6 +134,11 @@ class CacheReservationManagerImpl
   // On keeping dummy entries the same, it always returns Status::OK().
   Status UpdateCacheReservation(std::size_t new_memory_used) override;
 
+  Status UpdateCacheReservation(std::size_t /* memory_used_delta */,
+                                bool /* increase */) override {
+    return Status::NotSupported();
+  }
+
   // One of the two ways of reserving cache space and releasing is done through
   // destruction of CacheReservationHandle.
   // See UpdateCacheReservation() for the other way.
@@ -254,6 +265,23 @@ class ConcurrentCacheReservationManager
     std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
     return cache_res_mgr_->UpdateCacheReservation(new_memory_used);
   }
+
+  inline Status UpdateCacheReservation(std::size_t memory_used_delta,
+                                       bool increase) override {
+    std::lock_guard<std::mutex> lock(cache_res_mgr_mu_);
+    std::size_t total_mem_used = cache_res_mgr_->GetTotalMemoryUsed();
+    Status s;
+    if (!increase) {
+      assert(total_mem_used >= memory_used_delta);
+      s = cache_res_mgr_->UpdateCacheReservation(total_mem_used -
+                                                 memory_used_delta);
+    } else {
+      s = cache_res_mgr_->UpdateCacheReservation(total_mem_used +
+                                                 memory_used_delta);
+    }
+    return s;
+  }
+
   inline Status MakeCacheReservation(
       std::size_t incremental_memory_used,
       std::unique_ptr<CacheReservationManager::CacheReservationHandle> *handle)
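
Note: the new ConcurrentCacheReservationManager overload above expresses updates as a delta against the current total and forwards to the existing absolute-value overload under the same mutex. A standalone sketch of just that translation (the static counter here is illustrative, not RocksDB code):

    #include <cassert>
    #include <cstddef>
    #include <iostream>

    static std::size_t total_mem_used = 10 << 20;  // pretend 10 MiB reserved

    void UpdateCacheReservation(std::size_t delta, bool increase) {
      if (!increase) {
        assert(total_mem_used >= delta);
        total_mem_used -= delta;  // real code forwards total - delta
      } else {
        total_mem_used += delta;  // real code forwards total + delta
      }
    }

    int main() {
      UpdateCacheReservation(2 << 20, /*increase=*/true);   // -> 12 MiB
      UpdateCacheReservation(5 << 20, /*increase=*/false);  // -> 7 MiB
      std::cout << (total_mem_used >> 20) << " MiB\n";      // prints "7 MiB"
      return 0;
    }
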

package/deps/rocksdb/rocksdb/cache/cache_test.cc CHANGED
@@ -114,7 +114,9 @@ class CacheTest : public testing::TestWithParam<std::string> {
       return NewClockCache(capacity);
     }
     if (type == kFast) {
-      return NewFastLRUCache(capacity);
+      return NewFastLRUCache(
+          capacity, 1 /*estimated_value_size*/, -1 /*num_shard_bits*/,
+          false /*strict_capacity_limit*/, kDefaultCacheMetadataChargePolicy);
     }
     return nullptr;
   }
@@ -137,7 +139,8 @@ class CacheTest : public testing::TestWithParam<std::string> {
                            charge_policy);
     }
     if (type == kFast) {
-      return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
+      return NewFastLRUCache(capacity, 1 /*estimated_value_size*/,
+                             num_shard_bits, strict_capacity_limit,
                              charge_policy);
     }
     return nullptr;

package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc CHANGED
@@ -18,15 +18,17 @@
 #include "port/lang.h"
 #include "util/mutexlock.h"
 
+#define KEY_LENGTH \
+  16  // TODO(guido) Make use of this symbol in other parts of the source code
+      // (e.g., cache_key.h, cache_test.cc, etc.)
+
 namespace ROCKSDB_NAMESPACE {
 
 namespace fast_lru_cache {
 
-LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
-    : length_bits_(/* historical starting size*/ 4),
-      list_(new LRUHandle* [size_t{1} << length_bits_] {}),
-      elems_(0),
-      max_length_bits_(max_upper_hash_bits) {}
+LRUHandleTable::LRUHandleTable(int hash_bits)
+    : length_bits_(hash_bits),
+      list_(new LRUHandle* [size_t{1} << length_bits_] {}) {}
 
 LRUHandleTable::~LRUHandleTable() {
   ApplyToEntriesRange(
@@ -42,19 +44,15 @@ LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
   return *FindPointer(key, hash);
 }
 
+inline LRUHandle** LRUHandleTable::Head(uint32_t hash) {
+  return &list_[hash >> (32 - length_bits_)];
+}
+
 LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
   LRUHandle** ptr = FindPointer(h->key(), h->hash);
   LRUHandle* old = *ptr;
   h->next_hash = (old == nullptr ? nullptr : old->next_hash);
   *ptr = h;
-  if (old == nullptr) {
-    ++elems_;
-    if ((elems_ >> length_bits_) > 0) {  // elems_ >= length
-      // Since each cache entry is fairly large, we aim for a small
-      // average linked list length (<= 1).
-      Resize();
-    }
-  }
   return old;
 }
 
@@ -63,7 +61,6 @@ LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
   LRUHandle* result = *ptr;
   if (result != nullptr) {
     *ptr = result->next_hash;
-    --elems_;
   }
   return result;
 }
@@ -76,46 +73,13 @@ LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
   return ptr;
 }
 
-void LRUHandleTable::Resize() {
-  if (length_bits_ >= max_length_bits_) {
-    // Due to reaching limit of hash information, if we made the table bigger,
-    // we would allocate more addresses but only the same number would be used.
-    return;
-  }
-  if (length_bits_ >= 31) {
-    // Avoid undefined behavior shifting uint32_t by 32.
-    return;
-  }
-
-  uint32_t old_length = uint32_t{1} << length_bits_;
-  int new_length_bits = length_bits_ + 1;
-  std::unique_ptr<LRUHandle* []> new_list {
-    new LRUHandle* [size_t{1} << new_length_bits] {}
-  };
-  uint32_t count = 0;
-  for (uint32_t i = 0; i < old_length; i++) {
-    LRUHandle* h = list_[i];
-    while (h != nullptr) {
-      LRUHandle* next = h->next_hash;
-      uint32_t hash = h->hash;
-      LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
-      h->next_hash = *ptr;
-      *ptr = h;
-      h = next;
-      count++;
-    }
-  }
-  assert(elems_ == count);
-  list_ = std::move(new_list);
-  length_bits_ = new_length_bits;
-}
-
-LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
-                             CacheMetadataChargePolicy metadata_charge_policy,
-                             int max_upper_hash_bits)
+LRUCacheShard::LRUCacheShard(size_t capacity, size_t estimated_value_size,
+                             bool strict_capacity_limit,
+                             CacheMetadataChargePolicy metadata_charge_policy)
     : capacity_(0),
       strict_capacity_limit_(strict_capacity_limit),
-      table_(max_upper_hash_bits),
+      table_(
+          GetHashBits(capacity, estimated_value_size, metadata_charge_policy)),
       usage_(0),
       lru_usage_(0) {
   set_metadata_charge_policy(metadata_charge_policy);
@@ -220,6 +184,27 @@ void LRUCacheShard::EvictFromLRU(size_t charge,
   }
 }
 
+int LRUCacheShard::GetHashBits(
+    size_t capacity, size_t estimated_value_size,
+    CacheMetadataChargePolicy metadata_charge_policy) {
+  LRUHandle* e = reinterpret_cast<LRUHandle*>(
+      new char[sizeof(LRUHandle) - 1 + KEY_LENGTH]);
+  e->key_length = KEY_LENGTH;
+  e->deleter = nullptr;
+  e->refs = 0;
+  e->flags = 0;
+  e->refs = 0;
+
+  e->CalcTotalCharge(estimated_value_size, metadata_charge_policy);
+  size_t num_entries = capacity / e->total_charge;
+  e->Free();
+  int num_hash_bits = 0;
+  while (num_entries >>= 1) {
+    ++num_hash_bits;
+  }
+  return num_hash_bits;
+}
+
 void LRUCacheShard::SetCapacity(size_t capacity) {
   autovector<LRUHandle*> last_reference_list;
   {
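
Note: the table resizing logic is gone; GetHashBits() above sizes the per-shard table once, up front. It builds a dummy handle to estimate the total charge per entry, divides the shard capacity by it, and takes floor(log2) of the result. A standalone sketch of the arithmetic, with illustrative numbers:

    #include <cstddef>
    #include <iostream>

    int HashBitsFor(std::size_t capacity, std::size_t per_entry_charge) {
      std::size_t num_entries = capacity / per_entry_charge;
      int bits = 0;
      while (num_entries >>= 1) ++bits;  // floor(log2(num_entries))
      return bits;
    }

    int main() {
      // A 1 MiB shard with ~128-byte entries holds 8192 entries -> 13 bits.
      std::cout << HashBitsFor(1 << 20, 128) << "\n";
      return 0;
    }
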
@@ -368,8 +353,9 @@ Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                              size_t charge, Cache::DeleterFn deleter,
                              Cache::Handle** handle,
                              Cache::Priority /*priority*/) {
-  if (key.size() != 16) {
-    return Status::NotSupported("FastLRUCache only supports key size 16B.");
+  if (key.size() != KEY_LENGTH) {
+    return Status::NotSupported("FastLRUCache only supports key size " +
+                                std::to_string(KEY_LENGTH) + "B");
   }
 
   // Allocate the memory here outside of the mutex.
@@ -431,8 +417,8 @@ size_t LRUCacheShard::GetPinnedUsage() const {
 
 std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; }
 
-LRUCache::LRUCache(size_t capacity, int num_shard_bits,
-                   bool strict_capacity_limit,
+LRUCache::LRUCache(size_t capacity, size_t estimated_value_size,
+                   int num_shard_bits, bool strict_capacity_limit,
                    CacheMetadataChargePolicy metadata_charge_policy)
     : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
   num_shards_ = 1 << num_shard_bits;
@@ -441,8 +427,8 @@ LRUCache::LRUCache(size_t capacity, int num_shard_bits,
   size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
   for (int i = 0; i < num_shards_; i++) {
     new (&shards_[i])
-        LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy,
-                      /* max_upper_hash_bits */ 32 - num_shard_bits);
+        LRUCacheShard(per_shard, estimated_value_size, strict_capacity_limit,
+                      metadata_charge_policy);
   }
 }
 
@@ -497,7 +483,8 @@ void LRUCache::DisownData() {
 }  // namespace fast_lru_cache
 
 std::shared_ptr<Cache> NewFastLRUCache(
-    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+    size_t capacity, size_t estimated_value_size, int num_shard_bits,
+    bool strict_capacity_limit,
    CacheMetadataChargePolicy metadata_charge_policy) {
  if (num_shard_bits >= 20) {
    return nullptr;  // The cache cannot be sharded into too many fine pieces.
@@ -506,7 +493,8 @@ std::shared_ptr<Cache> NewFastLRUCache(
     num_shard_bits = GetDefaultCacheShardBits(capacity);
   }
   return std::make_shared<fast_lru_cache::LRUCache>(
-      capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
+      capacity, estimated_value_size, num_shard_bits, strict_capacity_limit,
+      metadata_charge_policy);
 }
 
 }  // namespace ROCKSDB_NAMESPACE
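
Note: NewFastLRUCache() now takes all five arguments explicitly (the header hunk below removes the old defaults). A minimal call-site sketch, assuming the internal header cache/fast_lru_cache.h is on the include path; the values are illustrative:

    #include <memory>

    #include "cache/fast_lru_cache.h"

    std::shared_ptr<rocksdb::Cache> MakeFastCache() {
      return rocksdb::NewFastLRUCache(
          64 << 20 /*capacity*/, 1024 /*estimated_value_size*/,
          -1 /*num_shard_bits: use default*/, false /*strict_capacity_limit*/,
          rocksdb::kDefaultCacheMetadataChargePolicy);
    }
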

package/deps/rocksdb/rocksdb/cache/fast_lru_cache.h CHANGED
@@ -114,10 +114,7 @@ struct LRUHandle {
 // 4.4.3's builtin hashtable.
 class LRUHandleTable {
  public:
-  // If the table uses more hash bits than `max_upper_hash_bits`,
-  // it will eat into the bits used for sharding, which are constant
-  // for a given LRUHandleTable.
-  explicit LRUHandleTable(int max_upper_hash_bits);
+  explicit LRUHandleTable(int hash_bits);
   ~LRUHandleTable();
 
   LRUHandle* Lookup(const Slice& key, uint32_t hash);
@@ -139,14 +136,16 @@ class LRUHandleTable {
 
   int GetLengthBits() const { return length_bits_; }
 
+  // Return the address of the head of the chain in the bucket given
+  // by the hash.
+  inline LRUHandle** Head(uint32_t hash);
+
  private:
   // Return a pointer to slot that points to a cache entry that
   // matches key/hash. If there is no such cache entry, return a
   // pointer to the trailing slot in the corresponding linked list.
   LRUHandle** FindPointer(const Slice& key, uint32_t hash);
 
-  void Resize();
-
   // Number of hash bits (upper because lower bits used for sharding)
   // used for table index. Length == 1 << length_bits_
   int length_bits_;
@@ -154,20 +153,14 @@ class LRUHandleTable {
   // The table consists of an array of buckets where each bucket is
   // a linked list of cache entries that hash into the bucket.
   std::unique_ptr<LRUHandle*[]> list_;
-
-  // Number of elements currently in the table.
-  uint32_t elems_;
-
-  // Set from max_upper_hash_bits (see constructor).
-  const int max_length_bits_;
 };
 
 // A single shard of sharded cache.
 class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
  public:
-  LRUCacheShard(size_t capacity, bool strict_capacity_limit,
-                CacheMetadataChargePolicy metadata_charge_policy,
-                int max_upper_hash_bits);
+  LRUCacheShard(size_t capacity, size_t estimated_value_size,
+                bool strict_capacity_limit,
+                CacheMetadataChargePolicy metadata_charge_policy);
   ~LRUCacheShard() override = default;
 
   // Separate from constructor so caller can easily make an array of LRUCache
@@ -239,6 +232,11 @@ class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
   // holding the mutex_.
   void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
 
+  // Returns the number of bits used to hash an element in the per-shard
+  // table.
+  static int GetHashBits(size_t capacity, size_t estimated_value_size,
+                         CacheMetadataChargePolicy metadata_charge_policy);
+
   // Initialized before use.
   size_t capacity_;
 
@@ -284,7 +282,8 @@ class LRUCache
 #endif
     : public ShardedCache {
  public:
-  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+  LRUCache(size_t capacity, size_t estimated_value_size, int num_shard_bits,
+           bool strict_capacity_limit,
           CacheMetadataChargePolicy metadata_charge_policy =
               kDontChargeCacheMetadata);
  ~LRUCache() override;
@@ -304,9 +303,8 @@ class LRUCache
 }  // namespace fast_lru_cache
 
 std::shared_ptr<Cache> NewFastLRUCache(
-    size_t capacity, int num_shard_bits = -1,
-    bool strict_capacity_limit = false,
-    CacheMetadataChargePolicy metadata_charge_policy =
-        kDefaultCacheMetadataChargePolicy);
+    size_t capacity, size_t estimated_value_size, int num_shard_bits,
+    bool strict_capacity_limit,
+    CacheMetadataChargePolicy metadata_charge_policy);
 
 }  // namespace ROCKSDB_NAMESPACE

package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc CHANGED
@@ -226,8 +226,8 @@ class FastLRUCacheTest : public testing::Test {
     cache_ = reinterpret_cast<fast_lru_cache::LRUCacheShard*>(
         port::cacheline_aligned_alloc(sizeof(fast_lru_cache::LRUCacheShard)));
     new (cache_) fast_lru_cache::LRUCacheShard(
-        capacity, false /*strict_capcity_limit*/, kDontChargeCacheMetadata,
-        24 /*max_upper_hash_bits*/);
+        capacity, 1 /*estimated_value_size*/, false /*strict_capacity_limit*/,
+        kDontChargeCacheMetadata);
   }
 
   Status Insert(const std::string& key) {

package/deps/rocksdb/rocksdb/db/c.cc CHANGED
@@ -3048,6 +3048,11 @@ int rocksdb_options_get_blob_file_starting_level(rocksdb_options_t* opt) {
   return opt->rep.blob_file_starting_level;
 }
 
+void rocksdb_options_set_blob_cache(rocksdb_options_t* opt,
+                                    rocksdb_cache_t* blob_cache) {
+  opt->rep.blob_cache = blob_cache->rep;
+}
+
 void rocksdb_options_set_num_levels(rocksdb_options_t* opt, int n) {
   opt->rep.num_levels = n;
 }
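
Note: the new C API setter above exposes the blob cache option to C callers. A minimal sketch of wiring a blob cache through it, using the existing rocksdb_cache_create_lru() constructor; the 64 MiB size is illustrative:

    #include "rocksdb/c.h"

    void ConfigureBlobCache(rocksdb_options_t* opts) {
      rocksdb_cache_t* blob_cache = rocksdb_cache_create_lru(64 << 20);
      rocksdb_options_set_blob_cache(opts, blob_cache);
    }
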

package/deps/rocksdb/rocksdb/db/column_family.cc CHANGED
@@ -619,6 +619,26 @@ ColumnFamilyData::ColumnFamilyData(
   }
 
   RecalculateWriteStallConditions(mutable_cf_options_);
+
+  if (cf_options.table_factory->IsInstanceOf(
+          TableFactory::kBlockBasedTableName()) &&
+      cf_options.table_factory->GetOptions<BlockBasedTableOptions>()) {
+    const BlockBasedTableOptions* bbto =
+        cf_options.table_factory->GetOptions<BlockBasedTableOptions>();
+    const auto& options_overrides = bbto->cache_usage_options.options_overrides;
+    const auto file_metadata_charged =
+        options_overrides.at(CacheEntryRole::kFileMetadata).charged;
+    if (bbto->block_cache &&
+        file_metadata_charged == CacheEntryRoleOptions::Decision::kEnabled) {
+      // TODO(hx235): Add a `ConcurrentCacheReservationManager` at DB scope
+      // responsible for reservation of `ObsoleteFileInfo` so that we can keep
+      // this `file_metadata_cache_res_mgr_` nonconcurrent
+      file_metadata_cache_res_mgr_.reset(new ConcurrentCacheReservationManager(
+          std::make_shared<
+              CacheReservationManagerImpl<CacheEntryRole::kFileMetadata>>(
+              bbto->block_cache)));
+    }
+  }
 }
 
 // DB mutex held
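
Note: the constructor hunk above only reads configuration; the opt-in itself lives in BlockBasedTableOptions. A hedged sketch of enabling file-metadata charging, assuming the CacheUsageOptions shape shipped in this RocksDB snapshot (verify against the bundled headers):

    #include "rocksdb/cache.h"
    #include "rocksdb/table.h"

    rocksdb::BlockBasedTableOptions MakeChargedTableOptions() {
      rocksdb::BlockBasedTableOptions bbto;
      bbto.block_cache = rocksdb::NewLRUCache(256 << 20);

      // Opt file metadata into block-cache charging for this column family.
      rocksdb::CacheEntryRoleOptions role_options;
      role_options.charged = rocksdb::CacheEntryRoleOptions::Decision::kEnabled;
      bbto.cache_usage_options.options_overrides.insert(
          {rocksdb::CacheEntryRole::kFileMetadata, role_options});
      return bbto;
    }
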