@nxtedition/rocksdb 7.1.4 → 7.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. package/binding.cc +32 -14
  2. package/deps/rocksdb/iostats.patch +19 -0
  3. package/deps/rocksdb/rocksdb/CMakeLists.txt +15 -1
  4. package/deps/rocksdb/rocksdb/cache/cache.cc +4 -0
  5. package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +6 -8
  6. package/deps/rocksdb/rocksdb/cache/cache_key.cc +184 -164
  7. package/deps/rocksdb/rocksdb/cache/cache_key.h +38 -29
  8. package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +4 -4
  9. package/deps/rocksdb/rocksdb/cache/cache_test.cc +93 -58
  10. package/deps/rocksdb/rocksdb/cache/clock_cache.cc +92 -42
  11. package/deps/rocksdb/rocksdb/cache/clock_cache.h +57 -32
  12. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +114 -37
  13. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +34 -2
  14. package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +187 -38
  15. package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +3 -1
  16. package/deps/rocksdb/rocksdb/cache/lru_cache.cc +88 -19
  17. package/deps/rocksdb/rocksdb/cache/lru_cache.h +48 -8
  18. package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +481 -224
  19. package/deps/rocksdb/rocksdb/crash_test.mk +15 -1
  20. package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +2 -2
  21. package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +3 -7
  22. package/deps/rocksdb/rocksdb/db/blob/blob_index.h +1 -1
  23. package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +3 -5
  24. package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +25 -19
  25. package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +4 -5
  26. package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -3
  27. package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +12 -4
  28. package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
  29. package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +105 -0
  30. package/deps/rocksdb/rocksdb/db/column_family.cc +2 -15
  31. package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -4
  32. package/deps/rocksdb/rocksdb/db/compact_files_test.cc +8 -8
  33. package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +0 -7
  34. package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
  35. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +56 -53
  36. package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +33 -11
  37. package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +45 -11
  38. package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1 -2
  39. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +143 -2
  40. package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +43 -18
  41. package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +48 -65
  42. package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -0
  43. package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -4
  44. package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +239 -190
  45. package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +71 -2
  46. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -33
  47. package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +18 -35
  48. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -5
  49. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +7 -7
  50. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +15 -8
  51. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +2 -1
  52. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +3 -1
  53. package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -0
  54. package/deps/rocksdb/rocksdb/db/db_iter.cc +69 -11
  55. package/deps/rocksdb/rocksdb/db/db_iter.h +16 -0
  56. package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +239 -23
  57. package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +2 -1
  58. package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
  59. package/deps/rocksdb/rocksdb/db/db_test.cc +61 -28
  60. package/deps/rocksdb/rocksdb/db/db_test2.cc +24 -9
  61. package/deps/rocksdb/rocksdb/db/db_wal_test.cc +17 -0
  62. package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +61 -0
  63. package/deps/rocksdb/rocksdb/db/db_write_test.cc +130 -0
  64. package/deps/rocksdb/rocksdb/db/event_helpers.cc +2 -1
  65. package/deps/rocksdb/rocksdb/db/experimental.cc +7 -8
  66. package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
  67. package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -7
  68. package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -1
  69. package/deps/rocksdb/rocksdb/db/forward_iterator.cc +4 -2
  70. package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +7 -1
  71. package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -0
  72. package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
  73. package/deps/rocksdb/rocksdb/db/kv_checksum.h +8 -4
  74. package/deps/rocksdb/rocksdb/db/log_reader.cc +48 -11
  75. package/deps/rocksdb/rocksdb/db/log_reader.h +8 -2
  76. package/deps/rocksdb/rocksdb/db/log_test.cc +10 -1
  77. package/deps/rocksdb/rocksdb/db/log_writer.cc +7 -1
  78. package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -4
  79. package/deps/rocksdb/rocksdb/db/memtable.cc +222 -47
  80. package/deps/rocksdb/rocksdb/db/memtable.h +70 -14
  81. package/deps/rocksdb/rocksdb/db/memtable_list.cc +14 -8
  82. package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +30 -10
  83. package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -5
  84. package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +5 -0
  85. package/deps/rocksdb/rocksdb/db/repair.cc +2 -3
  86. package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -7
  87. package/deps/rocksdb/rocksdb/db/table_cache.cc +72 -0
  88. package/deps/rocksdb/rocksdb/db/table_cache.h +19 -1
  89. package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +10 -15
  90. package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +2 -2
  91. package/deps/rocksdb/rocksdb/db/version_builder_test.cc +35 -64
  92. package/deps/rocksdb/rocksdb/db/version_edit.cc +3 -32
  93. package/deps/rocksdb/rocksdb/db/version_edit.h +2 -12
  94. package/deps/rocksdb/rocksdb/db/version_edit_test.cc +10 -23
  95. package/deps/rocksdb/rocksdb/db/version_set.cc +71 -28
  96. package/deps/rocksdb/rocksdb/db/version_set.h +3 -3
  97. package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +7 -7
  98. package/deps/rocksdb/rocksdb/db/version_set_test.cc +17 -15
  99. package/deps/rocksdb/rocksdb/db/wal_manager.cc +0 -4
  100. package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +2 -1
  101. package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +137 -42
  102. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +21 -0
  103. package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +1 -0
  104. package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
  105. package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -4
  106. package/deps/rocksdb/rocksdb/db/write_thread.cc +51 -46
  107. package/deps/rocksdb/rocksdb/db/write_thread.h +0 -4
  108. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +5 -0
  109. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +12 -0
  110. package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +8 -0
  111. package/deps/rocksdb/rocksdb/env/env_posix.cc +1 -1
  112. package/deps/rocksdb/rocksdb/env/env_test.cc +38 -8
  113. package/deps/rocksdb/rocksdb/env/file_system.cc +20 -0
  114. package/deps/rocksdb/rocksdb/env/fs_posix.cc +2 -46
  115. package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -0
  116. package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +110 -5
  117. package/deps/rocksdb/rocksdb/file/writable_file_writer.h +7 -0
  118. package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +29 -1
  119. package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +31 -6
  120. package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -0
  121. package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
  122. package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +7 -0
  123. package/deps/rocksdb/rocksdb/include/rocksdb/options.h +10 -3
  124. package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +3 -1
  125. package/deps/rocksdb/rocksdb/include/rocksdb/status.h +1 -1
  126. package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +2 -0
  127. package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -0
  128. package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +9 -13
  129. package/deps/rocksdb/rocksdb/logging/env_logger.h +39 -13
  130. package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +1 -1
  131. package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +1 -1
  132. package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -1
  133. package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +6 -0
  134. package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +4 -1
  135. package/deps/rocksdb/rocksdb/options/cf_options.cc +10 -3
  136. package/deps/rocksdb/rocksdb/options/cf_options.h +10 -5
  137. package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -1
  138. package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
  139. package/deps/rocksdb/rocksdb/options/options_test.cc +4 -2
  140. package/deps/rocksdb/rocksdb/port/util_logger.h +1 -3
  141. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -6
  142. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -0
  143. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +52 -12
  144. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -7
  145. package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +9 -1
  146. package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +28 -10
  147. package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -1
  148. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +5 -2
  149. package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +1 -0
  150. package/deps/rocksdb/rocksdb/table/get_context.cc +16 -6
  151. package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
  152. package/deps/rocksdb/rocksdb/table/table_test.cc +2 -1
  153. package/deps/rocksdb/rocksdb/table/unique_id.cc +22 -24
  154. package/deps/rocksdb/rocksdb/table/unique_id_impl.h +2 -1
  155. package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +7 -0
  156. package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
  157. package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +5 -2
  158. package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +7 -8
  159. package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +6 -6
  160. package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -1
  161. package/deps/rocksdb/rocksdb/util/async_file_reader.cc +2 -1
  162. package/deps/rocksdb/rocksdb/util/async_file_reader.h +3 -3
  163. package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -1
  164. package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +2 -0
  165. package/deps/rocksdb/rocksdb/util/hash_test.cc +67 -0
  166. package/deps/rocksdb/rocksdb/util/math.h +41 -0
  167. package/deps/rocksdb/rocksdb/util/math128.h +6 -0
  168. package/deps/rocksdb/rocksdb/util/single_thread_executor.h +2 -1
  169. package/deps/rocksdb/rocksdb/util/stderr_logger.h +13 -0
  170. package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +55 -46
  171. package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +3 -6
  172. package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +2 -1
  173. package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +10 -0
  174. package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +5 -0
  175. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +6 -0
  176. package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -2
  177. package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +2 -2
  178. package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
  179. package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +2 -2
  180. package/index.js +17 -8
  181. package/package.json +1 -1
  182. package/prebuilds/darwin-arm64/node.napi.node +0 -0
  183. package/prebuilds/darwin-x64/node.napi.node +0 -0
  184. package/prebuilds/linux-x64/node.napi.node +0 -0
  185. package/deps/rocksdb/rocksdb/logging/posix_logger.h +0 -179
@@ -69,10 +69,10 @@ ClockHandle* ClockHandleTable::Lookup(const Slice& key, uint32_t hash) {
69
69
  // updates where it would be possible to combine into one CAS (more metadata
70
70
  // under one atomic field) or maybe two atomic updates (one arithmetic, one
71
71
  // bitwise). Something to think about optimizing.
72
- e->InternalToExternalRef();
73
72
  e->SetHit();
74
73
  // The handle is now referenced, so we take it out of clock.
75
74
  ClockOff(e);
75
+ e->InternalToExternalRef();
76
76
  }
77
77
 
78
78
  return e;
@@ -312,17 +312,20 @@ void ClockHandleTable::ClockRun(size_t charge) {
312
312
  // hot element, it will be hard to get an exclusive ref.
313
313
  // Do we need a mechanism to prevent an element from sitting
314
314
  // for a long time in cache waiting to be evicted?
315
- assert(charge <= capacity_);
316
315
  autovector<ClockHandle> deleted;
317
316
  uint32_t max_iterations =
318
- 1 + static_cast<uint32_t>(GetTableSize() * kLoadFactor);
317
+ ClockHandle::ClockPriority::HIGH *
318
+ (1 +
319
+ static_cast<uint32_t>(
320
+ GetTableSize() *
321
+ kLoadFactor)); // It may take up to HIGH passes to evict an element.
319
322
  size_t usage_local = usage_;
320
- while (usage_local + charge > capacity_ && max_iterations--) {
323
+ size_t capacity_local = capacity_;
324
+ while (usage_local + charge > capacity_local && max_iterations--) {
321
325
  uint32_t steps = 1 + static_cast<uint32_t>(1 / kLoadFactor);
322
326
  uint32_t clock_pointer_local = (clock_pointer_ += steps) - steps;
323
327
  for (uint32_t i = 0; i < steps; i++) {
324
328
  ClockHandle* h = &array_[ModTableSize(clock_pointer_local + i)];
325
-
326
329
  if (h->TryExclusiveRef()) {
327
330
  if (h->WillBeDeleted()) {
328
331
  Remove(h, &deleted);
@@ -335,7 +338,6 @@ void ClockHandleTable::ClockRun(size_t charge) {
335
338
  // exclusive ref, we know we are in the latter case. This can only
336
339
  // happen when the last external reference to an element was
337
340
  // released, and the element was not immediately removed.
338
-
339
341
  ClockOn(h);
340
342
  }
341
343
  ClockHandle::ClockPriority priority = h->GetClockPriority();
@@ -358,6 +360,7 @@ ClockCacheShard::ClockCacheShard(
358
360
  size_t capacity, size_t estimated_value_size, bool strict_capacity_limit,
359
361
  CacheMetadataChargePolicy metadata_charge_policy)
360
362
  : strict_capacity_limit_(strict_capacity_limit),
363
+ detached_usage_(0),
361
364
  table_(capacity, CalcHashBits(capacity, estimated_value_size,
362
365
  metadata_charge_policy)) {
363
366
  set_metadata_charge_policy(metadata_charge_policy);
@@ -401,15 +404,24 @@ void ClockCacheShard::ApplyToSomeEntries(
401
404
  *state = index_end << (32 - length_bits);
402
405
  }
403
406
 
404
- table_.ApplyToEntriesRange(
407
+ table_.ConstApplyToEntriesRange(
405
408
  [callback,
406
- metadata_charge_policy = metadata_charge_policy_](ClockHandle* h) {
409
+ metadata_charge_policy = metadata_charge_policy_](const ClockHandle* h) {
407
410
  callback(h->key(), h->value, h->GetCharge(metadata_charge_policy),
408
411
  h->deleter);
409
412
  },
410
413
  index_begin, index_end, false);
411
414
  }
412
415
 
416
+ ClockHandle* ClockCacheShard::DetachedInsert(ClockHandle* h) {
417
+ ClockHandle* e = new ClockHandle();
418
+ *e = *h;
419
+ e->SetDetached();
420
+ e->TryExternalRef();
421
+ detached_usage_ += h->total_charge;
422
+ return e;
423
+ }
424
+
413
425
  size_t ClockCacheShard::CalcEstimatedHandleCharge(
414
426
  size_t estimated_value_size,
415
427
  CacheMetadataChargePolicy metadata_charge_policy) {
@@ -430,12 +442,16 @@ int ClockCacheShard::CalcHashBits(
430
442
  return FloorLog2((num_entries << 1) - 1);
431
443
  }
432
444
 
433
- void ClockCacheShard::SetCapacity(size_t /*capacity*/) {
434
- assert(false); // Not supported.
445
+ void ClockCacheShard::SetCapacity(size_t capacity) {
446
+ if (capacity > table_.GetCapacity()) {
447
+ assert(false); // Not supported.
448
+ }
449
+ table_.SetCapacity(capacity);
450
+ table_.ClockRun(detached_usage_);
435
451
  }
436
452
 
437
- void ClockCacheShard::SetStrictCapacityLimit(bool /*strict_capacity_limit*/) {
438
- assert(false); // Not supported.
453
+ void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
454
+ strict_capacity_limit_ = strict_capacity_limit;
439
455
  }
440
456
 
441
457
  Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
@@ -459,27 +475,32 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
459
475
 
460
476
  Status s = Status::OK();
461
477
 
478
+ // Use a local copy to minimize cache synchronization.
479
+ size_t detached_usage = detached_usage_;
480
+
462
481
  // Free space with the clock policy until enough space is freed or there are
463
482
  // no evictable elements.
464
- table_.ClockRun(tmp.total_charge);
483
+ table_.ClockRun(tmp.total_charge + detached_usage);
465
484
 
466
- // occupancy_ and usage_ are contended members across concurrent updates
467
- // on the same shard, so we use a single copy to reduce cache synchronization.
485
+ // Use local copies to minimize cache synchronization
486
+ // (occupancy_ and usage_ are read and written by all insertions).
468
487
  uint32_t occupancy_local = table_.GetOccupancy();
469
- size_t usage_local = table_.GetUsage();
470
- assert(occupancy_local <= table_.GetOccupancyLimit());
471
-
472
- autovector<ClockHandle> deleted;
473
-
474
- if ((usage_local + tmp.total_charge > table_.GetCapacity() &&
475
- (strict_capacity_limit_ || handle == nullptr)) ||
476
- occupancy_local > table_.GetOccupancyLimit()) {
488
+ size_t total_usage = table_.GetUsage() + detached_usage;
489
+
490
+ // TODO: Currently we support strict_capacity_limit == false as long as the
491
+ // number of pinned elements is below table_.GetOccupancyLimit(). We can
492
+ // always support it as follows: whenever we exceed this limit, we dynamically
493
+ // allocate a handle and return it (when the user provides a handle pointer,
494
+ // of course). Then, Release checks whether the handle was dynamically
495
+ // allocated, or is stored in the table.
496
+ if (total_usage + tmp.total_charge > table_.GetCapacity() &&
497
+ (strict_capacity_limit_ || handle == nullptr)) {
477
498
  if (handle == nullptr) {
478
499
  // Don't insert the entry but still return ok, as if the entry inserted
479
500
  // into cache and get evicted immediately.
480
- deleted.push_back(tmp);
501
+ tmp.FreeData();
481
502
  } else {
482
- if (occupancy_local > table_.GetOccupancyLimit()) {
503
+ if (occupancy_local + 1 > table_.GetOccupancyLimit()) {
483
504
  // TODO: Consider using a distinct status for this case, but usually
484
505
  // it will be handled the same way as reaching charge capacity limit
485
506
  s = Status::MemoryLimit(
@@ -491,22 +512,34 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
491
512
  }
492
513
  }
493
514
  } else {
494
- // Insert into the cache. Note that the cache might get larger than its
495
- // capacity if not enough space was freed up.
496
- ClockHandle* h = table_.Insert(&tmp, &deleted, handle != nullptr);
497
- assert(h != nullptr); // The occupancy is way below the table size, so this
498
- // insertion should never fail.
515
+ ClockHandle* h = nullptr;
516
+ if (handle != nullptr && occupancy_local + 1 > table_.GetOccupancyLimit()) {
517
+ // Even if the user wishes to overload the cache, we can't insert into
518
+ // the hash table. Instead, we dynamically allocate a new handle.
519
+ h = DetachedInsert(&tmp);
520
+ // TODO: Return special status?
521
+ } else {
522
+ // Insert into the cache. Note that the cache might get larger than its
523
+ // capacity if not enough space was freed up.
524
+ autovector<ClockHandle> deleted;
525
+ h = table_.Insert(&tmp, &deleted, handle != nullptr);
526
+ if (h == nullptr && handle != nullptr) {
527
+ // The table is full. This can happen when many threads simultaneously
528
+ // attempt an insert, and the table is operating close to full capacity.
529
+ h = DetachedInsert(&tmp);
530
+ }
531
+ // Notice that if handle == nullptr, we don't insert the entry but still
532
+ // return ok.
533
+ if (deleted.size() > 0) {
534
+ s = Status::OkOverwritten();
535
+ }
536
+ table_.Free(&deleted);
537
+ }
499
538
  if (handle != nullptr) {
500
539
  *handle = reinterpret_cast<Cache::Handle*>(h);
501
540
  }
502
-
503
- if (deleted.size() > 0) {
504
- s = Status::OkOverwritten();
505
- }
506
541
  }
507
542
 
508
- table_.Free(&deleted);
509
-
510
543
  return s;
511
544
  }
512
545
 
@@ -516,7 +549,7 @@ Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
516
549
 
517
550
  bool ClockCacheShard::Ref(Cache::Handle* h) {
518
551
  ClockHandle* e = reinterpret_cast<ClockHandle*>(h);
519
- assert(e->HasExternalRefs());
552
+ assert(e->ExternalRefs() > 0);
520
553
  return e->TryExternalRef();
521
554
  }
522
555
 
@@ -530,6 +563,20 @@ bool ClockCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
530
563
  }
531
564
 
532
565
  ClockHandle* h = reinterpret_cast<ClockHandle*>(handle);
566
+
567
+ if (UNLIKELY(h->IsDetached())) {
568
+ h->ReleaseExternalRef();
569
+ if (h->TryExclusiveRef()) {
570
+ // Only the last reference will succeed.
571
+ // Don't bother releasing the exclusive ref.
572
+ h->FreeData();
573
+ detached_usage_ -= h->total_charge;
574
+ delete h;
575
+ return true;
576
+ }
577
+ return false;
578
+ }
579
+
533
580
  uint32_t refs = h->refs;
534
581
  bool last_reference = ((refs & ClockHandle::EXTERNAL_REFS) == 1);
535
582
  bool will_be_deleted = refs & ClockHandle::WILL_BE_DELETED;
@@ -569,14 +616,15 @@ size_t ClockCacheShard::GetPinnedUsage() const {
569
616
  size_t clock_usage = 0;
570
617
 
571
618
  table_.ConstApplyToEntriesRange(
572
- [&clock_usage](ClockHandle* h) {
573
- if (h->HasExternalRefs()) {
619
+ [&clock_usage](const ClockHandle* h) {
620
+ if (h->ExternalRefs() > 1) {
621
+ // We check > 1 because we are holding an external ref.
574
622
  clock_usage += h->total_charge;
575
623
  }
576
624
  },
577
625
  0, table_.GetTableSize(), true);
578
626
 
579
- return clock_usage;
627
+ return clock_usage + detached_usage_;
580
628
  }
581
629
 
582
630
  ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
@@ -649,8 +697,10 @@ void ClockCache::DisownData() {
649
697
  std::shared_ptr<Cache> NewClockCache(
650
698
  size_t capacity, int num_shard_bits, bool strict_capacity_limit,
651
699
  CacheMetadataChargePolicy metadata_charge_policy) {
652
- return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit, 0.5,
653
- nullptr, kDefaultToAdaptiveMutex, metadata_charge_policy);
700
+ return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
701
+ /* high_pri_pool_ratio */ 0.5, nullptr,
702
+ kDefaultToAdaptiveMutex, metadata_charge_policy,
703
+ /* low_pri_pool_ratio */ 0.0);
654
704
  }
655
705
 
656
706
  std::shared_ptr<Cache> ExperimentalNewClockCache(
@@ -9,6 +9,8 @@
9
9
 
10
10
  #pragma once
11
11
 
12
+ #include <sys/types.h>
13
+
12
14
  #include <array>
13
15
  #include <atomic>
14
16
  #include <cstdint>
@@ -28,6 +30,9 @@ namespace ROCKSDB_NAMESPACE {
28
30
 
29
31
  namespace clock_cache {
30
32
 
33
+ // Forward declaration of friend class.
34
+ class ClockCacheTest;
35
+
31
36
  // An experimental alternative to LRUCache, using a lock-free, open-addressed
32
37
  // hash table and clock eviction.
33
38
 
@@ -63,10 +68,10 @@ namespace clock_cache {
63
68
  // can't be immediately deleted. In these cases, the flag will be later read
64
69
  // and acted upon by the eviction algorithm. Importantly, WILL_BE_DELETED is
65
70
  // used not only to defer deletions, but also as a barrier for external
66
- // references: once WILL_BE_DELETED is set, lookups (which are the means to
67
- // acquire new external references) will ignore the handle. For this reason,
68
- // when WILL_BE_DELETED is set, we say the handle is invisible (and
69
- // otherwise, that it's visible).
71
+ // references: once WILL_BE_DELETED is set, lookups (which are the most
72
+ // common way to acquire new external references) will ignore the handle.
73
+ // For this reason, when WILL_BE_DELETED is set, we say the handle is
74
+ // invisible (and, otherwise, that it's visible).
70
75
  //
71
76
  //
72
77
  // 3. HASHING AND COLLISION RESOLUTION
@@ -192,10 +197,10 @@ struct ClockHandle {
192
197
  size_t total_charge;
193
198
  std::array<char, kCacheKeySize> key_data;
194
199
 
195
- static constexpr uint8_t kIsElementOffset = 1;
196
- static constexpr uint8_t kClockPriorityOffset = 2;
197
- static constexpr uint8_t kIsHitOffset = 4;
198
- static constexpr uint8_t kCachePriorityOffset = 5;
200
+ static constexpr uint8_t kIsElementOffset = 0;
201
+ static constexpr uint8_t kClockPriorityOffset = 1;
202
+ static constexpr uint8_t kIsHitOffset = 3;
203
+ static constexpr uint8_t kCachePriorityOffset = 4;
199
204
 
200
205
  enum Flags : uint8_t {
201
206
  // Whether the slot is in use by an element.
@@ -252,9 +257,8 @@ struct ClockHandle {
252
257
  // Whether a thread has an exclusive reference to the slot.
253
258
  EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
254
259
  // Whether the handle will be deleted soon. When this bit is set, new
255
- // internal
256
- // or external references to this handle stop being accepted.
257
- // There is an exception: external references can be created from
260
+ // internal references to this handle stop being accepted.
261
+ // External references may still be granted---they can be created from
258
262
  // existing external references, or converting from existing internal
259
263
  // references.
260
264
  WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
@@ -274,6 +278,9 @@ struct ClockHandle {
274
278
 
275
279
  std::atomic<uint32_t> refs;
276
280
 
281
+ // True iff the handle is allocated separately from hash table.
282
+ bool detached;
283
+
277
284
  ClockHandle()
278
285
  : value(nullptr),
279
286
  deleter(nullptr),
@@ -281,7 +288,8 @@ struct ClockHandle {
281
288
  total_charge(0),
282
289
  flags(0),
283
290
  displacements(0),
284
- refs(0) {
291
+ refs(0),
292
+ detached(false) {
285
293
  SetWillBeDeleted(false);
286
294
  SetIsElement(false);
287
295
  SetClockPriority(ClockPriority::NONE);
@@ -300,6 +308,7 @@ struct ClockHandle {
300
308
  value = other.value;
301
309
  deleter = other.deleter;
302
310
  key_data = other.key_data;
311
+ hash = other.hash;
303
312
  total_charge = other.total_charge;
304
313
  }
305
314
 
@@ -350,13 +359,13 @@ struct ClockHandle {
350
359
 
351
360
  // flags functions.
352
361
 
353
- bool IsElement() const { return flags & IS_ELEMENT; }
362
+ bool IsElement() const { return flags & Flags::IS_ELEMENT; }
354
363
 
355
364
  void SetIsElement(bool is_element) {
356
365
  if (is_element) {
357
- flags |= IS_ELEMENT;
366
+ flags |= Flags::IS_ELEMENT;
358
367
  } else {
359
- flags &= static_cast<uint8_t>(~IS_ELEMENT);
368
+ flags &= static_cast<uint8_t>(~Flags::IS_ELEMENT);
360
369
  }
361
370
  }
362
371
 
@@ -400,6 +409,10 @@ struct ClockHandle {
400
409
  flags |= new_priority;
401
410
  }
402
411
 
412
+ bool IsDetached() { return detached; }
413
+
414
+ void SetDetached() { detached = true; }
415
+
403
416
  inline bool IsEmpty() const {
404
417
  return !this->IsElement() && this->displacements == 0;
405
418
  }
@@ -424,7 +437,9 @@ struct ClockHandle {
424
437
  }
425
438
  }
426
439
 
427
- bool HasExternalRefs() const { return (refs & EXTERNAL_REFS) > 0; }
440
+ uint32_t ExternalRefs() const {
441
+ return (refs & EXTERNAL_REFS) >> kExternalRefsOffset;
442
+ }
428
443
 
429
444
  // Tries to take an internal ref. Returns true iff it succeeds.
430
445
  inline bool TryInternalRef() {
@@ -437,7 +452,7 @@ struct ClockHandle {
437
452
 
438
453
  // Tries to take an external ref. Returns true iff it succeeds.
439
454
  inline bool TryExternalRef() {
440
- if (!((refs += kOneExternalRef) & (EXCLUSIVE_REF | WILL_BE_DELETED))) {
455
+ if (!((refs += kOneExternalRef) & EXCLUSIVE_REF)) {
441
456
  return true;
442
457
  }
443
458
  refs -= kOneExternalRef;
@@ -529,8 +544,8 @@ class ClockHandleTable {
529
544
  // Makes h non-evictable.
530
545
  void ClockOff(ClockHandle* h);
531
546
 
532
- // Runs the clock eviction algorithm until there is enough space to
533
- // insert an element with the given charge.
547
+ // Runs the clock eviction algorithm until usage_ + charge is at most
548
+ // capacity_.
534
549
  void ClockRun(size_t charge);
535
550
 
536
551
  // Remove h from the hash table. Requires an exclusive ref to h.
@@ -548,8 +563,6 @@ class ClockHandleTable {
548
563
  RemoveAll(key, hash, probe, deleted);
549
564
  }
550
565
 
551
- void Free(autovector<ClockHandle>* deleted);
552
-
553
566
  // Tries to remove h from the hash table. If the attempt is successful,
554
567
  // the function hands over an exclusive ref to h.
555
568
  bool TryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
@@ -558,8 +571,13 @@ class ClockHandleTable {
558
571
  // success. Requires that the caller thread has no shared ref to h.
559
572
  bool SpinTryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
560
573
 
561
- template <typename T>
562
- void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end,
574
+ // Call this function after an Insert, Remove, RemoveAll, TryRemove
575
+ // or SpinTryRemove. It frees the deleted values and updates the hash table
576
+ // metadata.
577
+ void Free(autovector<ClockHandle>* deleted);
578
+
579
+ void ApplyToEntriesRange(std::function<void(ClockHandle*)> func,
580
+ uint32_t index_begin, uint32_t index_end,
563
581
  bool apply_if_will_be_deleted) {
564
582
  for (uint32_t i = index_begin; i < index_end; i++) {
565
583
  ClockHandle* h = &array_[i];
@@ -573,18 +591,20 @@ class ClockHandleTable {
573
591
  }
574
592
  }
575
593
 
576
- template <typename T>
577
- void ConstApplyToEntriesRange(T func, uint32_t index_begin,
578
- uint32_t index_end,
594
+ void ConstApplyToEntriesRange(std::function<void(const ClockHandle*)> func,
595
+ uint32_t index_begin, uint32_t index_end,
579
596
  bool apply_if_will_be_deleted) const {
580
597
  for (uint32_t i = index_begin; i < index_end; i++) {
581
598
  ClockHandle* h = &array_[i];
582
- if (h->TryExclusiveRef()) {
599
+ // We take an external ref because we are handing over control
600
+ // to a user-defined function, and because the handle will not be
601
+ // modified.
602
+ if (h->TryExternalRef()) {
583
603
  if (h->IsElement() &&
584
604
  (apply_if_will_be_deleted || !h->WillBeDeleted())) {
585
605
  func(h);
586
606
  }
587
- h->ReleaseExclusiveRef();
607
+ h->ReleaseExternalRef();
588
608
  }
589
609
  }
590
610
  }
@@ -601,6 +621,8 @@ class ClockHandleTable {
601
621
 
602
622
  size_t GetCapacity() const { return capacity_; }
603
623
 
624
+ void SetCapacity(size_t capacity) { capacity_ = capacity; }
625
+
604
626
  // Returns x mod 2^{length_bits_}.
605
627
  uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
606
628
 
@@ -652,7 +674,7 @@ class ClockHandleTable {
652
674
  const uint32_t occupancy_limit_;
653
675
 
654
676
  // Maximum total charge of all elements stored in the table.
655
- const size_t capacity_;
677
+ size_t capacity_;
656
678
 
657
679
  // We partition the following members into different cache lines
658
680
  // to avoid false sharing among Lookup, Release, Erase and Insert
@@ -745,10 +767,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
745
767
 
746
768
  private:
747
769
  friend class ClockCache;
770
+ friend class ClockCacheTest;
748
771
 
749
- // Free some space following strict clock policy until enough space
750
- // to hold (usage_ + charge) is freed or there are no evictable elements.
751
- void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);
772
+ ClockHandle* DetachedInsert(ClockHandle* h);
752
773
 
753
774
  // Returns the charge of a single handle.
754
775
  static size_t CalcEstimatedHandleCharge(
@@ -763,6 +784,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
763
784
  // Whether to reject insertion if cache reaches its full capacity.
764
785
  std::atomic<bool> strict_capacity_limit_;
765
786
 
787
+ // Handles allocated separately from the table.
788
+ std::atomic<size_t> detached_usage_;
789
+
766
790
  ClockHandleTable table_;
767
791
  }; // class ClockCacheShard
768
792
 
@@ -797,6 +821,7 @@ class ClockCache
797
821
 
798
822
  private:
799
823
  ClockCacheShard* shards_ = nullptr;
824
+
800
825
  int num_shards_;
801
826
  }; // class ClockCache
802
827