@nxtedition/rocksdb 7.1.4 → 7.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +32 -14
- package/deps/rocksdb/iostats.patch +19 -0
- package/deps/rocksdb/rocksdb/CMakeLists.txt +15 -1
- package/deps/rocksdb/rocksdb/cache/cache.cc +4 -0
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +6 -8
- package/deps/rocksdb/rocksdb/cache/cache_key.cc +184 -164
- package/deps/rocksdb/rocksdb/cache/cache_key.h +38 -29
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager_test.cc +4 -4
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +93 -58
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +92 -42
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +57 -32
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +114 -37
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +34 -2
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +187 -38
- package/deps/rocksdb/rocksdb/cache/fast_lru_cache.cc +3 -1
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +88 -19
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +48 -8
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +481 -224
- package/deps/rocksdb/rocksdb/crash_test.mk +15 -1
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc +2 -2
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +3 -7
- package/deps/rocksdb/rocksdb/db/blob/blob_index.h +1 -1
- package/deps/rocksdb/rocksdb/db/blob/blob_log_format.cc +3 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_log_writer.cc +25 -19
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +4 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +2 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +12 -4
- package/deps/rocksdb/rocksdb/db/blob/db_blob_basic_test.cc +149 -0
- package/deps/rocksdb/rocksdb/db/blob/db_blob_compaction_test.cc +105 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +2 -15
- package/deps/rocksdb/rocksdb/db/column_family_test.cc +17 -4
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +8 -8
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +0 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +5 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +56 -53
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +33 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +45 -11
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job_test.cc +1 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +143 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +43 -18
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +48 -65
- package/deps/rocksdb/rocksdb/db/corruption_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +73 -4
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +239 -190
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +71 -2
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +144 -33
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +18 -35
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +11 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc +7 -7
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +15 -8
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +3 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +11 -0
- package/deps/rocksdb/rocksdb/db/db_iter.cc +69 -11
- package/deps/rocksdb/rocksdb/db/db_iter.h +16 -0
- package/deps/rocksdb/rocksdb/db/db_kv_checksum_test.cc +239 -23
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
- package/deps/rocksdb/rocksdb/db/db_test.cc +61 -28
- package/deps/rocksdb/rocksdb/db/db_test2.cc +24 -9
- package/deps/rocksdb/rocksdb/db/db_wal_test.cc +17 -0
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_compaction_test.cc +61 -0
- package/deps/rocksdb/rocksdb/db/db_write_test.cc +130 -0
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +2 -1
- package/deps/rocksdb/rocksdb/db/experimental.cc +7 -8
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +1 -2
- package/deps/rocksdb/rocksdb/db/flush_job.cc +11 -7
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +7 -1
- package/deps/rocksdb/rocksdb/db/forward_iterator.cc +4 -2
- package/deps/rocksdb/rocksdb/db/import_column_family_job.cc +7 -1
- package/deps/rocksdb/rocksdb/db/import_column_family_job.h +6 -0
- package/deps/rocksdb/rocksdb/db/import_column_family_test.cc +6 -0
- package/deps/rocksdb/rocksdb/db/kv_checksum.h +8 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +48 -11
- package/deps/rocksdb/rocksdb/db/log_reader.h +8 -2
- package/deps/rocksdb/rocksdb/db/log_test.cc +10 -1
- package/deps/rocksdb/rocksdb/db/log_writer.cc +7 -1
- package/deps/rocksdb/rocksdb/db/manual_compaction_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/memtable.cc +222 -47
- package/deps/rocksdb/rocksdb/db/memtable.h +70 -14
- package/deps/rocksdb/rocksdb/db/memtable_list.cc +14 -8
- package/deps/rocksdb/rocksdb/db/memtable_list_test.cc +30 -10
- package/deps/rocksdb/rocksdb/db/perf_context_test.cc +5 -5
- package/deps/rocksdb/rocksdb/db/pinned_iterators_manager.h +5 -0
- package/deps/rocksdb/rocksdb/db/repair.cc +2 -3
- package/deps/rocksdb/rocksdb/db/seqno_time_test.cc +3 -7
- package/deps/rocksdb/rocksdb/db/table_cache.cc +72 -0
- package/deps/rocksdb/rocksdb/db/table_cache.h +19 -1
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +10 -15
- package/deps/rocksdb/rocksdb/db/table_properties_collector_test.cc +2 -2
- package/deps/rocksdb/rocksdb/db/version_builder_test.cc +35 -64
- package/deps/rocksdb/rocksdb/db/version_edit.cc +3 -32
- package/deps/rocksdb/rocksdb/db/version_edit.h +2 -12
- package/deps/rocksdb/rocksdb/db/version_edit_test.cc +10 -23
- package/deps/rocksdb/rocksdb/db/version_set.cc +71 -28
- package/deps/rocksdb/rocksdb/db/version_set.h +3 -3
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +7 -7
- package/deps/rocksdb/rocksdb/db/version_set_test.cc +17 -15
- package/deps/rocksdb/rocksdb/db/wal_manager.cc +0 -4
- package/deps/rocksdb/rocksdb/db/wal_manager_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/wide/db_wide_basic_test.cc +137 -42
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.cc +21 -0
- package/deps/rocksdb/rocksdb/db/wide/wide_column_serialization.h +1 -0
- package/deps/rocksdb/rocksdb/db/write_batch_test.cc +2 -1
- package/deps/rocksdb/rocksdb/db/write_callback_test.cc +4 -4
- package/deps/rocksdb/rocksdb/db/write_thread.cc +51 -46
- package/deps/rocksdb/rocksdb/db/write_thread.h +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +12 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +8 -0
- package/deps/rocksdb/rocksdb/env/env_posix.cc +1 -1
- package/deps/rocksdb/rocksdb/env/env_test.cc +38 -8
- package/deps/rocksdb/rocksdb/env/file_system.cc +20 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +2 -46
- package/deps/rocksdb/rocksdb/env/io_posix.cc +1 -0
- package/deps/rocksdb/rocksdb/file/writable_file_writer.cc +110 -5
- package/deps/rocksdb/rocksdb/file/writable_file_writer.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +29 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +31 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +10 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +3 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/wide_columns.h +2 -0
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger.cc +12 -0
- package/deps/rocksdb/rocksdb/logging/auto_roll_logger_test.cc +9 -13
- package/deps/rocksdb/rocksdb/logging/env_logger.h +39 -13
- package/deps/rocksdb/rocksdb/memory/memory_allocator_test.cc +1 -1
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +1 -1
- package/deps/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc +1 -1
- package/deps/rocksdb/rocksdb/microbench/db_basic_bench.cc +6 -0
- package/deps/rocksdb/rocksdb/monitoring/iostats_context_imp.h +4 -1
- package/deps/rocksdb/rocksdb/options/cf_options.cc +10 -3
- package/deps/rocksdb/rocksdb/options/cf_options.h +10 -5
- package/deps/rocksdb/rocksdb/options/options_helper.cc +4 -1
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +3 -1
- package/deps/rocksdb/rocksdb/options/options_test.cc +4 -2
- package/deps/rocksdb/rocksdb/port/util_logger.h +1 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +2 -6
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +1 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +52 -12
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +5 -7
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +9 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +28 -10
- package/deps/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +5 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +1 -0
- package/deps/rocksdb/rocksdb/table/get_context.cc +16 -6
- package/deps/rocksdb/rocksdb/table/table_reader.h +9 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +2 -1
- package/deps/rocksdb/rocksdb/table/unique_id.cc +22 -24
- package/deps/rocksdb/rocksdb/table/unique_id_impl.h +2 -1
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py +7 -0
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +41 -4
- package/deps/rocksdb/rocksdb/tools/db_sanity_test.cc +5 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +7 -8
- package/deps/rocksdb/rocksdb/tools/ldb_cmd_test.cc +6 -6
- package/deps/rocksdb/rocksdb/tools/reduce_levels_test.cc +1 -1
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/util/async_file_reader.h +3 -3
- package/deps/rocksdb/rocksdb/util/coro_utils.h +2 -1
- package/deps/rocksdb/rocksdb/util/file_reader_writer_test.cc +2 -0
- package/deps/rocksdb/rocksdb/util/hash_test.cc +67 -0
- package/deps/rocksdb/rocksdb/util/math.h +41 -0
- package/deps/rocksdb/rocksdb/util/math128.h +6 -0
- package/deps/rocksdb/rocksdb/util/single_thread_executor.h +2 -1
- package/deps/rocksdb/rocksdb/util/stderr_logger.h +13 -0
- package/deps/rocksdb/rocksdb/utilities/backup/backup_engine_test.cc +55 -46
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +3 -6
- package/deps/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/counted_fs.cc +10 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +5 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_lock_manager.h +6 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/ttl/ttl_test.cc +2 -2
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc +2 -2
- package/index.js +17 -8
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/prebuilds/darwin-x64/node.napi.node +0 -0
- package/prebuilds/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/logging/posix_logger.h +0 -179
|
@@ -69,10 +69,10 @@ ClockHandle* ClockHandleTable::Lookup(const Slice& key, uint32_t hash) {
|
|
|
69
69
|
// updates where it would be possible to combine into one CAS (more metadata
|
|
70
70
|
// under one atomic field) or maybe two atomic updates (one arithmetic, one
|
|
71
71
|
// bitwise). Something to think about optimizing.
|
|
72
|
-
e->InternalToExternalRef();
|
|
73
72
|
e->SetHit();
|
|
74
73
|
// The handle is now referenced, so we take it out of clock.
|
|
75
74
|
ClockOff(e);
|
|
75
|
+
e->InternalToExternalRef();
|
|
76
76
|
}
|
|
77
77
|
|
|
78
78
|
return e;
|
|
@@ -312,17 +312,20 @@ void ClockHandleTable::ClockRun(size_t charge) {
|
|
|
312
312
|
// hot element, it will be hard to get an exclusive ref.
|
|
313
313
|
// Do we need a mechanism to prevent an element from sitting
|
|
314
314
|
// for a long time in cache waiting to be evicted?
|
|
315
|
-
assert(charge <= capacity_);
|
|
316
315
|
autovector<ClockHandle> deleted;
|
|
317
316
|
uint32_t max_iterations =
|
|
318
|
-
|
|
317
|
+
ClockHandle::ClockPriority::HIGH *
|
|
318
|
+
(1 +
|
|
319
|
+
static_cast<uint32_t>(
|
|
320
|
+
GetTableSize() *
|
|
321
|
+
kLoadFactor)); // It may take up to HIGH passes to evict an element.
|
|
319
322
|
size_t usage_local = usage_;
|
|
320
|
-
|
|
323
|
+
size_t capacity_local = capacity_;
|
|
324
|
+
while (usage_local + charge > capacity_local && max_iterations--) {
|
|
321
325
|
uint32_t steps = 1 + static_cast<uint32_t>(1 / kLoadFactor);
|
|
322
326
|
uint32_t clock_pointer_local = (clock_pointer_ += steps) - steps;
|
|
323
327
|
for (uint32_t i = 0; i < steps; i++) {
|
|
324
328
|
ClockHandle* h = &array_[ModTableSize(clock_pointer_local + i)];
|
|
325
|
-
|
|
326
329
|
if (h->TryExclusiveRef()) {
|
|
327
330
|
if (h->WillBeDeleted()) {
|
|
328
331
|
Remove(h, &deleted);
|
|
@@ -335,7 +338,6 @@ void ClockHandleTable::ClockRun(size_t charge) {
|
|
|
335
338
|
// exclusive ref, we know we are in the latter case. This can only
|
|
336
339
|
// happen when the last external reference to an element was
|
|
337
340
|
// released, and the element was not immediately removed.
|
|
338
|
-
|
|
339
341
|
ClockOn(h);
|
|
340
342
|
}
|
|
341
343
|
ClockHandle::ClockPriority priority = h->GetClockPriority();
|
|
@@ -358,6 +360,7 @@ ClockCacheShard::ClockCacheShard(
|
|
|
358
360
|
size_t capacity, size_t estimated_value_size, bool strict_capacity_limit,
|
|
359
361
|
CacheMetadataChargePolicy metadata_charge_policy)
|
|
360
362
|
: strict_capacity_limit_(strict_capacity_limit),
|
|
363
|
+
detached_usage_(0),
|
|
361
364
|
table_(capacity, CalcHashBits(capacity, estimated_value_size,
|
|
362
365
|
metadata_charge_policy)) {
|
|
363
366
|
set_metadata_charge_policy(metadata_charge_policy);
|
|
@@ -401,15 +404,24 @@ void ClockCacheShard::ApplyToSomeEntries(
|
|
|
401
404
|
*state = index_end << (32 - length_bits);
|
|
402
405
|
}
|
|
403
406
|
|
|
404
|
-
table_.
|
|
407
|
+
table_.ConstApplyToEntriesRange(
|
|
405
408
|
[callback,
|
|
406
|
-
metadata_charge_policy = metadata_charge_policy_](ClockHandle* h) {
|
|
409
|
+
metadata_charge_policy = metadata_charge_policy_](const ClockHandle* h) {
|
|
407
410
|
callback(h->key(), h->value, h->GetCharge(metadata_charge_policy),
|
|
408
411
|
h->deleter);
|
|
409
412
|
},
|
|
410
413
|
index_begin, index_end, false);
|
|
411
414
|
}
|
|
412
415
|
|
|
416
|
+
ClockHandle* ClockCacheShard::DetachedInsert(ClockHandle* h) {
|
|
417
|
+
ClockHandle* e = new ClockHandle();
|
|
418
|
+
*e = *h;
|
|
419
|
+
e->SetDetached();
|
|
420
|
+
e->TryExternalRef();
|
|
421
|
+
detached_usage_ += h->total_charge;
|
|
422
|
+
return e;
|
|
423
|
+
}
|
|
424
|
+
|
|
413
425
|
size_t ClockCacheShard::CalcEstimatedHandleCharge(
|
|
414
426
|
size_t estimated_value_size,
|
|
415
427
|
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
@@ -430,12 +442,16 @@ int ClockCacheShard::CalcHashBits(
|
|
|
430
442
|
return FloorLog2((num_entries << 1) - 1);
|
|
431
443
|
}
|
|
432
444
|
|
|
433
|
-
void ClockCacheShard::SetCapacity(size_t
|
|
434
|
-
|
|
445
|
+
void ClockCacheShard::SetCapacity(size_t capacity) {
|
|
446
|
+
if (capacity > table_.GetCapacity()) {
|
|
447
|
+
assert(false); // Not supported.
|
|
448
|
+
}
|
|
449
|
+
table_.SetCapacity(capacity);
|
|
450
|
+
table_.ClockRun(detached_usage_);
|
|
435
451
|
}
|
|
436
452
|
|
|
437
|
-
void ClockCacheShard::SetStrictCapacityLimit(bool
|
|
438
|
-
|
|
453
|
+
void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
|
|
454
|
+
strict_capacity_limit_ = strict_capacity_limit;
|
|
439
455
|
}
|
|
440
456
|
|
|
441
457
|
Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
|
|
@@ -459,27 +475,32 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
|
|
|
459
475
|
|
|
460
476
|
Status s = Status::OK();
|
|
461
477
|
|
|
478
|
+
// Use a local copy to minimize cache synchronization.
|
|
479
|
+
size_t detached_usage = detached_usage_;
|
|
480
|
+
|
|
462
481
|
// Free space with the clock policy until enough space is freed or there are
|
|
463
482
|
// no evictable elements.
|
|
464
|
-
table_.ClockRun(tmp.total_charge);
|
|
483
|
+
table_.ClockRun(tmp.total_charge + detached_usage);
|
|
465
484
|
|
|
466
|
-
//
|
|
467
|
-
//
|
|
485
|
+
// Use local copies to minimize cache synchronization
|
|
486
|
+
// (occupancy_ and usage_ are read and written by all insertions).
|
|
468
487
|
uint32_t occupancy_local = table_.GetOccupancy();
|
|
469
|
-
size_t
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
488
|
+
size_t total_usage = table_.GetUsage() + detached_usage;
|
|
489
|
+
|
|
490
|
+
// TODO: Currently we support strict_capacity_limit == false as long as the
|
|
491
|
+
// number of pinned elements is below table_.GetOccupancyLimit(). We can
|
|
492
|
+
// always support it as follows: whenever we exceed this limit, we dynamically
|
|
493
|
+
// allocate a handle and return it (when the user provides a handle pointer,
|
|
494
|
+
// of course). Then, Release checks whether the handle was dynamically
|
|
495
|
+
// allocated, or is stored in the table.
|
|
496
|
+
if (total_usage + tmp.total_charge > table_.GetCapacity() &&
|
|
497
|
+
(strict_capacity_limit_ || handle == nullptr)) {
|
|
477
498
|
if (handle == nullptr) {
|
|
478
499
|
// Don't insert the entry but still return ok, as if the entry inserted
|
|
479
500
|
// into cache and get evicted immediately.
|
|
480
|
-
|
|
501
|
+
tmp.FreeData();
|
|
481
502
|
} else {
|
|
482
|
-
if (occupancy_local > table_.GetOccupancyLimit()) {
|
|
503
|
+
if (occupancy_local + 1 > table_.GetOccupancyLimit()) {
|
|
483
504
|
// TODO: Consider using a distinct status for this case, but usually
|
|
484
505
|
// it will be handled the same way as reaching charge capacity limit
|
|
485
506
|
s = Status::MemoryLimit(
|
|
@@ -491,22 +512,34 @@ Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
|
|
|
491
512
|
}
|
|
492
513
|
}
|
|
493
514
|
} else {
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
515
|
+
ClockHandle* h = nullptr;
|
|
516
|
+
if (handle != nullptr && occupancy_local + 1 > table_.GetOccupancyLimit()) {
|
|
517
|
+
// Even if the user wishes to overload the cache, we can't insert into
|
|
518
|
+
// the hash table. Instead, we dynamically allocate a new handle.
|
|
519
|
+
h = DetachedInsert(&tmp);
|
|
520
|
+
// TODO: Return special status?
|
|
521
|
+
} else {
|
|
522
|
+
// Insert into the cache. Note that the cache might get larger than its
|
|
523
|
+
// capacity if not enough space was freed up.
|
|
524
|
+
autovector<ClockHandle> deleted;
|
|
525
|
+
h = table_.Insert(&tmp, &deleted, handle != nullptr);
|
|
526
|
+
if (h == nullptr && handle != nullptr) {
|
|
527
|
+
// The table is full. This can happen when many threads simultaneously
|
|
528
|
+
// attempt an insert, and the table is operating close to full capacity.
|
|
529
|
+
h = DetachedInsert(&tmp);
|
|
530
|
+
}
|
|
531
|
+
// Notice that if handle == nullptr, we don't insert the entry but still
|
|
532
|
+
// return ok.
|
|
533
|
+
if (deleted.size() > 0) {
|
|
534
|
+
s = Status::OkOverwritten();
|
|
535
|
+
}
|
|
536
|
+
table_.Free(&deleted);
|
|
537
|
+
}
|
|
499
538
|
if (handle != nullptr) {
|
|
500
539
|
*handle = reinterpret_cast<Cache::Handle*>(h);
|
|
501
540
|
}
|
|
502
|
-
|
|
503
|
-
if (deleted.size() > 0) {
|
|
504
|
-
s = Status::OkOverwritten();
|
|
505
|
-
}
|
|
506
541
|
}
|
|
507
542
|
|
|
508
|
-
table_.Free(&deleted);
|
|
509
|
-
|
|
510
543
|
return s;
|
|
511
544
|
}
|
|
512
545
|
|
|
@@ -516,7 +549,7 @@ Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
|
|
|
516
549
|
|
|
517
550
|
bool ClockCacheShard::Ref(Cache::Handle* h) {
|
|
518
551
|
ClockHandle* e = reinterpret_cast<ClockHandle*>(h);
|
|
519
|
-
assert(e->
|
|
552
|
+
assert(e->ExternalRefs() > 0);
|
|
520
553
|
return e->TryExternalRef();
|
|
521
554
|
}
|
|
522
555
|
|
|
@@ -530,6 +563,20 @@ bool ClockCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
|
|
|
530
563
|
}
|
|
531
564
|
|
|
532
565
|
ClockHandle* h = reinterpret_cast<ClockHandle*>(handle);
|
|
566
|
+
|
|
567
|
+
if (UNLIKELY(h->IsDetached())) {
|
|
568
|
+
h->ReleaseExternalRef();
|
|
569
|
+
if (h->TryExclusiveRef()) {
|
|
570
|
+
// Only the last reference will succeed.
|
|
571
|
+
// Don't bother releasing the exclusive ref.
|
|
572
|
+
h->FreeData();
|
|
573
|
+
detached_usage_ -= h->total_charge;
|
|
574
|
+
delete h;
|
|
575
|
+
return true;
|
|
576
|
+
}
|
|
577
|
+
return false;
|
|
578
|
+
}
|
|
579
|
+
|
|
533
580
|
uint32_t refs = h->refs;
|
|
534
581
|
bool last_reference = ((refs & ClockHandle::EXTERNAL_REFS) == 1);
|
|
535
582
|
bool will_be_deleted = refs & ClockHandle::WILL_BE_DELETED;
|
|
@@ -569,14 +616,15 @@ size_t ClockCacheShard::GetPinnedUsage() const {
|
|
|
569
616
|
size_t clock_usage = 0;
|
|
570
617
|
|
|
571
618
|
table_.ConstApplyToEntriesRange(
|
|
572
|
-
[&clock_usage](ClockHandle* h) {
|
|
573
|
-
if (h->
|
|
619
|
+
[&clock_usage](const ClockHandle* h) {
|
|
620
|
+
if (h->ExternalRefs() > 1) {
|
|
621
|
+
// We check > 1 because we are holding an external ref.
|
|
574
622
|
clock_usage += h->total_charge;
|
|
575
623
|
}
|
|
576
624
|
},
|
|
577
625
|
0, table_.GetTableSize(), true);
|
|
578
626
|
|
|
579
|
-
return clock_usage;
|
|
627
|
+
return clock_usage + detached_usage_;
|
|
580
628
|
}
|
|
581
629
|
|
|
582
630
|
ClockCache::ClockCache(size_t capacity, size_t estimated_value_size,
|
|
@@ -649,8 +697,10 @@ void ClockCache::DisownData() {
|
|
|
649
697
|
std::shared_ptr<Cache> NewClockCache(
|
|
650
698
|
size_t capacity, int num_shard_bits, bool strict_capacity_limit,
|
|
651
699
|
CacheMetadataChargePolicy metadata_charge_policy) {
|
|
652
|
-
return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
|
|
653
|
-
|
|
700
|
+
return NewLRUCache(capacity, num_shard_bits, strict_capacity_limit,
|
|
701
|
+
/* high_pri_pool_ratio */ 0.5, nullptr,
|
|
702
|
+
kDefaultToAdaptiveMutex, metadata_charge_policy,
|
|
703
|
+
/* low_pri_pool_ratio */ 0.0);
|
|
654
704
|
}
|
|
655
705
|
|
|
656
706
|
std::shared_ptr<Cache> ExperimentalNewClockCache(
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
|
|
10
10
|
#pragma once
|
|
11
11
|
|
|
12
|
+
#include <sys/types.h>
|
|
13
|
+
|
|
12
14
|
#include <array>
|
|
13
15
|
#include <atomic>
|
|
14
16
|
#include <cstdint>
|
|
@@ -28,6 +30,9 @@ namespace ROCKSDB_NAMESPACE {
|
|
|
28
30
|
|
|
29
31
|
namespace clock_cache {
|
|
30
32
|
|
|
33
|
+
// Forward declaration of friend class.
|
|
34
|
+
class ClockCacheTest;
|
|
35
|
+
|
|
31
36
|
// An experimental alternative to LRUCache, using a lock-free, open-addressed
|
|
32
37
|
// hash table and clock eviction.
|
|
33
38
|
|
|
@@ -63,10 +68,10 @@ namespace clock_cache {
|
|
|
63
68
|
// can't be immediately deleted. In these cases, the flag will be later read
|
|
64
69
|
// and acted upon by the eviction algorithm. Importantly, WILL_BE_DELETED is
|
|
65
70
|
// used not only to defer deletions, but also as a barrier for external
|
|
66
|
-
// references: once WILL_BE_DELETED is set, lookups (which are the
|
|
67
|
-
// acquire new external references) will ignore the handle.
|
|
68
|
-
// when WILL_BE_DELETED is set, we say the handle is
|
|
69
|
-
// otherwise, that it's visible).
|
|
71
|
+
// references: once WILL_BE_DELETED is set, lookups (which are the most
|
|
72
|
+
// common way to acquire new external references) will ignore the handle.
|
|
73
|
+
// For this reason, when WILL_BE_DELETED is set, we say the handle is
|
|
74
|
+
// invisible (and, otherwise, that it's visible).
|
|
70
75
|
//
|
|
71
76
|
//
|
|
72
77
|
// 3. HASHING AND COLLISION RESOLUTION
|
|
@@ -192,10 +197,10 @@ struct ClockHandle {
|
|
|
192
197
|
size_t total_charge;
|
|
193
198
|
std::array<char, kCacheKeySize> key_data;
|
|
194
199
|
|
|
195
|
-
static constexpr uint8_t kIsElementOffset =
|
|
196
|
-
static constexpr uint8_t kClockPriorityOffset =
|
|
197
|
-
static constexpr uint8_t kIsHitOffset =
|
|
198
|
-
static constexpr uint8_t kCachePriorityOffset =
|
|
200
|
+
static constexpr uint8_t kIsElementOffset = 0;
|
|
201
|
+
static constexpr uint8_t kClockPriorityOffset = 1;
|
|
202
|
+
static constexpr uint8_t kIsHitOffset = 3;
|
|
203
|
+
static constexpr uint8_t kCachePriorityOffset = 4;
|
|
199
204
|
|
|
200
205
|
enum Flags : uint8_t {
|
|
201
206
|
// Whether the slot is in use by an element.
|
|
@@ -252,9 +257,8 @@ struct ClockHandle {
|
|
|
252
257
|
// Whether a thread has an exclusive reference to the slot.
|
|
253
258
|
EXCLUSIVE_REF = uint32_t{1} << kExclusiveRefOffset, // Bit 30
|
|
254
259
|
// Whether the handle will be deleted soon. When this bit is set, new
|
|
255
|
-
// internal
|
|
256
|
-
//
|
|
257
|
-
// There is an exception: external references can be created from
|
|
260
|
+
// internal references to this handle stop being accepted.
|
|
261
|
+
// External references may still be granted---they can be created from
|
|
258
262
|
// existing external references, or converting from existing internal
|
|
259
263
|
// references.
|
|
260
264
|
WILL_BE_DELETED = uint32_t{1} << kWillBeDeletedOffset // Bit 31
|
|
@@ -274,6 +278,9 @@ struct ClockHandle {
|
|
|
274
278
|
|
|
275
279
|
std::atomic<uint32_t> refs;
|
|
276
280
|
|
|
281
|
+
// True iff the handle is allocated separately from hash table.
|
|
282
|
+
bool detached;
|
|
283
|
+
|
|
277
284
|
ClockHandle()
|
|
278
285
|
: value(nullptr),
|
|
279
286
|
deleter(nullptr),
|
|
@@ -281,7 +288,8 @@ struct ClockHandle {
|
|
|
281
288
|
total_charge(0),
|
|
282
289
|
flags(0),
|
|
283
290
|
displacements(0),
|
|
284
|
-
refs(0)
|
|
291
|
+
refs(0),
|
|
292
|
+
detached(false) {
|
|
285
293
|
SetWillBeDeleted(false);
|
|
286
294
|
SetIsElement(false);
|
|
287
295
|
SetClockPriority(ClockPriority::NONE);
|
|
@@ -300,6 +308,7 @@ struct ClockHandle {
|
|
|
300
308
|
value = other.value;
|
|
301
309
|
deleter = other.deleter;
|
|
302
310
|
key_data = other.key_data;
|
|
311
|
+
hash = other.hash;
|
|
303
312
|
total_charge = other.total_charge;
|
|
304
313
|
}
|
|
305
314
|
|
|
@@ -350,13 +359,13 @@ struct ClockHandle {
|
|
|
350
359
|
|
|
351
360
|
// flags functions.
|
|
352
361
|
|
|
353
|
-
bool IsElement() const { return flags & IS_ELEMENT; }
|
|
362
|
+
bool IsElement() const { return flags & Flags::IS_ELEMENT; }
|
|
354
363
|
|
|
355
364
|
void SetIsElement(bool is_element) {
|
|
356
365
|
if (is_element) {
|
|
357
|
-
flags |= IS_ELEMENT;
|
|
366
|
+
flags |= Flags::IS_ELEMENT;
|
|
358
367
|
} else {
|
|
359
|
-
flags &= static_cast<uint8_t>(~IS_ELEMENT);
|
|
368
|
+
flags &= static_cast<uint8_t>(~Flags::IS_ELEMENT);
|
|
360
369
|
}
|
|
361
370
|
}
|
|
362
371
|
|
|
@@ -400,6 +409,10 @@ struct ClockHandle {
|
|
|
400
409
|
flags |= new_priority;
|
|
401
410
|
}
|
|
402
411
|
|
|
412
|
+
bool IsDetached() { return detached; }
|
|
413
|
+
|
|
414
|
+
void SetDetached() { detached = true; }
|
|
415
|
+
|
|
403
416
|
inline bool IsEmpty() const {
|
|
404
417
|
return !this->IsElement() && this->displacements == 0;
|
|
405
418
|
}
|
|
@@ -424,7 +437,9 @@ struct ClockHandle {
|
|
|
424
437
|
}
|
|
425
438
|
}
|
|
426
439
|
|
|
427
|
-
|
|
440
|
+
uint32_t ExternalRefs() const {
|
|
441
|
+
return (refs & EXTERNAL_REFS) >> kExternalRefsOffset;
|
|
442
|
+
}
|
|
428
443
|
|
|
429
444
|
// Tries to take an internal ref. Returns true iff it succeeds.
|
|
430
445
|
inline bool TryInternalRef() {
|
|
@@ -437,7 +452,7 @@ struct ClockHandle {
|
|
|
437
452
|
|
|
438
453
|
// Tries to take an external ref. Returns true iff it succeeds.
|
|
439
454
|
inline bool TryExternalRef() {
|
|
440
|
-
if (!((refs += kOneExternalRef) &
|
|
455
|
+
if (!((refs += kOneExternalRef) & EXCLUSIVE_REF)) {
|
|
441
456
|
return true;
|
|
442
457
|
}
|
|
443
458
|
refs -= kOneExternalRef;
|
|
@@ -529,8 +544,8 @@ class ClockHandleTable {
|
|
|
529
544
|
// Makes h non-evictable.
|
|
530
545
|
void ClockOff(ClockHandle* h);
|
|
531
546
|
|
|
532
|
-
// Runs the clock eviction algorithm until
|
|
533
|
-
//
|
|
547
|
+
// Runs the clock eviction algorithm until usage_ + charge is at most
|
|
548
|
+
// capacity_.
|
|
534
549
|
void ClockRun(size_t charge);
|
|
535
550
|
|
|
536
551
|
// Remove h from the hash table. Requires an exclusive ref to h.
|
|
@@ -548,8 +563,6 @@ class ClockHandleTable {
|
|
|
548
563
|
RemoveAll(key, hash, probe, deleted);
|
|
549
564
|
}
|
|
550
565
|
|
|
551
|
-
void Free(autovector<ClockHandle>* deleted);
|
|
552
|
-
|
|
553
566
|
// Tries to remove h from the hash table. If the attempt is successful,
|
|
554
567
|
// the function hands over an exclusive ref to h.
|
|
555
568
|
bool TryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
|
|
@@ -558,8 +571,13 @@ class ClockHandleTable {
|
|
|
558
571
|
// success. Requires that the caller thread has no shared ref to h.
|
|
559
572
|
bool SpinTryRemove(ClockHandle* h, autovector<ClockHandle>* deleted);
|
|
560
573
|
|
|
561
|
-
|
|
562
|
-
|
|
574
|
+
// Call this function after an Insert, Remove, RemoveAll, TryRemove
|
|
575
|
+
// or SpinTryRemove. It frees the deleted values and updates the hash table
|
|
576
|
+
// metadata.
|
|
577
|
+
void Free(autovector<ClockHandle>* deleted);
|
|
578
|
+
|
|
579
|
+
void ApplyToEntriesRange(std::function<void(ClockHandle*)> func,
|
|
580
|
+
uint32_t index_begin, uint32_t index_end,
|
|
563
581
|
bool apply_if_will_be_deleted) {
|
|
564
582
|
for (uint32_t i = index_begin; i < index_end; i++) {
|
|
565
583
|
ClockHandle* h = &array_[i];
|
|
@@ -573,18 +591,20 @@ class ClockHandleTable {
|
|
|
573
591
|
}
|
|
574
592
|
}
|
|
575
593
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
uint32_t index_end,
|
|
594
|
+
void ConstApplyToEntriesRange(std::function<void(const ClockHandle*)> func,
|
|
595
|
+
uint32_t index_begin, uint32_t index_end,
|
|
579
596
|
bool apply_if_will_be_deleted) const {
|
|
580
597
|
for (uint32_t i = index_begin; i < index_end; i++) {
|
|
581
598
|
ClockHandle* h = &array_[i];
|
|
582
|
-
|
|
599
|
+
// We take an external ref because we are handing over control
|
|
600
|
+
// to a user-defined function, and because the handle will not be
|
|
601
|
+
// modified.
|
|
602
|
+
if (h->TryExternalRef()) {
|
|
583
603
|
if (h->IsElement() &&
|
|
584
604
|
(apply_if_will_be_deleted || !h->WillBeDeleted())) {
|
|
585
605
|
func(h);
|
|
586
606
|
}
|
|
587
|
-
h->
|
|
607
|
+
h->ReleaseExternalRef();
|
|
588
608
|
}
|
|
589
609
|
}
|
|
590
610
|
}
|
|
@@ -601,6 +621,8 @@ class ClockHandleTable {
|
|
|
601
621
|
|
|
602
622
|
size_t GetCapacity() const { return capacity_; }
|
|
603
623
|
|
|
624
|
+
void SetCapacity(size_t capacity) { capacity_ = capacity; }
|
|
625
|
+
|
|
604
626
|
// Returns x mod 2^{length_bits_}.
|
|
605
627
|
uint32_t ModTableSize(uint32_t x) { return x & length_bits_mask_; }
|
|
606
628
|
|
|
@@ -652,7 +674,7 @@ class ClockHandleTable {
|
|
|
652
674
|
const uint32_t occupancy_limit_;
|
|
653
675
|
|
|
654
676
|
// Maximum total charge of all elements stored in the table.
|
|
655
|
-
|
|
677
|
+
size_t capacity_;
|
|
656
678
|
|
|
657
679
|
// We partition the following members into different cache lines
|
|
658
680
|
// to avoid false sharing among Lookup, Release, Erase and Insert
|
|
@@ -745,10 +767,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
|
|
|
745
767
|
|
|
746
768
|
private:
|
|
747
769
|
friend class ClockCache;
|
|
770
|
+
friend class ClockCacheTest;
|
|
748
771
|
|
|
749
|
-
|
|
750
|
-
// to hold (usage_ + charge) is freed or there are no evictable elements.
|
|
751
|
-
void EvictFromClock(size_t charge, autovector<ClockHandle>* deleted);
|
|
772
|
+
ClockHandle* DetachedInsert(ClockHandle* h);
|
|
752
773
|
|
|
753
774
|
// Returns the charge of a single handle.
|
|
754
775
|
static size_t CalcEstimatedHandleCharge(
|
|
@@ -763,6 +784,9 @@ class ALIGN_AS(CACHE_LINE_SIZE) ClockCacheShard final : public CacheShard {
|
|
|
763
784
|
// Whether to reject insertion if cache reaches its full capacity.
|
|
764
785
|
std::atomic<bool> strict_capacity_limit_;
|
|
765
786
|
|
|
787
|
+
// Handles allocated separately from the table.
|
|
788
|
+
std::atomic<size_t> detached_usage_;
|
|
789
|
+
|
|
766
790
|
ClockHandleTable table_;
|
|
767
791
|
}; // class ClockCacheShard
|
|
768
792
|
|
|
@@ -797,6 +821,7 @@ class ClockCache
|
|
|
797
821
|
|
|
798
822
|
private:
|
|
799
823
|
ClockCacheShard* shards_ = nullptr;
|
|
824
|
+
|
|
800
825
|
int num_shards_;
|
|
801
826
|
}; // class ClockCache
|
|
802
827
|
|