@nxtedition/rocksdb 13.5.13 → 15.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.cc +55 -180
- package/binding.gyp +2 -2
- package/chained-batch.js +9 -16
- package/deps/rocksdb/rocksdb/BUCK +18 -1
- package/deps/rocksdb/rocksdb/CMakeLists.txt +10 -3
- package/deps/rocksdb/rocksdb/Makefile +20 -9
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +90 -13
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +88 -75
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +44 -36
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +184 -148
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +5 -11
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +116 -47
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +1 -1
- package/deps/rocksdb/rocksdb/cache/secondary_cache_adapter.cc +3 -6
- package/deps/rocksdb/rocksdb/db/arena_wrapped_db_iter.h +1 -1
- package/deps/rocksdb/rocksdb/db/builder.cc +4 -2
- package/deps/rocksdb/rocksdb/db/c.cc +207 -0
- package/deps/rocksdb/rocksdb/db/c_test.c +72 -0
- package/deps/rocksdb/rocksdb/db/column_family.cc +3 -2
- package/deps/rocksdb/rocksdb/db/column_family.h +5 -0
- package/deps/rocksdb/rocksdb/db/compact_files_test.cc +4 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +2 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +51 -38
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +29 -12
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc +5 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +566 -366
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.h +131 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.cc +4 -4
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker.h +13 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc +12 -7
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h +8 -10
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc +97 -76
- package/deps/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc +11 -14
- package/deps/rocksdb/rocksdb/db/compaction/compaction_service_job.cc +1 -1
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +8 -0
- package/deps/rocksdb/rocksdb/db/compaction/tiered_compaction_test.cc +16 -3
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +1 -0
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +448 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +22 -20
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +4 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +5 -5
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_open.cc +7 -3
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_iter.cc +104 -0
- package/deps/rocksdb/rocksdb/db/db_iter.h +4 -11
- package/deps/rocksdb/rocksdb/db/db_iterator_test.cc +331 -58
- package/deps/rocksdb/rocksdb/db/db_memtable_test.cc +129 -0
- package/deps/rocksdb/rocksdb/db/db_sst_test.cc +64 -0
- package/deps/rocksdb/rocksdb/db/db_table_properties_test.cc +40 -0
- package/deps/rocksdb/rocksdb/db/db_test2.cc +25 -15
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +42 -24
- package/deps/rocksdb/rocksdb/db/db_test_util.h +29 -14
- package/deps/rocksdb/rocksdb/db/db_universal_compaction_test.cc +69 -36
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +0 -1
- package/deps/rocksdb/rocksdb/db/event_helpers.cc +1 -0
- package/deps/rocksdb/rocksdb/db/experimental.cc +5 -4
- package/deps/rocksdb/rocksdb/db/external_sst_file_basic_test.cc +8 -1
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc +275 -79
- package/deps/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h +23 -5
- package/deps/rocksdb/rocksdb/db/external_sst_file_test.cc +591 -175
- package/deps/rocksdb/rocksdb/db/flush_job.cc +3 -4
- package/deps/rocksdb/rocksdb/db/log_reader.cc +5 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +84 -35
- package/deps/rocksdb/rocksdb/db/memtable.h +39 -34
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +1 -0
- package/deps/rocksdb/rocksdb/db/merge_operator.cc +1 -1
- package/deps/rocksdb/rocksdb/db/multi_scan.cc +11 -5
- package/deps/rocksdb/rocksdb/db/version_edit.cc +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -1
- package/deps/rocksdb/rocksdb/db/version_edit_handler.cc +34 -14
- package/deps/rocksdb/rocksdb/db/version_edit_handler.h +28 -5
- package/deps/rocksdb/rocksdb/db/version_set.cc +159 -14
- package/deps/rocksdb/rocksdb/db/version_set.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt +1 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc +60 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_common.h +16 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compaction_service.h +75 -10
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.cc +28 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_compression_manager.h +2 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc +31 -1
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc +50 -2
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h +57 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h +0 -4
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc +266 -35
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h +5 -0
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc +0 -6
- package/deps/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc +18 -2
- package/deps/rocksdb/rocksdb/env/env.cc +12 -0
- package/deps/rocksdb/rocksdb/env/env_test.cc +18 -0
- package/deps/rocksdb/rocksdb/env/file_system_tracer.cc +2 -0
- package/deps/rocksdb/rocksdb/env/fs_posix.cc +9 -5
- package/deps/rocksdb/rocksdb/env/io_posix.cc +4 -2
- package/deps/rocksdb/rocksdb/file/random_access_file_reader.cc +19 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_compression.h +33 -31
- package/deps/rocksdb/rocksdb/include/rocksdb/advanced_options.h +42 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/c.h +93 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +43 -49
- package/deps/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h +4 -3
- package/deps/rocksdb/rocksdb/include/rocksdb/compression_type.h +8 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/data_structure.h +487 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/db.h +11 -12
- package/deps/rocksdb/rocksdb/include/rocksdb/env.h +135 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/file_system.h +5 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iostats_context.h +12 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/iterator.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/ldb_tool.h +8 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/memtablerep.h +12 -8
- package/deps/rocksdb/rocksdb/include/rocksdb/metadata.h +3 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/multi_scan.h +19 -9
- package/deps/rocksdb/rocksdb/include/rocksdb/options.h +219 -24
- package/deps/rocksdb/rocksdb/include/rocksdb/point_lock_bench_tool.h +14 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +2 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/slice.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/statistics.h +7 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/status.h +16 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/table.h +16 -4
- package/deps/rocksdb/rocksdb/include/rocksdb/table_properties.h +13 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/types.h +4 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/universal_compaction.h +0 -2
- package/deps/rocksdb/rocksdb/include/rocksdb/user_defined_index.h +45 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/cache_dump_load.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h +1 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h +6 -1
- package/deps/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +2 -2
- package/deps/rocksdb/rocksdb/memory/memory_allocator_impl.h +3 -3
- package/deps/rocksdb/rocksdb/memtable/inlineskiplist.h +77 -51
- package/deps/rocksdb/rocksdb/memtable/skiplist.h +10 -13
- package/deps/rocksdb/rocksdb/memtable/skiplistrep.cc +16 -7
- package/deps/rocksdb/rocksdb/memtable/vectorrep.cc +9 -4
- package/deps/rocksdb/rocksdb/monitoring/iostats_context.cc +2 -0
- package/deps/rocksdb/rocksdb/monitoring/statistics.cc +6 -0
- package/deps/rocksdb/rocksdb/options/cf_options.cc +13 -1
- package/deps/rocksdb/rocksdb/options/cf_options.h +6 -2
- package/deps/rocksdb/rocksdb/options/options.cc +2 -0
- package/deps/rocksdb/rocksdb/options/options_helper.cc +9 -8
- package/deps/rocksdb/rocksdb/options/options_settable_test.cc +9 -5
- package/deps/rocksdb/rocksdb/port/mmap.cc +1 -1
- package/deps/rocksdb/rocksdb/port/win/xpress_win.cc +51 -0
- package/deps/rocksdb/rocksdb/port/win/xpress_win.h +4 -0
- package/deps/rocksdb/rocksdb/src.mk +8 -2
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +1125 -765
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +35 -24
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +29 -4
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.cc +732 -256
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_iterator.h +225 -16
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +102 -26
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +2 -75
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_test.cc +433 -141
- package/deps/rocksdb/rocksdb/table/block_based/block_builder.h +2 -0
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy.cc +17 -10
- package/deps/rocksdb/rocksdb/table/block_based/flush_block_policy_impl.h +20 -0
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.cc +112 -85
- package/deps/rocksdb/rocksdb/table/block_based/index_builder.h +191 -36
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +2 -2
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +1 -1
- package/deps/rocksdb/rocksdb/table/block_based/user_defined_index_wrapper.h +108 -31
- package/deps/rocksdb/rocksdb/table/external_table.cc +7 -3
- package/deps/rocksdb/rocksdb/table/format.cc +6 -12
- package/deps/rocksdb/rocksdb/table/format.h +10 -0
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +1 -1
- package/deps/rocksdb/rocksdb/table/iterator_wrapper.h +1 -1
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +1 -1
- package/deps/rocksdb/rocksdb/table/meta_blocks.cc +5 -0
- package/deps/rocksdb/rocksdb/table/multiget_context.h +3 -1
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.cc +118 -46
- package/deps/rocksdb/rocksdb/table/sst_file_dumper.h +9 -8
- package/deps/rocksdb/rocksdb/table/table_builder.h +5 -0
- package/deps/rocksdb/rocksdb/table/table_properties.cc +16 -0
- package/deps/rocksdb/rocksdb/table/table_test.cc +1540 -155
- package/deps/rocksdb/rocksdb/test_util/testutil.h +21 -5
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +26 -5
- package/deps/rocksdb/rocksdb/tools/ldb.cc +1 -2
- package/deps/rocksdb/rocksdb/tools/ldb_cmd.cc +2 -0
- package/deps/rocksdb/rocksdb/tools/ldb_tool.cc +9 -3
- package/deps/rocksdb/rocksdb/tools/sst_dump_test.cc +133 -165
- package/deps/rocksdb/rocksdb/tools/sst_dump_tool.cc +173 -64
- package/deps/rocksdb/rocksdb/util/aligned_buffer.h +69 -0
- package/deps/rocksdb/rocksdb/util/atomic.h +6 -0
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.cc +29 -20
- package/deps/rocksdb/rocksdb/util/auto_tune_compressor.h +10 -6
- package/deps/rocksdb/rocksdb/util/bit_fields.h +338 -0
- package/deps/rocksdb/rocksdb/util/coding.h +3 -3
- package/deps/rocksdb/rocksdb/util/compaction_job_stats_impl.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compression.cc +777 -82
- package/deps/rocksdb/rocksdb/util/compression.h +5 -0
- package/deps/rocksdb/rocksdb/util/compression_test.cc +5 -3
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.cc +2 -2
- package/deps/rocksdb/rocksdb/util/dynamic_bloom.h +15 -14
- package/deps/rocksdb/rocksdb/util/interval_test.cc +102 -0
- package/deps/rocksdb/rocksdb/util/semaphore.h +164 -0
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.cc +10 -6
- package/deps/rocksdb/rocksdb/util/simple_mixed_compressor.h +4 -2
- package/deps/rocksdb/rocksdb/util/slice_test.cc +136 -0
- package/deps/rocksdb/rocksdb/util/status.cc +1 -0
- package/deps/rocksdb/rocksdb/util/string_util.cc +2 -16
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +1 -1
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.cc +7 -4
- package/deps/rocksdb/rocksdb/utilities/fault_injection_fs.h +35 -14
- package/deps/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/lock_manager.cc +5 -2
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/any_lock_manager_test.h +244 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench.cc +18 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_bench_tool.cc +159 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.cc +1244 -161
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager.h +66 -12
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_stress_test.cc +103 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.cc +1275 -8
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test.h +40 -262
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_manager_test_common.h +78 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/point/point_lock_validation_test_runner.h +469 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_locking_test.cc +2 -6
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h +9 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/timestamped_snapshot_test.cc +18 -9
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_base.h +2 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc +2 -1
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +72 -44
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.h +92 -15
- package/deps/rocksdb/rocksdb/utilities/transactions/write_committed_transaction_ts_test.cc +6 -20
- package/deps/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc +143 -112
- package/deps/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc +23 -16
- package/index.js +18 -42
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/@nxtedition+rocksdb.node +0 -0
- package/prebuilds/linux-x64/@nxtedition+rocksdb.node +0 -0
- package/util.h +38 -12
- package/deps/rocksdb/rocksdb/db_stress_tool/db_stress_stat.cc +0 -17
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
#include "rocksdb/slice.h"
|
|
14
14
|
#include "rocksdb/utilities/transaction_db_mutex.h"
|
|
15
15
|
#include "test_util/sync_point.h"
|
|
16
|
-
#include "util/cast_util.h"
|
|
17
16
|
#include "util/hash.h"
|
|
18
17
|
#include "util/thread_local.h"
|
|
19
18
|
#include "utilities/transactions/pessimistic_transaction_db.h"
|
|
@@ -21,36 +20,275 @@
|
|
|
21
20
|
|
|
22
21
|
namespace ROCKSDB_NAMESPACE {
|
|
23
22
|
|
|
23
|
+
constexpr bool kDebugLog = false;
|
|
24
|
+
|
|
25
|
+
// KeyLockWaiter represents a waiter for a key lock. It contains a condition
|
|
26
|
+
// variable to allow waiter to wait for the key lock. It also contains other
|
|
27
|
+
// metadata about the waiter such as transaction id, lock type etc.
|
|
28
|
+
struct KeyLockWaiter {
|
|
29
|
+
KeyLockWaiter(std::shared_ptr<TransactionDBCondVar> c, TransactionID i,
|
|
30
|
+
bool ex)
|
|
31
|
+
: id(i), exclusive(ex), ready(false), cv(std::move(c)) {}
|
|
32
|
+
|
|
33
|
+
// disable copy constructor and assignment operator, move and move
|
|
34
|
+
// assignment
|
|
35
|
+
KeyLockWaiter(const KeyLockWaiter&) = delete;
|
|
36
|
+
KeyLockWaiter& operator=(const KeyLockWaiter&) = delete;
|
|
37
|
+
KeyLockWaiter(KeyLockWaiter&&) = delete;
|
|
38
|
+
KeyLockWaiter& operator=(KeyLockWaiter&&) = delete;
|
|
39
|
+
|
|
40
|
+
~KeyLockWaiter() = default;
|
|
41
|
+
|
|
42
|
+
// Reset the waiter to be used again
|
|
43
|
+
void Reset(TransactionID i, bool e) {
|
|
44
|
+
id = i;
|
|
45
|
+
exclusive = e;
|
|
46
|
+
ready = false;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
// Check whether the waiter has been notified that it is its turn to take the
|
|
50
|
+
// lock
|
|
51
|
+
bool IsReady() const { return ready; }
|
|
52
|
+
|
|
53
|
+
// Wait indefinitely until it is this waiter's turn to take the lock
|
|
54
|
+
Status Wait(std::shared_ptr<TransactionDBMutex>& mutex) {
|
|
55
|
+
// Mutex is already locked by caller
|
|
56
|
+
// Check ready flag before wait
|
|
57
|
+
if (ready) {
|
|
58
|
+
return Status::OK();
|
|
59
|
+
}
|
|
60
|
+
return AfterWait(cv->Wait(mutex));
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Wait until its turn to take the lock within timeout_us
|
|
64
|
+
Status WaitFor(std::shared_ptr<TransactionDBMutex>& mutex,
|
|
65
|
+
int64_t timeout_us) {
|
|
66
|
+
// Mutex is already locked by caller
|
|
67
|
+
// Check ready flag before wait
|
|
68
|
+
if (ready) {
|
|
69
|
+
return Status::OK();
|
|
70
|
+
}
|
|
71
|
+
return AfterWait(cv->WaitFor(mutex, timeout_us));
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Notify the waiter to take the lock
|
|
75
|
+
void Notify() {
|
|
76
|
+
// Mutex is already locked by caller
|
|
77
|
+
ready = true;
|
|
78
|
+
cv->Notify();
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
TransactionID id;
|
|
82
|
+
bool exclusive;
|
|
83
|
+
|
|
84
|
+
private:
|
|
85
|
+
Status AfterWait(Status wait_result) {
|
|
86
|
+
if (wait_result.ok() || wait_result.IsTimedOut()) {
|
|
87
|
+
// check ready again after wake up.
|
|
88
|
+
if (ready) {
|
|
89
|
+
return Status::OK();
|
|
90
|
+
} else {
|
|
91
|
+
return Status::TimedOut(Status::SubCode::kMutexTimeout);
|
|
92
|
+
}
|
|
93
|
+
} else {
|
|
94
|
+
return wait_result;
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// Track whether the waiter has been woken up explicitly.
|
|
99
|
+
bool ready;
|
|
100
|
+
// TODO(Xingbo), Switch to std::binary_semaphore, once we have c++20
|
|
101
|
+
// semaphore is likely more performant than mutex + cv.
|
|
102
|
+
// Although we will also need to implement TransactionDBSemaphore, which would
|
|
103
|
+
// be required if external system wants to do instrumented lock wait tracking
|
|
104
|
+
std::shared_ptr<TransactionDBCondVar> cv;
|
|
105
|
+
};
|
|
106
|
+
|
|
24
107
|
struct LockInfo {
|
|
108
|
+
LockInfo(TransactionID id, uint64_t time, bool ex)
|
|
109
|
+
: exclusive(ex), expiration_time(time) {
|
|
110
|
+
txn_ids.push_back(id);
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
DECLARE_DEFAULT_MOVES(LockInfo);
|
|
114
|
+
|
|
25
115
|
bool exclusive;
|
|
26
116
|
autovector<TransactionID> txn_ids;
|
|
27
117
|
|
|
28
118
|
// Transaction locks are not valid after this time in us
|
|
29
119
|
uint64_t expiration_time;
|
|
30
120
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
121
|
+
// waiter queue for this key
|
|
122
|
+
// TODO xingbo, use intrusive list to avoid extra memory allocation
|
|
123
|
+
std::unique_ptr<std::list<KeyLockWaiter*>> waiter_queue;
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
// Print debug info for lock waiter wake up action.
|
|
127
|
+
void DebugWakeUpWaiter(TransactionID txn_id, TransactionID waiter_id,
|
|
128
|
+
const std::string& key, const std::string& msg) {
|
|
129
|
+
if (kDebugLog) {
|
|
130
|
+
// print which waiter got woken up
|
|
131
|
+
fprintf(stderr,
|
|
132
|
+
"Txn %" PRIu64 ": wake up next waiter on %s Txn %" PRIu64
|
|
133
|
+
" on key %s\n",
|
|
134
|
+
txn_id, msg.c_str(), waiter_id, key.c_str());
|
|
135
|
+
fflush(stderr);
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
// Key lock waiter context, used to remove the waiter from its queue automatically
|
|
140
|
+
struct KeyLockWaiterContext {
|
|
141
|
+
// When a lock waiter is aborted due to deadlock or timeout, this function
|
|
142
|
+
// is used to wake up the waiters after it, if they could proceed.
|
|
143
|
+
void TryWakeUpNextWaiters(const LockInfo& lock_info, const std::string& key) {
|
|
144
|
+
if (waiter_queue != nullptr && lock_waiter != waiter_queue->end()) {
|
|
145
|
+
bool wake_up_next_shared_waiters = false;
|
|
146
|
+
|
|
147
|
+
if (lock_waiter == waiter_queue->begin()) {
|
|
148
|
+
// if lock waiter is at the head of the queue, check the current lock
|
|
149
|
+
// status. If it is an exclusive lock, no waiter should be woken up.
|
|
150
|
+
// Otherwise, try to wake up shared lock waiters on the right side of itself.
|
|
151
|
+
wake_up_next_shared_waiters = !lock_info.exclusive;
|
|
152
|
+
} else {
|
|
153
|
+
// if lock waiter is not at the head of the queue, check the previous
|
|
154
|
+
// lock status. If it is active and shared, it should try to wake up the
|
|
155
|
+
// shared lock waiter on the right side of itself.
|
|
156
|
+
auto lock_waiter_prev = lock_waiter;
|
|
157
|
+
lock_waiter_prev--;
|
|
158
|
+
wake_up_next_shared_waiters =
|
|
159
|
+
(*lock_waiter_prev)->IsReady() && !(*lock_waiter_prev)->exclusive;
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
if (wake_up_next_shared_waiters) {
|
|
163
|
+
// Go through all the waiters on the right side of the lock waiter and
|
|
164
|
+
// wake up the shared lock waiter until the end of the queue or
|
|
165
|
+
// encountered an exclusive lock waiter.
|
|
166
|
+
auto lock_waiter_next = lock_waiter;
|
|
167
|
+
lock_waiter_next++;
|
|
168
|
+
while (lock_waiter_next != waiter_queue->end() &&
|
|
169
|
+
!(*lock_waiter_next)->exclusive) {
|
|
170
|
+
(*lock_waiter_next)->Notify();
|
|
171
|
+
DebugWakeUpWaiter((*lock_waiter)->id, (*lock_waiter_next)->id, key,
|
|
172
|
+
"TryWakeUpNextWaiters");
|
|
173
|
+
lock_waiter_next++;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
34
177
|
}
|
|
35
|
-
LockInfo(const LockInfo& lock_info)
|
|
36
178
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
179
|
+
~KeyLockWaiterContext() {
|
|
180
|
+
if (waiter_queue != nullptr && lock_waiter != waiter_queue->end()) {
|
|
181
|
+
waiter_queue->erase(lock_waiter);
|
|
182
|
+
lock_waiter = waiter_queue->end();
|
|
183
|
+
}
|
|
184
|
+
waiter_queue = nullptr;
|
|
42
185
|
}
|
|
43
|
-
|
|
186
|
+
|
|
187
|
+
// The waiter queue the lock waiter joined. Used to remove the waiter from
|
|
188
|
+
// the waiter queue.
|
|
189
|
+
std::list<KeyLockWaiter*>* waiter_queue = nullptr;
|
|
190
|
+
// The stable iterator that tracks the position of the waiter in the waiter
|
|
191
|
+
// queue. Used to remove the waiter from the waiter queue.
|
|
192
|
+
std::list<KeyLockWaiter*>::iterator lock_waiter;
|
|
44
193
|
};
|
|
45
194
|
|
|
46
195
|
struct LockMapStripe {
|
|
47
|
-
explicit LockMapStripe(std::shared_ptr<TransactionDBMutexFactory> factory
|
|
48
|
-
|
|
49
|
-
|
|
196
|
+
explicit LockMapStripe(std::shared_ptr<TransactionDBMutexFactory> factory,
|
|
197
|
+
ThreadLocalPtr& key_lock_waiter)
|
|
198
|
+
: mutex_factory_(std::move(factory)), key_lock_waiter_(key_lock_waiter) {
|
|
199
|
+
stripe_mutex = mutex_factory_->AllocateMutex();
|
|
200
|
+
stripe_cv = mutex_factory_->AllocateCondVar();
|
|
201
|
+
|
|
50
202
|
assert(stripe_mutex);
|
|
51
203
|
assert(stripe_cv);
|
|
52
204
|
}
|
|
53
205
|
|
|
206
|
+
LockInfo* GetLockInfo(const std::string& key) {
|
|
207
|
+
auto lock_info_iter = keys.find(key);
|
|
208
|
+
if (lock_info_iter != keys.end()) {
|
|
209
|
+
return &lock_info_iter->second;
|
|
210
|
+
} else {
|
|
211
|
+
return nullptr;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// Enqueue a waiter for this key and record its position in waiter_context.
|
|
216
|
+
// Does not wait; see WaitOnLock, where timeout_us == 0 means wait forever.
|
|
217
|
+
void JoinWaitQueue(LockInfo& lock_info, TransactionID id, bool exclusive,
|
|
218
|
+
bool isUpgrade, KeyLockWaiterContext& waiter_context) {
|
|
219
|
+
if (lock_info.waiter_queue == nullptr) {
|
|
220
|
+
// no waiter queue yet, create a new one
|
|
221
|
+
lock_info.waiter_queue = std::make_unique<std::list<KeyLockWaiter*>>();
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
auto waiter_queue = lock_info.waiter_queue.get();
|
|
225
|
+
|
|
226
|
+
// by default insert the new lock waiter at the end of the queue.
|
|
227
|
+
auto insert_point = waiter_queue->end();
|
|
228
|
+
|
|
229
|
+
if (isUpgrade) {
|
|
230
|
+
// If transaction is upgrading a shared lock to exclusive lock, prioritize
|
|
231
|
+
// it by moving its lock waiter before the first exclusive lock in the
|
|
232
|
+
// queue if there is one, or to the end of the queue otherwise. It will be
|
|
233
|
+
// able to acquire the lock after the other shared locks waiters at the
|
|
234
|
+
// front of queue acquired and released locks. This reduces the chance of
|
|
235
|
+
// deadlock, which makes transaction run more efficiently.
|
|
236
|
+
|
|
237
|
+
if (waiter_context.waiter_queue != nullptr) {
|
|
238
|
+
// If waiter_context is already initialized, it means current
|
|
239
|
+
// transaction already joined the lock queue. Don't move the lock
|
|
240
|
+
// position if it is already at the head of the queue or the lock
|
|
241
|
+
// waiters before it are ready to take the lock.
|
|
242
|
+
if (waiter_context.lock_waiter == waiter_queue->begin()) {
|
|
243
|
+
return;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
auto prev_lock_waiter = waiter_context.lock_waiter;
|
|
247
|
+
prev_lock_waiter--;
|
|
248
|
+
if ((*prev_lock_waiter)->IsReady()) {
|
|
249
|
+
return;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// Remove existing lock waiter
|
|
253
|
+
waiter_queue->erase(waiter_context.lock_waiter);
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
// For upgrade, insert waiter either at the end of the queue or before the
|
|
257
|
+
// first exclusive lock waiter.
|
|
258
|
+
insert_point = waiter_queue->begin();
|
|
259
|
+
while ((insert_point != waiter_queue->end()) &&
|
|
260
|
+
(!(*insert_point)->exclusive)) {
|
|
261
|
+
insert_point++;
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
// Insert the new lock waiter
|
|
266
|
+
waiter_context.lock_waiter =
|
|
267
|
+
waiter_queue->insert(insert_point, GetKeyLockWaiter(id, exclusive));
|
|
268
|
+
|
|
269
|
+
waiter_context.waiter_queue = waiter_queue;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// Wait on an existing KeyLockWaiter until its turn to take the lock or
|
|
273
|
+
// timeout
|
|
274
|
+
Status WaitOnLock(std::list<KeyLockWaiter*>::iterator& lock_waiter,
|
|
275
|
+
int64_t timeout_us = 0) {
|
|
276
|
+
Status ret;
|
|
277
|
+
if (timeout_us == 0) {
|
|
278
|
+
ret = (*lock_waiter)->Wait(stripe_mutex);
|
|
279
|
+
} else {
|
|
280
|
+
ret = (*lock_waiter)->WaitFor(stripe_mutex, timeout_us);
|
|
281
|
+
}
|
|
282
|
+
return ret;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
void ReleaseLastLockHolder(
|
|
286
|
+
LockInfo& lock_info,
|
|
287
|
+
UnorderedMap<std::string, LockInfo>::iterator stripe_iter,
|
|
288
|
+
LockMap* lock_map, TransactionID txn_id, const std::string& key,
|
|
289
|
+
const int64_t max_num_locks, autovector<TransactionID>& txns,
|
|
290
|
+
autovector<TransactionID>::iterator& txn_it);
|
|
291
|
+
|
|
54
292
|
// Mutex must be held before modifying keys map
|
|
55
293
|
std::shared_ptr<TransactionDBMutex> stripe_mutex;
|
|
56
294
|
|
|
@@ -60,16 +298,39 @@ struct LockMapStripe {
|
|
|
60
298
|
// Locked keys mapped to the info about the transactions that locked them.
|
|
61
299
|
// TODO(agiardullo): Explore performance of other data structures.
|
|
62
300
|
UnorderedMap<std::string, LockInfo> keys;
|
|
301
|
+
|
|
302
|
+
private:
|
|
303
|
+
std::shared_ptr<TransactionDBMutexFactory> mutex_factory_;
|
|
304
|
+
|
|
305
|
+
// key lock waiter, wrapped in thread local for reusing it across
|
|
306
|
+
// transactions.
|
|
307
|
+
ThreadLocalPtr& key_lock_waiter_;
|
|
308
|
+
|
|
309
|
+
// Return key lock waiter stored in thread local var, create on first use
|
|
310
|
+
KeyLockWaiter* GetKeyLockWaiter(TransactionID id, bool exclusive) {
|
|
311
|
+
KeyLockWaiter* waiter = nullptr;
|
|
312
|
+
if (key_lock_waiter_.Get() == nullptr) {
|
|
313
|
+
// create key lock waiter
|
|
314
|
+
key_lock_waiter_.Reset(
|
|
315
|
+
new KeyLockWaiter(mutex_factory_->AllocateCondVar(), id, exclusive));
|
|
316
|
+
waiter = static_cast<KeyLockWaiter*>(key_lock_waiter_.Get());
|
|
317
|
+
} else {
|
|
318
|
+
waiter = static_cast<KeyLockWaiter*>(key_lock_waiter_.Get());
|
|
319
|
+
waiter->Reset(id, exclusive);
|
|
320
|
+
}
|
|
321
|
+
return waiter;
|
|
322
|
+
}
|
|
63
323
|
};
|
|
64
324
|
|
|
65
325
|
// Map of #num_stripes LockMapStripes
|
|
66
326
|
struct LockMap {
|
|
67
327
|
explicit LockMap(size_t num_stripes,
|
|
68
|
-
std::shared_ptr<TransactionDBMutexFactory> factory
|
|
69
|
-
|
|
328
|
+
std::shared_ptr<TransactionDBMutexFactory> factory,
|
|
329
|
+
ThreadLocalPtr& key_lock_waiter)
|
|
330
|
+
: num_stripes_(num_stripes), key_lock_waiter_(key_lock_waiter) {
|
|
70
331
|
lock_map_stripes_.reserve(num_stripes);
|
|
71
332
|
for (size_t i = 0; i < num_stripes; i++) {
|
|
72
|
-
LockMapStripe* stripe = new LockMapStripe(factory);
|
|
333
|
+
LockMapStripe* stripe = new LockMapStripe(factory, key_lock_waiter_);
|
|
73
334
|
lock_map_stripes_.push_back(stripe);
|
|
74
335
|
}
|
|
75
336
|
}
|
|
@@ -78,20 +339,80 @@ struct LockMap {
|
|
|
78
339
|
for (auto stripe : lock_map_stripes_) {
|
|
79
340
|
delete stripe;
|
|
80
341
|
}
|
|
342
|
+
// Validate total locked key count is 0, when lock map is destructed.
|
|
343
|
+
assert(locked_key_cnt.LoadRelaxed() == 0);
|
|
81
344
|
}
|
|
82
345
|
|
|
83
346
|
// Number of separate LockMapStripes to create, each with their own Mutex
|
|
84
347
|
const size_t num_stripes_;
|
|
348
|
+
ThreadLocalPtr& key_lock_waiter_;
|
|
85
349
|
|
|
86
350
|
// Count of keys that are currently locked in this column family.
|
|
351
|
+
// Note that multiple shared locks on the same key are counted as 1 lock.
|
|
87
352
|
// (Only maintained if PointLockManager::max_num_locks_ is positive.)
|
|
88
|
-
|
|
353
|
+
RelaxedAtomic<int64_t> locked_key_cnt{0};
|
|
89
354
|
|
|
90
355
|
std::vector<LockMapStripe*> lock_map_stripes_;
|
|
91
356
|
|
|
92
357
|
size_t GetStripe(const std::string& key) const;
|
|
93
358
|
};
|
|
94
359
|
|
|
360
|
+
inline void RemoveTransaction(autovector<TransactionID>& txns,
|
|
361
|
+
autovector<TransactionID>::iterator& txn_it) {
|
|
362
|
+
if (txns.size() > 1) {
|
|
363
|
+
auto last_it = txns.end() - 1;
|
|
364
|
+
if (txn_it != last_it) {
|
|
365
|
+
*txn_it = *last_it;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
txns.pop_back();
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
// Releases the hold of transaction `txn_id` (located at `txn_it` inside
// `txns`) on `key`, and hands the key over to queued waiters if there are any.
//
// - No waiter queue, or an empty one: the key entry `stripe_iter` is erased
//   from this stripe and, when `max_num_locks` is positive,
//   lock_map->locked_key_cnt is decremented.
// - Otherwise: the transaction is removed from `txns` and waiters are
//   notified from the front of the queue. Consecutive shared waiters are all
//   notified; an exclusive waiter is notified only if it is the very first
//   entry, and the scan always stops at the first exclusive waiter.
//
// Notified waiters are not removed from the queue here, so that newly
// arriving transactions can still see that there are waiters ahead of them.
void LockMapStripe::ReleaseLastLockHolder(
    LockInfo& lock_info,
    UnorderedMap<std::string, LockInfo>::iterator stripe_iter,
    LockMap* lock_map, TransactionID txn_id, const std::string& key,
    const int64_t max_num_locks, autovector<TransactionID>& txns,
    autovector<TransactionID>::iterator& txn_it) {
  const bool nobody_waiting =
      lock_info.waiter_queue == nullptr || lock_info.waiter_queue->empty();

  if (nobody_waiting) {
    // Nothing to hand over to: drop the whole entry for this key.
    keys.erase(stripe_iter);
    if (max_num_locks > 0) {
      // The count is only maintained when a lock limit is configured.
      assert(lock_map->locked_key_cnt.LoadRelaxed() > 0);
      lock_map->locked_key_cnt.FetchSubRelaxed(1);
    }
    return;
  }

  // Waiters exist: keep the entry, remove only this holder, then wake up
  // whoever is next in line.
  RemoveTransaction(txns, txn_it);

  bool at_queue_head = true;
  for (auto& waiter : *lock_info.waiter_queue) {
    if (!waiter->exclusive) {
      // Shared waiters ahead of the first exclusive waiter are all woken up.
      waiter->Notify();
      DebugWakeUpWaiter(txn_id, waiter->id, key, "UnlockKey S waiter");
      at_queue_head = false;
      continue;
    }

    // First exclusive waiter found. It is only woken up when no shared
    // waiter precedes it; either way the scan ends here.
    if (at_queue_head) {
      waiter->Notify();
      DebugWakeUpWaiter(txn_id, waiter->id, key, "UnlockKey X waiter");
    }
    break;
  }
}
|
|
415
|
+
|
|
95
416
|
namespace {
|
|
96
417
|
void UnrefLockMapsCache(void* ptr) {
|
|
97
418
|
// Called when a thread exits or a ThreadLocalPtr gets destroyed.
|
|
@@ -99,6 +420,10 @@ void UnrefLockMapsCache(void* ptr) {
|
|
|
99
420
|
static_cast<UnorderedMap<uint32_t, std::shared_ptr<LockMap>>*>(ptr);
|
|
100
421
|
delete lock_maps_cache;
|
|
101
422
|
}
|
|
423
|
+
void UnrefKeyLockWaiter(void* ptr) {
|
|
424
|
+
auto key_lock_waiter = static_cast<KeyLockWaiter*>(ptr);
|
|
425
|
+
delete key_lock_waiter;
|
|
426
|
+
}
|
|
102
427
|
} // anonymous namespace
|
|
103
428
|
|
|
104
429
|
PointLockManager::PointLockManager(PessimisticTransactionDB* txn_db,
|
|
@@ -107,6 +432,7 @@ PointLockManager::PointLockManager(PessimisticTransactionDB* txn_db,
|
|
|
107
432
|
default_num_stripes_(opt.num_stripes),
|
|
108
433
|
max_num_locks_(opt.max_num_locks),
|
|
109
434
|
lock_maps_cache_(new ThreadLocalPtr(&UnrefLockMapsCache)),
|
|
435
|
+
key_lock_waiter_(&UnrefKeyLockWaiter),
|
|
110
436
|
dlock_buffer_(opt.max_num_deadlocks),
|
|
111
437
|
mutex_factory_(opt.custom_mutex_factory
|
|
112
438
|
? opt.custom_mutex_factory
|
|
@@ -122,7 +448,8 @@ void PointLockManager::AddColumnFamily(const ColumnFamilyHandle* cf) {
|
|
|
122
448
|
|
|
123
449
|
if (lock_maps_.find(cf->GetID()) == lock_maps_.end()) {
|
|
124
450
|
lock_maps_.emplace(cf->GetID(), std::make_shared<LockMap>(
|
|
125
|
-
default_num_stripes_, mutex_factory_
|
|
451
|
+
default_num_stripes_, mutex_factory_,
|
|
452
|
+
key_lock_waiter_));
|
|
126
453
|
} else {
|
|
127
454
|
// column_family already exists in lock map
|
|
128
455
|
assert(false);
|
|
@@ -242,16 +569,18 @@ Status PointLockManager::TryLock(PessimisticTransaction* txn,
|
|
|
242
569
|
|
|
243
570
|
LockInfo lock_info(txn->GetID(), txn->GetExpirationTime(), exclusive);
|
|
244
571
|
int64_t timeout = txn->GetLockTimeout();
|
|
572
|
+
int64_t deadlock_timeout_us = txn->GetDeadlockTimeout();
|
|
245
573
|
|
|
246
574
|
return AcquireWithTimeout(txn, lock_map, stripe, column_family_id, key, env,
|
|
247
|
-
timeout, lock_info);
|
|
575
|
+
timeout, deadlock_timeout_us, lock_info);
|
|
248
576
|
}
|
|
249
577
|
|
|
250
578
|
// Helper function for TryLock().
|
|
251
579
|
Status PointLockManager::AcquireWithTimeout(
|
|
252
580
|
PessimisticTransaction* txn, LockMap* lock_map, LockMapStripe* stripe,
|
|
253
581
|
ColumnFamilyId column_family_id, const std::string& key, Env* env,
|
|
254
|
-
int64_t timeout,
|
|
582
|
+
int64_t timeout, int64_t /*deadlock_timeout_us*/,
|
|
583
|
+
const LockInfo& lock_info) {
|
|
255
584
|
Status result;
|
|
256
585
|
uint64_t end_time = 0;
|
|
257
586
|
|
|
@@ -322,9 +651,6 @@ Status PointLockManager::AcquireWithTimeout(
|
|
|
322
651
|
// instead of exiting this while loop below.
|
|
323
652
|
uint64_t now = env->NowMicros();
|
|
324
653
|
if (static_cast<uint64_t>(cv_end_time) > now) {
|
|
325
|
-
// This may be invoked multiple times since we divide
|
|
326
|
-
// the time into smaller intervals.
|
|
327
|
-
(void)ROCKSDB_THREAD_YIELD_CHECK_ABORT();
|
|
328
654
|
result = stripe->stripe_cv->WaitFor(stripe->stripe_mutex,
|
|
329
655
|
cv_end_time - now);
|
|
330
656
|
cv_wait_fail = !result.ok() && !result.IsTimedOut();
|
|
@@ -367,6 +693,130 @@ Status PointLockManager::AcquireWithTimeout(
|
|
|
367
693
|
return result;
|
|
368
694
|
}
|
|
369
695
|
|
|
696
|
+
// Try to lock this key after we have acquired the mutex.
|
|
697
|
+
// Sets *expire_time to the expiration time in microseconds
|
|
698
|
+
// or 0 if no expiration.
|
|
699
|
+
//
|
|
700
|
+
// Returns Status::TimedOut if the lock cannot be acquired due to it being
|
|
701
|
+
// held by other transactions, `txn_ids` will be populated with the id of
|
|
702
|
+
// transactions that hold the lock, excluding lock_info.txn_ids[0].
|
|
703
|
+
// Returns Status::Aborted(kLockLimit) if the lock cannot be acquired due to
|
|
704
|
+
// reaching per CF limit on the number of locks.
|
|
705
|
+
//
|
|
706
|
+
// REQUIRED: Stripe mutex must be held. txn_ids must be empty.
|
|
707
|
+
// Attempts to take the lock on `key` for the single transaction described by
// `txn_lock_info`, without blocking.
//
// Outcomes:
//  - OK: the key was unlocked and is now held by the requester; or the
//    requester was the only holder and the entry now carries the requested
//    mode/expiration; or both sides are shared and the requester was appended
//    to the holder list; or the existing lock was expired and got stolen.
//  - LockLimit: the key was unlocked, but max_num_locks_ is positive and
//    lock_map->locked_key_cnt has already reached it.
//  - TimedOut(kLockTimeout): the key is held in a conflicting way by others;
//    their ids (never the requester's own id) are appended to `*txn_ids`.
//
// `*expire_time` is only touched via IsLockExpired(), i.e. on the conflicting
// path.
//
// REQUIRED: Stripe mutex must be held. txn_ids must be empty.
Status PointLockManager::AcquireLocked(LockMap* lock_map, LockMapStripe* stripe,
                                       const std::string& key, Env* env,
                                       const LockInfo& txn_lock_info,
                                       uint64_t* expire_time,
                                       autovector<TransactionID>* txn_ids) {
  assert(txn_lock_info.txn_ids.size() == 1);
  assert(txn_ids && txn_ids->empty());

  const TransactionID requester = txn_lock_info.txn_ids[0];
  auto entry = stripe->keys.find(key);

  if (entry == stripe->keys.end()) {
    // Nobody holds the key. Respect the per column family lock limit, if any.
    const bool limit_enabled = max_num_locks_ > 0;
    if (limit_enabled &&
        lock_map->locked_key_cnt.LoadRelaxed() >= max_num_locks_) {
      return Status::LockLimit();
    }

    stripe->keys.try_emplace(key, requester, txn_lock_info.expiration_time,
                             txn_lock_info.exclusive);
    if (limit_enabled) {
      // The count is only maintained when a limit is configured.
      lock_map->locked_key_cnt.FetchAddRelaxed(1);
    }
    return Status::OK();
  }

  // The key is already locked by somebody.
  auto& held = entry->second;
  assert(held.txn_ids.size() == 1 || !held.exclusive);

  if (!held.exclusive && !txn_lock_info.exclusive) {
    // Shared request against a shared lock: simply join the holders.
    held.txn_ids.push_back(requester);
    // Taking the max means the expiration time never goes down, even after a
    // holder is removed. Tracking expiry per transaction would be the exact
    // solution; this approximation is what the lock entry supports today.
    held.expiration_time =
        std::max(held.expiration_time, txn_lock_info.expiration_time);
    return Status::OK();
  }

  // At least one side wants exclusivity.
  const bool requester_is_sole_holder =
      held.txn_ids.size() == 1 && held.txn_ids[0] == requester;
  if (requester_is_sole_holder) {
    // We are the only holder, so just switch to the requested mode.
    held.exclusive = txn_lock_info.exclusive;
    held.expiration_time = txn_lock_info.expiration_time;
    return Status::OK();
  }

  // IsLockExpired() skips over the requester's own id, which may be present
  // when it is one of several shared holders.
  if (IsLockExpired(requester, held, env, expire_time)) {
    // The current lock is expired and can be stolen. The number of locked
    // keys does not change.
    held.txn_ids = txn_lock_info.txn_ids;
    held.exclusive = txn_lock_info.exclusive;
    held.expiration_time = txn_lock_info.expiration_time;
    return Status::OK();
  }

  // Genuine conflict: report everybody who blocks us. A transaction is never
  // blocked by itself.
  for (auto holder_id : held.txn_ids) {
    if (holder_id != requester) {
      txn_ids->push_back(holder_id);
    }
  }
  return Status::TimedOut(Status::SubCode::kLockTimeout);
}
|
|
781
|
+
|
|
782
|
+
// Removes `txn` from the holders of `key` within `stripe`.
//
// If `txn` is the only holder, the key entry is erased from the stripe;
// otherwise only `txn` is removed from the holder list and the entry stays.
//
// lock_map->locked_key_cnt counts locked keys, not holders: AcquireLocked()
// increments it only when a new key entry is created, and several shared
// holders of one key count as a single lock. Consequently it is decremented
// here only when the key entry itself goes away. (Decrementing per removed
// holder would under-count as soon as a key has more than one shared holder.)
// The count is only maintained when max_num_locks_ is positive.
//
// If the key has no entry in the stripe at all, this is expected to happen
// only because the unlocking transaction has expired (checked by assert).
//
// `env` is only used by that assertion.
//
// REQUIRED: Stripe mutex must be held.
void PointLockManager::UnLockKey(PessimisticTransaction* txn,
                                 const std::string& key, LockMapStripe* stripe,
                                 LockMap* lock_map, Env* env) {
#ifdef NDEBUG
  (void)env;
#endif
  TransactionID txn_id = txn->GetID();

  auto stripe_iter = stripe->keys.find(key);
  if (stripe_iter != stripe->keys.end()) {
    auto& txns = stripe_iter->second.txn_ids;
    auto txn_it = std::find(txns.begin(), txns.end(), txn_id);
    // Found the key we locked. unlock it.
    if (txn_it != txns.end()) {
      if (txns.size() == 1) {
        // Last holder: the key is no longer locked by anybody.
        stripe->keys.erase(stripe_iter);

        if (max_num_locks_ > 0) {
          // Maintain lock count if there is a limit on the number of locks.
          assert(lock_map->locked_key_cnt.LoadRelaxed() > 0);
          lock_map->locked_key_cnt.FetchSubRelaxed(1);
        }
      } else {
        // Other (shared) holders remain, so the key stays locked and the
        // locked key count is left untouched.
        RemoveTransaction(txns, txn_it);
      }
    }
  } else {
    // This key is either not locked or locked by someone else. This should
    // only happen if the unlocking transaction has expired.
    assert(txn->GetExpirationTime() > 0 &&
           txn->GetExpirationTime() < env->NowMicros());
  }
}
|
|
819
|
+
|
|
370
820
|
void PointLockManager::DecrementWaiters(
|
|
371
821
|
const PessimisticTransaction* txn,
|
|
372
822
|
const autovector<TransactionID>& wait_ids) {
|
|
@@ -484,143 +934,22 @@ bool PointLockManager::IncrementWaiters(
|
|
|
484
934
|
return true;
|
|
485
935
|
}
|
|
486
936
|
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
//
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
//
|
|
497
|
-
// REQUIRED: Stripe mutex must be held. txn_ids must be empty.
|
|
498
|
-
Status PointLockManager::AcquireLocked(LockMap* lock_map, LockMapStripe* stripe,
|
|
499
|
-
const std::string& key, Env* env,
|
|
500
|
-
const LockInfo& txn_lock_info,
|
|
501
|
-
uint64_t* expire_time,
|
|
502
|
-
autovector<TransactionID>* txn_ids) {
|
|
503
|
-
assert(txn_lock_info.txn_ids.size() == 1);
|
|
504
|
-
assert(txn_ids && txn_ids->empty());
|
|
937
|
+
void PointLockManager::UnLock(PessimisticTransaction* txn,
|
|
938
|
+
ColumnFamilyId column_family_id,
|
|
939
|
+
const std::string& key, Env* env) {
|
|
940
|
+
std::shared_ptr<LockMap> lock_map_ptr = GetLockMap(column_family_id);
|
|
941
|
+
LockMap* lock_map = lock_map_ptr.get();
|
|
942
|
+
if (lock_map == nullptr) {
|
|
943
|
+
// Column Family must have been dropped.
|
|
944
|
+
return;
|
|
945
|
+
}
|
|
505
946
|
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
// Lock already held
|
|
511
|
-
LockInfo& lock_info = stripe_iter->second;
|
|
512
|
-
assert(lock_info.txn_ids.size() == 1 || !lock_info.exclusive);
|
|
947
|
+
// Lock the mutex for the stripe that this key hashes to
|
|
948
|
+
size_t stripe_num = lock_map->GetStripe(key);
|
|
949
|
+
assert(lock_map->lock_map_stripes_.size() > stripe_num);
|
|
950
|
+
LockMapStripe* stripe = lock_map->lock_map_stripes_.at(stripe_num);
|
|
513
951
|
|
|
514
|
-
|
|
515
|
-
if (lock_info.txn_ids.size() == 1 &&
|
|
516
|
-
lock_info.txn_ids[0] == txn_lock_info.txn_ids[0]) {
|
|
517
|
-
// The list contains one txn and we're it, so just take it.
|
|
518
|
-
lock_info.exclusive = txn_lock_info.exclusive;
|
|
519
|
-
lock_info.expiration_time = txn_lock_info.expiration_time;
|
|
520
|
-
} else {
|
|
521
|
-
// Check if it's expired. Skips over txn_lock_info.txn_ids[0] in case
|
|
522
|
-
// it's there for a shared lock with multiple holders which was not
|
|
523
|
-
// caught in the first case.
|
|
524
|
-
if (IsLockExpired(txn_lock_info.txn_ids[0], lock_info, env,
|
|
525
|
-
expire_time)) {
|
|
526
|
-
// lock is expired, can steal it
|
|
527
|
-
lock_info.txn_ids = txn_lock_info.txn_ids;
|
|
528
|
-
lock_info.exclusive = txn_lock_info.exclusive;
|
|
529
|
-
lock_info.expiration_time = txn_lock_info.expiration_time;
|
|
530
|
-
// lock_cnt does not change
|
|
531
|
-
} else {
|
|
532
|
-
result = Status::TimedOut(Status::SubCode::kLockTimeout);
|
|
533
|
-
for (auto id : lock_info.txn_ids) {
|
|
534
|
-
// A transaction is not blocked by itself
|
|
535
|
-
if (id != txn_lock_info.txn_ids[0]) {
|
|
536
|
-
txn_ids->push_back(id);
|
|
537
|
-
}
|
|
538
|
-
}
|
|
539
|
-
}
|
|
540
|
-
}
|
|
541
|
-
} else {
|
|
542
|
-
// We are requesting shared access to a shared lock, so just grant it.
|
|
543
|
-
lock_info.txn_ids.push_back(txn_lock_info.txn_ids[0]);
|
|
544
|
-
// Using std::max means that expiration time never goes down even when
|
|
545
|
-
// a transaction is removed from the list. The correct solution would be
|
|
546
|
-
// to track expiry for every transaction, but this would also work for
|
|
547
|
-
// now.
|
|
548
|
-
lock_info.expiration_time =
|
|
549
|
-
std::max(lock_info.expiration_time, txn_lock_info.expiration_time);
|
|
550
|
-
}
|
|
551
|
-
} else { // Lock not held.
|
|
552
|
-
// Check lock limit
|
|
553
|
-
if (max_num_locks_ > 0 &&
|
|
554
|
-
lock_map->lock_cnt.load(std::memory_order_acquire) >= max_num_locks_) {
|
|
555
|
-
result = Status::LockLimit();
|
|
556
|
-
} else {
|
|
557
|
-
// acquire lock
|
|
558
|
-
stripe->keys.emplace(key, txn_lock_info);
|
|
559
|
-
|
|
560
|
-
// Maintain lock count if there is a limit on the number of locks
|
|
561
|
-
if (max_num_locks_) {
|
|
562
|
-
lock_map->lock_cnt++;
|
|
563
|
-
}
|
|
564
|
-
}
|
|
565
|
-
}
|
|
566
|
-
|
|
567
|
-
return result;
|
|
568
|
-
}
|
|
569
|
-
|
|
570
|
-
void PointLockManager::UnLockKey(PessimisticTransaction* txn,
|
|
571
|
-
const std::string& key, LockMapStripe* stripe,
|
|
572
|
-
LockMap* lock_map, Env* env) {
|
|
573
|
-
#ifdef NDEBUG
|
|
574
|
-
(void)env;
|
|
575
|
-
#endif
|
|
576
|
-
TransactionID txn_id = txn->GetID();
|
|
577
|
-
|
|
578
|
-
auto stripe_iter = stripe->keys.find(key);
|
|
579
|
-
if (stripe_iter != stripe->keys.end()) {
|
|
580
|
-
auto& txns = stripe_iter->second.txn_ids;
|
|
581
|
-
auto txn_it = std::find(txns.begin(), txns.end(), txn_id);
|
|
582
|
-
// Found the key we locked. unlock it.
|
|
583
|
-
if (txn_it != txns.end()) {
|
|
584
|
-
if (txns.size() == 1) {
|
|
585
|
-
stripe->keys.erase(stripe_iter);
|
|
586
|
-
} else {
|
|
587
|
-
auto last_it = txns.end() - 1;
|
|
588
|
-
if (txn_it != last_it) {
|
|
589
|
-
*txn_it = *last_it;
|
|
590
|
-
}
|
|
591
|
-
txns.pop_back();
|
|
592
|
-
}
|
|
593
|
-
|
|
594
|
-
if (max_num_locks_ > 0) {
|
|
595
|
-
// Maintain lock count if there is a limit on the number of locks.
|
|
596
|
-
assert(lock_map->lock_cnt.load(std::memory_order_relaxed) > 0);
|
|
597
|
-
lock_map->lock_cnt--;
|
|
598
|
-
}
|
|
599
|
-
}
|
|
600
|
-
} else {
|
|
601
|
-
// This key is either not locked or locked by someone else. This should
|
|
602
|
-
// only happen if the unlocking transaction has expired.
|
|
603
|
-
assert(txn->GetExpirationTime() > 0 &&
|
|
604
|
-
txn->GetExpirationTime() < env->NowMicros());
|
|
605
|
-
}
|
|
606
|
-
}
|
|
607
|
-
|
|
608
|
-
void PointLockManager::UnLock(PessimisticTransaction* txn,
|
|
609
|
-
ColumnFamilyId column_family_id,
|
|
610
|
-
const std::string& key, Env* env) {
|
|
611
|
-
std::shared_ptr<LockMap> lock_map_ptr = GetLockMap(column_family_id);
|
|
612
|
-
LockMap* lock_map = lock_map_ptr.get();
|
|
613
|
-
if (lock_map == nullptr) {
|
|
614
|
-
// Column Family must have been dropped.
|
|
615
|
-
return;
|
|
616
|
-
}
|
|
617
|
-
|
|
618
|
-
// Lock the mutex for the stripe that this key hashes to
|
|
619
|
-
size_t stripe_num = lock_map->GetStripe(key);
|
|
620
|
-
assert(lock_map->lock_map_stripes_.size() > stripe_num);
|
|
621
|
-
LockMapStripe* stripe = lock_map->lock_map_stripes_.at(stripe_num);
|
|
622
|
-
|
|
623
|
-
stripe->stripe_mutex->Lock().PermitUncheckedError();
|
|
952
|
+
stripe->stripe_mutex->Lock().AssertOK();
|
|
624
953
|
UnLockKey(txn, key, stripe, lock_map, env);
|
|
625
954
|
stripe->stripe_mutex->UnLock();
|
|
626
955
|
|
|
@@ -662,7 +991,7 @@ void PointLockManager::UnLock(PessimisticTransaction* txn,
|
|
|
662
991
|
assert(lock_map->lock_map_stripes_.size() > stripe_num);
|
|
663
992
|
LockMapStripe* stripe = lock_map->lock_map_stripes_.at(stripe_num);
|
|
664
993
|
|
|
665
|
-
stripe->stripe_mutex->Lock().
|
|
994
|
+
stripe->stripe_mutex->Lock().AssertOK();
|
|
666
995
|
|
|
667
996
|
for (const std::string* key : stripe_keys) {
|
|
668
997
|
UnLockKey(txn, *key, stripe, lock_map, env);
|
|
@@ -693,7 +1022,7 @@ PointLockManager::PointLockStatus PointLockManager::GetPointLockStatus() {
|
|
|
693
1022
|
const auto& stripes = lock_maps_[i]->lock_map_stripes_;
|
|
694
1023
|
// Iterate and lock all stripes in ascending order.
|
|
695
1024
|
for (const auto& j : stripes) {
|
|
696
|
-
j->stripe_mutex->Lock().
|
|
1025
|
+
j->stripe_mutex->Lock().AssertOK();
|
|
697
1026
|
for (const auto& it : j->keys) {
|
|
698
1027
|
struct KeyLockInfo info;
|
|
699
1028
|
info.exclusive = it.second.exclusive;
|
|
@@ -745,4 +1074,758 @@ void PointLockManager::UnLock(PessimisticTransaction* /* txn */,
|
|
|
745
1074
|
// no-op
|
|
746
1075
|
}
|
|
747
1076
|
|
|
1077
|
+
// PerKeyPointLockManager implementation
|
|
1078
|
+
// Constructs the manager by forwarding `db` and `opt` unchanged to the
// PointLockManager base class; no additional state is initialized here.
PerKeyPointLockManager::PerKeyPointLockManager(PessimisticTransactionDB* db,
                                               const TransactionDBOptions& opt)
    : PointLockManager(db, opt) {
}
|
|
1081
|
+
|
|
1082
|
+
// Debug aid. When kDebugLog is enabled, writes one line to stderr that lists
// the current holders of `key` (lock_info.txn_ids, each prefixed with "X" or
// "S" according to lock_info.exclusive) followed by the entries of
// key_lock_waiter_ctx.waiter_queue (each prefixed with "X" or "S" according
// to the waiter's own `exclusive` flag). Does nothing when kDebugLog is false.
//
// The message is assembled in a fixed-size stack buffer; output that does not
// fit is truncated rather than written past the end of the buffer.
void DebugLockStatus(TransactionID my_txn_id, const LockInfo& lock_info,
                     const std::string& key,
                     const KeyLockWaiterContext& key_lock_waiter_ctx) {
  if (kDebugLog) {
    static constexpr size_t kMsgCapacity = 512;
    char msg[kMsgCapacity];
    msg[0] = '\0';
    size_t offset = 0;

    // snprintf() returns the length the output would have had without
    // truncation (or a negative value on error), so its result must not be
    // added to `offset` blindly. Clamp so that `offset` always points at the
    // terminating NUL inside the buffer; once the buffer is full, further
    // snprintf() calls get a size of 1 and write nothing but that NUL.
    const auto advance = [&offset](int written) {
      if (written > 0) {
        offset = std::min(offset + static_cast<size_t>(written),
                          kMsgCapacity - 1);
      }
    };

    // print lock holders
    advance(snprintf(msg + offset, kMsgCapacity - offset,
                     "Txn %" PRIu64 ": LockStatus key %s: holder [", my_txn_id,
                     key.c_str()));
    for (const auto& txn_id : lock_info.txn_ids) {
      advance(snprintf(msg + offset, kMsgCapacity - offset, "%s%" PRIu64 ",",
                       lock_info.exclusive ? "X" : "S", txn_id));
    }

    // print waiter queue
    advance(
        snprintf(msg + offset, kMsgCapacity - offset, "], waiter_queue ["));
    // The context has no queue until the transaction has joined one.
    if (key_lock_waiter_ctx.waiter_queue != nullptr) {
      for (const auto& waiter : *key_lock_waiter_ctx.waiter_queue) {
        advance(snprintf(msg + offset, kMsgCapacity - offset, "%s%" PRIu64 ",",
                         waiter->exclusive ? "X" : "S", waiter->id));
      }
    }

    advance(snprintf(msg + offset, kMsgCapacity - offset, "]\n"));
    fprintf(stderr, "%s", msg);
    fflush(stderr);
  }
}
|
|
1111
|
+
|
|
1112
|
+
// Picks the point in time until which a lock wait should last.
//
// A value that is zero or negative means "not set" for either input.
//  - both set:      the earlier of the two
//  - only one set:  that one
//  - neither set:   -1, which callers treat as "wait without a deadline"
int64_t PerKeyPointLockManager::CalculateWaitEndTime(int64_t expire_time_hint,
                                                     int64_t end_time) {
  const bool has_expiry_hint = expire_time_hint > 0;
  const bool has_deadline = end_time > 0;

  if (has_expiry_hint && has_deadline) {
    return std::min(expire_time_hint, end_time);
  }
  if (has_expiry_hint) {
    return expire_time_hint;
  }
  if (has_deadline) {
    return end_time;
  }
  return -1;
}
|
|
1124
|
+
|
|
1125
|
+
// Acquire lock within timeout.
|
|
1126
|
+
// This function is similar to PointLockManager::AcquireWithTimeout with
|
|
1127
|
+
// following differences.
|
|
1128
|
+
//
|
|
1129
|
+
// If deadlock_timeout_us is not 0, it first performs a wait without doing dead
|
|
1130
|
+
// lock detection. This wait duration is specified by deadlock_timeout_us.
|
|
1131
|
+
// If this wait times out and it is still not able to acquire the lock, perform
|
|
1132
|
+
// the deadlock detection before wait again.
|
|
1133
|
+
//
|
|
1134
|
+
// It uses a per key lock waiter queue to handle lock waiting and wake up
|
|
1135
|
+
// efficiently. When a transaction is waiting for acquiring a lock on a key, it
|
|
1136
|
+
// joins a wait queue that is dedicated for this key. It will either timeout, or
|
|
1137
|
+
// get woken up when it is its turn to take the lock. This is more efficient
|
|
1138
|
+
// than the PointLockManager implementation where all lock waiters wait on the
|
|
1139
|
+
// same lock stripe cond var.
|
|
1140
|
+
// Acquires the lock described by `txn_lock_info` on `key`, waiting in the
// key's own waiter queue if necessary.
//
// Parameters, as used below:
//  - timeout: < 0 blocks on the stripe mutex without a deadline; >= 0 is
//    passed to TryLockFor() for the stripe mutex. A positive value is also
//    added to env->NowMicros() to form the overall deadline. With 0 the lock
//    is attempted exactly once and never waited for.
//  - deadlock_timeout_us: if positive and the transaction has deadlock
//    detection enabled, the first wait is performed without collecting
//    blocker ids and without deadlock detection, for at most this long (and
//    never beyond the wait end time, if there is one).
//
// Returns OK once the lock is held. Otherwise returns the failure from
// locking the stripe mutex, the non-retriable status from AcquireLocked()
// (lock limit / aborted), Busy(kDeadlock) when IncrementWaiters() reports a
// deadlock, TimedOut(kLockTimeout) when the wait end time is reached, or the
// error returned by a failed wait. On TimedOut the blocker ids are persisted
// on the transaction for debugging.
//
// The stripe mutex is acquired and released inside this function.
Status PerKeyPointLockManager::AcquireWithTimeout(
    PessimisticTransaction* txn, LockMap* lock_map, LockMapStripe* stripe,
    ColumnFamilyId column_family_id, const std::string& key, Env* env,
    int64_t timeout, int64_t deadlock_timeout_us,
    const LockInfo& txn_lock_info) {
  Status result;
  uint64_t end_time = 0;
  auto my_txn_id = txn_lock_info.txn_ids[0];

  if (timeout > 0) {
    uint64_t start_time = env->NowMicros();
    end_time = start_time + timeout;
  }

  if (timeout < 0) {
    // If timeout is negative, we wait indefinitely to acquire the lock
    result = stripe->stripe_mutex->Lock();
  } else {
    result = stripe->stripe_mutex->TryLockFor(timeout);
  }

  if (!result.ok()) {
    // failed to acquire mutex
    return result;
  }

  // Acquire lock if we are able to
  uint64_t expire_time_hint = 0;
  autovector<TransactionID> wait_ids;
  bool isUpgrade = false;

  auto lock_info = stripe->GetLockInfo(key);

  auto wait_before_deadlock_detection =
      txn->IsDeadlockDetect() && (deadlock_timeout_us > 0);
  result = AcquireLocked(
      lock_map, stripe, key, env, txn_lock_info, &expire_time_hint,
      // If wait before deadlock detection, it executes a fast path to save CPU
      // cycles, wait ids are not collected.
      wait_before_deadlock_detection ? nullptr : &wait_ids, &lock_info,
      &isUpgrade, true);
  if (!result.ok() && timeout != 0 &&
      /* No need to retry after reach lock limit or aborted */
      !result.IsLockLimit() && !result.IsAborted()) {
    assert(lock_info);

    PERF_TIMER_GUARD(key_lock_wait_time);
    PERF_COUNTER_ADD(key_lock_wait_count, 1);
    // If we weren't able to acquire the lock, we will keep retrying as long
    // as the timeout allows.
    bool timed_out = false;
    bool cv_wait_fail = false;

    KeyLockWaiterContext key_lock_waiter_ctx;

    // Decide how long to wait
    auto cv_end_time = CalculateWaitEndTime(expire_time_hint, end_time);

    // We will try to wait a little bit before checking deadlock, as
    // deadlock check is expensive.
    if (wait_before_deadlock_detection) {
      int64_t now = env->NowMicros();
      if (cv_end_time < 0 || cv_end_time > now) {
        if (kDebugLog) {
          // print lock status before deadlock detection
          fprintf(stderr,
                  "Txn %" PRIu64
                  " wait before deadlock detection %s, exclusive lock "
                  "%d\n",
                  my_txn_id, key.c_str(), txn_lock_info.exclusive);
          fflush(stderr);
        }
        stripe->JoinWaitQueue(*lock_info, my_txn_id, txn_lock_info.exclusive,
                              false, key_lock_waiter_ctx);
        DebugLockStatus(my_txn_id, *lock_info, key, key_lock_waiter_ctx);

        TEST_SYNC_POINT(
            "PerKeyPointLockManager::AcquireWithTimeout:"
            "WaitingTxnBeforeDeadLockDetection");
        // A negative cv_end_time means there is no wait end time at all. In
        // that case `cv_end_time - now` is a meaningless negative number and
        // must not take part in the min(); the pre-detection wait is then
        // bounded by the deadlock timeout alone.
        const int64_t pre_detection_wait_us =
            cv_end_time < 0
                ? deadlock_timeout_us
                : std::min(cv_end_time - now, deadlock_timeout_us);
        result = stripe->WaitOnLock(key_lock_waiter_ctx.lock_waiter,
                                    pre_detection_wait_us);
        assert(result.ok() || result.IsTimedOut());
        // Refresh lock info pointer, as this pointer is not guaranteed to be
        // stable in folly
        lock_info = stripe->GetLockInfo(key);
        // try to take a lock again to get wait ids after deadlock timeout
        result = AcquireLocked(lock_map, stripe, key, env, txn_lock_info,
                               &expire_time_hint, &wait_ids, &lock_info,
                               &isUpgrade, !result.ok());
      } else {
        // Already timed out
        timed_out = true;
        result = Status::TimedOut(Status::SubCode::kLockTimeout);
      }
    }

    while (!result.ok() && !timed_out && !result.IsAborted()) {
      // Refresh wait end time
      cv_end_time = CalculateWaitEndTime(expire_time_hint, end_time);

      // We are dependent on a transaction to finish, so perform deadlock
      // detection.
      if (!wait_ids.empty()) {
        if (txn->IsDeadlockDetect()) {
          if (IncrementWaiters(txn, wait_ids, key, column_family_id,
                               txn_lock_info.exclusive, env)) {
            result = Status::Busy(Status::SubCode::kDeadlock);
            break;
          }
        }
        txn->SetWaitingTxn(wait_ids, column_family_id, &key);
      }

      TEST_SYNC_POINT("PointLockManager::AcquireWithTimeout:WaitingTxn");

      if (kDebugLog) {
        // print transaction lock status and wait ids
        static constexpr size_t kMsgCapacity = 512;
        char msg[kMsgCapacity];
        msg[0] = '\0';
        size_t offset = 0;
        // snprintf() returns the untruncated length (or a negative value on
        // error). Clamp so that `offset` never leaves the buffer and the
        // size passed to the next call is the space that is really left.
        const auto advance = [&offset](int written) {
          if (written > 0) {
            offset = std::min(offset + static_cast<size_t>(written),
                              kMsgCapacity - 1);
          }
        };

        advance(snprintf(msg + offset, kMsgCapacity - offset,
                         "Txn %" PRIu64
                         " wait after deadlock detection %s, exclusive lock "
                         "%d, upgrade %d, wait_ids [",
                         my_txn_id, key.c_str(), txn_lock_info.exclusive,
                         isUpgrade));

        for (auto it = wait_ids.begin(); it != wait_ids.end(); ++it) {
          advance(snprintf(msg + offset, kMsgCapacity - offset, "%" PRIu64 ",",
                           *it));
        }

        advance(snprintf(msg + offset, kMsgCapacity - offset, "]\n"));

        fprintf(stderr, "%s", msg);
        fflush(stderr);
      }

      // If it has not joined wait queue, join it now.
      // If it is a lock upgrade, rejoin it.
      if (isUpgrade || (key_lock_waiter_ctx.waiter_queue == nullptr)) {
        stripe->JoinWaitQueue(*lock_info, my_txn_id, txn_lock_info.exclusive,
                              isUpgrade, key_lock_waiter_ctx);

        DebugLockStatus(my_txn_id, *lock_info, key, key_lock_waiter_ctx);
      }

      int64_t now = 0;
      if (cv_end_time < 0) {
        // Wait indefinitely
        result = stripe->WaitOnLock(key_lock_waiter_ctx.lock_waiter);
        cv_wait_fail = !result.ok();
      } else {
        now = env->NowMicros();
        if (cv_end_time > now) {
          result = stripe->WaitOnLock(key_lock_waiter_ctx.lock_waiter,
                                      cv_end_time - now);

          cv_wait_fail = !result.ok() && !result.IsTimedOut();
        } else {
          // now >= cv_end_time, we already timed out
          result = Status::TimedOut(Status::SubCode::kLockTimeout);
        }
      }

#ifndef NDEBUG
      // Debug builds only: briefly release the stripe mutex so that tests can
      // hook in between wake-up and the next attempt to take the lock.
      stripe->stripe_mutex->UnLock();
      TEST_SYNC_POINT_CALLBACK(
          "PerKeyPointLockManager::AcquireWithTimeout:AfterWokenUp",
          &my_txn_id);
      TEST_SYNC_POINT(
          "PerKeyPointLockManager::AcquireWithTimeout:BeforeTakeLock");
      auto lock_status = stripe->stripe_mutex->Lock();
      assert(lock_status.ok());
#endif

      if (!wait_ids.empty()) {
        txn->ClearWaitingTxn();
        if (txn->IsDeadlockDetect()) {
          DecrementWaiters(txn, wait_ids);
        }
      }

      if (cv_wait_fail) {
        break;
      }

      if (result.IsTimedOut()) {
        timed_out = true;
        // Even though we timed out, we will still make one more attempt to
        // acquire lock below (it is possible the lock expired and we
        // were never signaled).
      }
      assert(result.ok() || result.IsTimedOut());

      // Refresh lock info pointer, as this pointer is not guaranteed to be
      // stable in folly
      lock_info = stripe->GetLockInfo(key);

      // Try to get the lock again.
      result = AcquireLocked(
          lock_map, stripe, key, env, txn_lock_info, &expire_time_hint,
          &wait_ids, &lock_info, &isUpgrade,
          /* If wait is timed out, it means it is not its turn to take the lock.
           * Therefore, it should still follow FIFO order. */
          timed_out);
      auto fail_to_take_lock_on_its_turn = !timed_out && !result.ok();
      if (fail_to_take_lock_on_its_turn) {
        // If it is its turn, but it failed to take lock, something is broken.
        // Assert this should not happen in debug build during testing.
        // In prod, it simply gives up the attempt.
        assert(!fail_to_take_lock_on_its_turn);
        break;
      }

      if (!result.ok() && cv_end_time >= 0) {
        if (static_cast<int64_t>(end_time) <= now) {
          // lock timeout timed out
          result = Status::TimedOut(Status::SubCode::kLockTimeout);
          timed_out = true;
        }
      }
    }

    // For any reason that the transaction failed to acquire the lock, it should
    // try to wake up next waiters, if they are ready to proceed.
    if (!result.ok()) {
      key_lock_waiter_ctx.TryWakeUpNextWaiters(*lock_info, key);
    }
  }

  stripe->stripe_mutex->UnLock();

  // On timeout, persist the lock information so we can debug the contention
  if (result.IsTimedOut()) {
    txn->SetWaitingTxn(wait_ids, column_family_id, &key, true);
  }

  return result;
}
|
|
1379
|
+
|
|
1380
|
+
// Collects the transactions that block `my_txn_id` on a key.
//
// When `wait_ids` is null nothing is collected and OK is returned right away.
// Otherwise every holder in lock_info.txn_ids other than `my_txn_id` is
// appended to `*wait_ids`. If `my_txn_id` itself is among the holders, the
// request has to be a shared -> exclusive upgrade, in which case `isUpgrade`
// is set to true. Any other combination is unexpected at this point: it trips
// an assert in debug builds and returns Aborted(kNotExpectedCodePath), leaving
// the ids gathered so far in `*wait_ids`.
//
// `key` is only used for the debug log message.
Status PerKeyPointLockManager::FillWaitIds(LockInfo& lock_info,
                                           const LockInfo& txn_lock_info,
                                           autovector<TransactionID>* wait_ids,
                                           bool& isUpgrade,
                                           TransactionID& my_txn_id,
                                           const std::string& key) {
  if (wait_ids == nullptr) {
    return Status::OK();
  }

  for (auto holder_id : lock_info.txn_ids) {
    if (holder_id != my_txn_id) {
      // A transaction is only ever blocked by other transactions.
      wait_ids->push_back(holder_id);
      continue;
    }

    // We already hold this lock ourselves, so this is either an upgrade or a
    // downgrade. Downgrades are expected to have been dealt with before this
    // function is reached, which leaves the upgrade as the only valid case.
    const bool shared_to_exclusive =
        !lock_info.exclusive && txn_lock_info.exclusive;
    if (shared_to_exclusive) {
      isUpgrade = true;
      continue;
    }

    if (kDebugLog) {
      fprintf(stderr,
              "txn id %" PRIu64 " assert failed on lock upgrade key %s\n",
              my_txn_id, key.c_str());
      fflush(stderr);
    }
    assert(shared_to_exclusive);
    return Status::Aborted(Status::SubCode::kNotExpectedCodePath);
  }

  return Status::OK();
}
|
|
1412
|
+
|
|
1413
|
+
// This function is similar to PointLockManager::AcquireLocked with following
|
|
1414
|
+
// differences.
|
|
1415
|
+
//
|
|
1416
|
+
// It introduces a per key lock waiter queue. When it tries to take the lock, it
|
|
1417
|
+
// will first check whether there are other transactions already in the waiter
|
|
1418
|
+
// queue, if so it will return TimedOut. Caller will join the waiter queue, if
|
|
1419
|
+
// lock timeout is not reached yet. When it is its turn to take the lock, it will be
|
|
1420
|
+
// woken up and take the lock.
|
|
1421
|
+
//
|
|
1422
|
+
// It introduces a fast path check that will quickly check whether the lock
|
|
1423
|
+
// could be obtained without gathering waiter id information. This allows
|
|
1424
|
+
// transaction to sleep a short time before performing deadlock detection.
|
|
1425
|
+
//
|
|
1426
|
+
// @param lock_info_ptr: pointer to the LockInfo associated with the key. If the
|
|
1427
|
+
// key is already locked, LockInfo will not be null. If not, LockInfo is
|
|
1428
|
+
// null, and a new LockInfo is created and assigned to lock_info_ptr.
|
|
1429
|
+
//
|
|
1430
|
+
// @param wait_ids: When wait_ids is nullptr, it performs a fast path check to
|
|
1431
|
+
// see whether it could take the lock, it does not fill waiter_ids. If
|
|
1432
|
+
// wait_ids is not nullptr, it will fill the wait_ids with the lock holder.
|
|
1433
|
+
//
|
|
1434
|
+
// @param isUpgrade: isUpgrade is set to true, if the transaction tries to
|
|
1435
|
+
// upgrade a lock to exclusive, but it needs to wait for other lock holders to
|
|
1436
|
+
// release the shared locks. Note that isUpgrade is not set on fast path
|
|
1437
|
+
// check.
|
|
1438
|
+
//
|
|
1439
|
+
// @param fifo: fifo flag indicates whether it should follow fifo order to check
|
|
1440
|
+
// whether there is already a waiter waiting for the lock or not. If fifo is
|
|
1441
|
+
// true and there is already a lock waiter waiting in the queue and it is not
|
|
1442
|
+
// itself, return TimedOut. If fifo is false, it means it is its turn to take
|
|
1443
|
+
// the lock.
|
|
1444
|
+
Status PerKeyPointLockManager::AcquireLocked(
    LockMap* lock_map, LockMapStripe* stripe, const std::string& key, Env* env,
    const LockInfo& txn_lock_info, uint64_t* expire_time,
    autovector<TransactionID>* wait_ids, LockInfo** lock_info_ptr,
    bool* isUpgrade, bool fifo) {
  assert(txn_lock_info.txn_ids.size() == 1);

  // Reset the outputs so every return path starts from a clean state.
  if (wait_ids != nullptr) {
    wait_ids->clear();
  }
  *isUpgrade = false;

  // Kept non-const: FillWaitIds takes the id by non-const reference.
  auto my_txn_id = txn_lock_info.txn_ids[0];

  // ---------------------------------------------------------------------
  // Case 1: the key has no entry at all (neither holder nor waiter).
  // ---------------------------------------------------------------------
  if (*lock_info_ptr == nullptr) {
    if (max_num_locks_ > 0 &&
        lock_map->locked_key_cnt.LoadRelaxed() >= max_num_locks_) {
      return Status::LockLimit();
    }

    // Create the entry already owned by this transaction.
    auto inserted =
        stripe->keys.try_emplace(key, my_txn_id, txn_lock_info.expiration_time,
                                 txn_lock_info.exclusive);
    assert(inserted.second);
    *lock_info_ptr = &(inserted.first->second);

    // The key counter is only maintained when a limit is configured.
    if (max_num_locks_ > 0) {
      lock_map->locked_key_cnt.FetchAddRelaxed(1);
    }
    return Status::OK();
  }

  auto& held = **lock_info_ptr;
  const bool locked = !held.txn_ids.empty();
  bool sole_owner =
      (held.txn_ids.size() == 1) && (held.txn_ids[0] == my_txn_id);

  // Keeps the holder list, replaces mode and expiration with the request's.
  const auto adopt_requested_mode = [&held, &txn_lock_info]() {
    held.exclusive = txn_lock_info.exclusive;
    held.expiration_time = txn_lock_info.expiration_time;
  };
  // Replaces holders, mode and expiration with the request's.
  const auto take_over = [&held, &txn_lock_info]() {
    held.txn_ids = txn_lock_info.txn_ids;
    held.exclusive = txn_lock_info.exclusive;
    held.expiration_time = txn_lock_info.expiration_time;
  };
  // Extends the expiration to cover the request, never shortening it.
  const auto extend_expiration = [&held, &txn_lock_info]() {
    held.expiration_time =
        std::max(held.expiration_time, txn_lock_info.expiration_time);
  };

  // ---------------------------------------------------------------------
  // Case 2: downgrade / re-entrant requests, which always succeed.
  // ---------------------------------------------------------------------
  if (sole_owner) {
    const bool is_downgrade = held.exclusive && !txn_lock_info.exclusive;
    if (is_downgrade && held.waiter_queue != nullptr) {
      // Going exclusive -> shared: wake the run of shared waiters at the head
      // of the queue, stopping at the first exclusive waiter.
      for (auto& waiter : *held.waiter_queue) {
        if (waiter->exclusive) {
          break;
        }
        waiter->Notify();
        DebugWakeUpWaiter(my_txn_id, waiter->id, key, "Lock Downgrade");
      }
    }

    const bool is_upgrade = !held.exclusive && txn_lock_info.exclusive;
    if (!is_upgrade) {
      // Downgrade or re-entrant acquisition: grant right away.
      adopt_requested_mode();
      return Status::OK();
    }
  } else if (locked && !txn_lock_info.exclusive && !held.exclusive) {
    // Shared lock held by several transactions: a shared re-entrant request
    // from one of them is granted immediately.
    const auto holders_end = held.txn_ids.end();
    if (std::find(held.txn_ids.begin(), holders_end, my_txn_id) !=
        holders_end) {
      extend_expiration();
      return Status::OK();
    }
  }

  const bool has_waiter =
      (held.waiter_queue != nullptr) && !held.waiter_queue->empty();

  // A shared waiter at the head of the queue that is ready to take the lock
  // means this transaction is no longer treated as the sole owner.
  if (sole_owner && has_waiter &&
      held.waiter_queue->front()->IsReady() &&
      !held.waiter_queue->front()->exclusive) {
    sole_owner = false;
  }

  // Being at the head of the queue means there is nobody ahead to respect.
  const bool first_in_line =
      has_waiter && (held.waiter_queue->front()->id == my_txn_id);

  // ---------------------------------------------------------------------
  // Case 3: FIFO ordering applies and other waiters are ahead.
  // ---------------------------------------------------------------------
  if (fifo && has_waiter && !first_in_line) {
    // A shared request on an unlocked or shared-locked key can be granted
    // without waiting as long as no exclusive waiter is queued.
    if (!txn_lock_info.exclusive && (!locked || !held.exclusive)) {
      const bool exclusive_waiter_queued = std::any_of(
          held.waiter_queue->begin(), held.waiter_queue->end(),
          [](const auto& queued) { return queued->exclusive; });
      if (!exclusive_waiter_queued) {
        held.txn_ids.push_back(my_txn_id);
        held.exclusive = false;
        extend_expiration();
        return Status::OK();
      }
    }

    // Fast path for upgrade: the only holder asks for exclusive and no shared
    // waiter is ready to take the lock, so the upgrade is granted now.
    if (sole_owner && !held.exclusive && txn_lock_info.exclusive) {
      adopt_requested_mode();
      return Status::OK();
    }

    // Without an output list this call is only a probe; skip collecting the
    // blocking transaction ids used for deadlock detection.
    if (wait_ids == nullptr) {
      return Status::TimedOut(Status::SubCode::kLockTimeout);
    }

    // Current holders block this transaction.
    auto s = FillWaitIds(held, txn_lock_info, wait_ids, *isUpgrade, my_txn_id,
                         key);
    if (!s.ok()) {
      return s;
    }

    // Queued waiters block it too.
    if (txn_lock_info.exclusive) {
      // Front-to-back walk. An upgrade is placed at the front of the queue,
      // so it stops at the first exclusive waiter; shared waiters before that
      // point (possibly woken but not yet holding the lock) still count.
      for (auto& waiter : *held.waiter_queue) {
        if (*isUpgrade && waiter->exclusive) {
          break;
        }
        if (waiter->id != my_txn_id) {
          wait_ids->push_back(waiter->id);
        }
      }
    } else {
      // Back-to-front walk. The run of shared waiters at the tail is woken
      // together with this request, so it is skipped until the first
      // exclusive waiter is seen.
      bool in_trailing_shared_run = true;
      for (auto it = held.waiter_queue->rbegin();
           it != held.waiter_queue->rend(); ++it) {
        if ((*it)->exclusive) {
          in_trailing_shared_run = false;
        } else if (in_trailing_shared_run) {
          continue;
        }
        if ((*it)->id != my_txn_id) {
          wait_ids->push_back((*it)->id);
        }
      }
    }

    return Status::TimedOut(Status::SubCode::kLockTimeout);
  }

  // ---------------------------------------------------------------------
  // Case 4: no waiter ahead, or it is this transaction's turn.
  // ---------------------------------------------------------------------
  // An entry without holders is free; an expired lock may be stolen. The
  // expiration check only runs when the key is actually held.
  if (!locked || IsLockExpired(my_txn_id, held, env, expire_time)) {
    take_over();
    return Status::OK();
  }

  if (txn_lock_info.exclusive && sole_owner) {
    // Re-entrant and downgrade were handled above, so this is an upgrade by
    // the only holder.
    assert(!held.exclusive);
    adopt_requested_mode();
    return Status::OK();
  }

  if (!txn_lock_info.exclusive && !held.exclusive) {
    // Shared request on a shared lock: join the holders.
    held.txn_ids.push_back(my_txn_id);
    extend_expiration();
    return Status::OK();
  }

  // Incompatible request (exclusive vs. other holders, or shared vs. an
  // exclusive holder): report who is blocking and ask the caller to wait.
  auto s =
      FillWaitIds(held, txn_lock_info, wait_ids, *isUpgrade, my_txn_id, key);
  if (!s.ok()) {
    return s;
  }
  return Status::TimedOut(Status::SubCode::kLockTimeout);
}
|
|
1694
|
+
|
|
1695
|
+
// Releases the lock `txn` holds on `key` inside `stripe`, waking a waiter when
// the release makes that useful. Callers in this file invoke it with the
// stripe mutex held.
//
// @param txn       transaction giving up the lock.
// @param key       key being unlocked.
// @param stripe    stripe whose `keys` map is searched for `key`.
// @param lock_map  lock map owning `stripe`; forwarded to
//                  ReleaseLastLockHolder.
// @param env       only used by a debug assertion.
void PerKeyPointLockManager::UnLockKey(PessimisticTransaction* txn,
                                       const std::string& key,
                                       LockMapStripe* stripe, LockMap* lock_map,
                                       Env* env) {
#ifdef NDEBUG
  (void)env;
#endif
  TransactionID txn_id = txn->GetID();

  auto entry = stripe->keys.find(key);
  if (entry == stripe->keys.end()) {
    // No entry for this key. That is only expected when the unlocking
    // transaction has already expired.
    assert(txn->GetExpirationTime() > 0 &&
           txn->GetExpirationTime() < env->NowMicros());
    return;
  }

  auto& lock_info = entry->second;
  auto& holders = lock_info.txn_ids;
  auto self_it = std::find(holders.begin(), holders.end(), txn_id);
  if (self_it == holders.end()) {
    // The key is held, but not by this transaction: nothing to release.
    return;
  }

  // Captured before any removal so the branches below see the original count.
  const auto holder_count = holders.size();

  if (lock_info.exclusive) {
    // An exclusive lock has exactly one holder.
    assert(holder_count == 1);
    stripe->ReleaseLastLockHolder(lock_info, entry, lock_map, txn_id, key,
                                  max_num_locks_, holders, self_it);
    return;
  }

  // Shared mode. Another holder may be queued waiting to upgrade.
  assert(holder_count >= 1);

  if (holder_count == 1) {
    // This transaction is the last shared holder.
    stripe->ReleaseLastLockHolder(lock_info, entry, lock_map, txn_id, key,
                                  max_num_locks_, holders, self_it);
    return;
  }

  RemoveTransaction(holders, self_it);

  if (holder_count > 2) {
    // At least two holders remain, so the next waiter could not acquire the
    // lock anyway; do not wake anyone.
    return;
  }

  // Exactly one holder remains. If it is also the waiter at the head of the
  // queue, it is waiting to upgrade its shared lock: wake it up.
  auto& waiter_queue = lock_info.waiter_queue;
  if (waiter_queue != nullptr && !waiter_queue->empty() &&
      waiter_queue->front()->id == holders[0]) {
    waiter_queue->front()->Notify();
    DebugWakeUpWaiter(txn_id, waiter_queue->front()->id, key, "Lock Upgrade");
  }
}
|
|
1757
|
+
|
|
1758
|
+
// Releases the lock that `txn` holds on a single `key` of the given column
// family. Does nothing when the column family has no lock map.
//
// @param txn               transaction giving up the lock.
// @param column_family_id  column family the key belongs to.
// @param key               key to unlock.
// @param env               forwarded to UnLockKey.
void PerKeyPointLockManager::UnLock(PessimisticTransaction* txn,
                                    ColumnFamilyId column_family_id,
                                    const std::string& key, Env* env) {
  // Hold the shared_ptr for the whole call so the map stays alive.
  const std::shared_ptr<LockMap> lock_map_holder =
      GetLockMap(column_family_id);
  LockMap* const lock_map = lock_map_holder.get();
  if (lock_map == nullptr) {
    // Column Family must have been dropped.
    return;
  }

  // Find the stripe this key hashes to and unlock under its mutex.
  const size_t stripe_index = lock_map->GetStripe(key);
  assert(lock_map->lock_map_stripes_.size() > stripe_index);
  LockMapStripe* const stripe = lock_map->lock_map_stripes_.at(stripe_index);

  stripe->stripe_mutex->Lock().AssertOK();
  UnLockKey(txn, key, stripe, lock_map, env);
  stripe->stripe_mutex->UnLock();
}
|
|
1777
|
+
|
|
1778
|
+
// Releases every lock recorded in `tracker` on behalf of `txn`.
//
// For each column family in the tracker, the keys are grouped by the stripe
// they hash to, so each stripe mutex is taken once per column family.
//
// A column family without a lock map is skipped and processing continues
// with the remaining column families. Returning at that point would leave the
// locks tracked for those column families held.
//
// @param txn      transaction giving up its locks.
// @param tracker  set of (column family, key) pairs locked by `txn`.
// @param env      forwarded to UnLockKey.
void PerKeyPointLockManager::UnLock(PessimisticTransaction* txn,
                                    const LockTracker& tracker, Env* env) {
  std::unique_ptr<LockTracker::ColumnFamilyIterator> cf_it(
      tracker.GetColumnFamilyIterator());
  assert(cf_it != nullptr);
  while (cf_it->HasNext()) {
    ColumnFamilyId cf = cf_it->Next();
    std::shared_ptr<LockMap> lock_map_ptr = GetLockMap(cf);
    LockMap* lock_map = lock_map_ptr.get();
    if (!lock_map) {
      // Column Family must have been dropped. Keep going so the locks held
      // in the other column families are still released.
      continue;
    }

    // Bucket keys by lock_map_ stripe
    UnorderedMap<size_t, std::vector<const std::string*>> keys_by_stripe(
        lock_map->num_stripes_);
    std::unique_ptr<LockTracker::KeyIterator> key_it(
        tracker.GetKeyIterator(cf));
    assert(key_it != nullptr);
    while (key_it->HasNext()) {
      const std::string& key = key_it->Next();
      size_t stripe_num = lock_map->GetStripe(key);
      keys_by_stripe[stripe_num].push_back(&key);
    }

    // For each stripe, grab the stripe mutex and unlock all keys in this
    // stripe
    for (auto& stripe_iter : keys_by_stripe) {
      size_t stripe_num = stripe_iter.first;
      auto& stripe_keys = stripe_iter.second;

      assert(lock_map->lock_map_stripes_.size() > stripe_num);
      LockMapStripe* stripe = lock_map->lock_map_stripes_.at(stripe_num);

      stripe->stripe_mutex->Lock().AssertOK();

      for (const std::string* key : stripe_keys) {
        UnLockKey(txn, *key, stripe, lock_map, env);
      }

      stripe->stripe_mutex->UnLock();
    }
  }
}
|
|
1823
|
+
|
|
1824
|
+
// Range overload of UnLock. This implementation deliberately does nothing
// and ignores all of its arguments.
void PerKeyPointLockManager::UnLock(PessimisticTransaction* /* txn */,
                                    ColumnFamilyId /* cf_id */,
                                    const Endpoint& /* start */,
                                    const Endpoint& /* end */,
                                    Env* /* env */) {
  // Intentionally empty.
}
|
|
1830
|
+
|
|
748
1831
|
} // namespace ROCKSDB_NAMESPACE
|