mdbxmou 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +32 -0
- package/.github/workflows/publish.yml +27 -0
- package/.gitmodules +3 -0
- package/CMakeLists.txt +53 -0
- package/LICENSE +201 -0
- package/README.md +639 -0
- package/build.js +11 -0
- package/deps/libmdbx/.clang-format +3 -0
- package/deps/libmdbx/.cmake-format.yaml +3 -0
- package/deps/libmdbx/.le.ini +40 -0
- package/deps/libmdbx/CMakeLists.txt +1269 -0
- package/deps/libmdbx/COPYRIGHT +159 -0
- package/deps/libmdbx/ChangeLog.md +2786 -0
- package/deps/libmdbx/GNUmakefile +950 -0
- package/deps/libmdbx/LICENSE +177 -0
- package/deps/libmdbx/Makefile +16 -0
- package/deps/libmdbx/NOTICE +39 -0
- package/deps/libmdbx/README.md +863 -0
- package/deps/libmdbx/TODO.md +43 -0
- package/deps/libmdbx/cmake/compiler.cmake +1221 -0
- package/deps/libmdbx/cmake/profile.cmake +58 -0
- package/deps/libmdbx/cmake/utils.cmake +524 -0
- package/deps/libmdbx/conanfile.py +323 -0
- package/deps/libmdbx/docs/Doxyfile.in +2734 -0
- package/deps/libmdbx/docs/_preface.md +47 -0
- package/deps/libmdbx/docs/_restrictions.md +248 -0
- package/deps/libmdbx/docs/_starting.md +245 -0
- package/deps/libmdbx/docs/_toc.md +34 -0
- package/deps/libmdbx/docs/header.html +96 -0
- package/deps/libmdbx/example/CMakeLists.txt +6 -0
- package/deps/libmdbx/example/README.md +1 -0
- package/deps/libmdbx/example/example-mdbx.c +154 -0
- package/deps/libmdbx/example/sample-bdb.txt +77 -0
- package/deps/libmdbx/mdbx.h +6655 -0
- package/deps/libmdbx/mdbx.h++ +6428 -0
- package/deps/libmdbx/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch +173 -0
- package/deps/libmdbx/src/alloy.c +54 -0
- package/deps/libmdbx/src/api-cold.c +543 -0
- package/deps/libmdbx/src/api-copy.c +912 -0
- package/deps/libmdbx/src/api-cursor.c +754 -0
- package/deps/libmdbx/src/api-dbi.c +315 -0
- package/deps/libmdbx/src/api-env.c +1434 -0
- package/deps/libmdbx/src/api-extra.c +165 -0
- package/deps/libmdbx/src/api-key-transform.c +197 -0
- package/deps/libmdbx/src/api-misc.c +286 -0
- package/deps/libmdbx/src/api-opts.c +575 -0
- package/deps/libmdbx/src/api-range-estimate.c +365 -0
- package/deps/libmdbx/src/api-txn-data.c +454 -0
- package/deps/libmdbx/src/api-txn.c +921 -0
- package/deps/libmdbx/src/atomics-ops.h +364 -0
- package/deps/libmdbx/src/atomics-types.h +97 -0
- package/deps/libmdbx/src/audit.c +109 -0
- package/deps/libmdbx/src/bits.md +34 -0
- package/deps/libmdbx/src/chk.c +1796 -0
- package/deps/libmdbx/src/cogs.c +309 -0
- package/deps/libmdbx/src/cogs.h +506 -0
- package/deps/libmdbx/src/coherency.c +170 -0
- package/deps/libmdbx/src/config.h.in +88 -0
- package/deps/libmdbx/src/cursor.c +2396 -0
- package/deps/libmdbx/src/cursor.h +391 -0
- package/deps/libmdbx/src/dbi.c +717 -0
- package/deps/libmdbx/src/dbi.h +142 -0
- package/deps/libmdbx/src/debug_begin.h +36 -0
- package/deps/libmdbx/src/debug_end.h +15 -0
- package/deps/libmdbx/src/dpl.c +486 -0
- package/deps/libmdbx/src/dpl.h +134 -0
- package/deps/libmdbx/src/dxb.c +1335 -0
- package/deps/libmdbx/src/env.c +607 -0
- package/deps/libmdbx/src/essentials.h +125 -0
- package/deps/libmdbx/src/gc-get.c +1345 -0
- package/deps/libmdbx/src/gc-put.c +970 -0
- package/deps/libmdbx/src/gc.h +40 -0
- package/deps/libmdbx/src/global.c +474 -0
- package/deps/libmdbx/src/internals.h +585 -0
- package/deps/libmdbx/src/layout-dxb.h +288 -0
- package/deps/libmdbx/src/layout-lck.h +289 -0
- package/deps/libmdbx/src/lck-posix.c +859 -0
- package/deps/libmdbx/src/lck-windows.c +607 -0
- package/deps/libmdbx/src/lck.c +174 -0
- package/deps/libmdbx/src/lck.h +110 -0
- package/deps/libmdbx/src/logging_and_debug.c +250 -0
- package/deps/libmdbx/src/logging_and_debug.h +159 -0
- package/deps/libmdbx/src/man1/mdbx_chk.1 +106 -0
- package/deps/libmdbx/src/man1/mdbx_copy.1 +95 -0
- package/deps/libmdbx/src/man1/mdbx_drop.1 +48 -0
- package/deps/libmdbx/src/man1/mdbx_dump.1 +101 -0
- package/deps/libmdbx/src/man1/mdbx_load.1 +105 -0
- package/deps/libmdbx/src/man1/mdbx_stat.1 +86 -0
- package/deps/libmdbx/src/mdbx.c++ +1837 -0
- package/deps/libmdbx/src/meta.c +656 -0
- package/deps/libmdbx/src/meta.h +168 -0
- package/deps/libmdbx/src/mvcc-readers.c +414 -0
- package/deps/libmdbx/src/node.c +365 -0
- package/deps/libmdbx/src/node.h +102 -0
- package/deps/libmdbx/src/ntdll.def +1246 -0
- package/deps/libmdbx/src/options.h +534 -0
- package/deps/libmdbx/src/osal.c +3485 -0
- package/deps/libmdbx/src/osal.h +587 -0
- package/deps/libmdbx/src/page-get.c +483 -0
- package/deps/libmdbx/src/page-iov.c +185 -0
- package/deps/libmdbx/src/page-iov.h +34 -0
- package/deps/libmdbx/src/page-ops.c +744 -0
- package/deps/libmdbx/src/page-ops.h +142 -0
- package/deps/libmdbx/src/pnl.c +236 -0
- package/deps/libmdbx/src/pnl.h +146 -0
- package/deps/libmdbx/src/preface.h +990 -0
- package/deps/libmdbx/src/proto.h +105 -0
- package/deps/libmdbx/src/refund.c +212 -0
- package/deps/libmdbx/src/sort.h +484 -0
- package/deps/libmdbx/src/spill.c +431 -0
- package/deps/libmdbx/src/spill.h +74 -0
- package/deps/libmdbx/src/table.c +107 -0
- package/deps/libmdbx/src/tls.c +551 -0
- package/deps/libmdbx/src/tls.h +43 -0
- package/deps/libmdbx/src/tools/chk.c +673 -0
- package/deps/libmdbx/src/tools/copy.c +166 -0
- package/deps/libmdbx/src/tools/drop.c +199 -0
- package/deps/libmdbx/src/tools/dump.c +515 -0
- package/deps/libmdbx/src/tools/load.c +831 -0
- package/deps/libmdbx/src/tools/stat.c +516 -0
- package/deps/libmdbx/src/tools/wingetopt.c +87 -0
- package/deps/libmdbx/src/tools/wingetopt.h +30 -0
- package/deps/libmdbx/src/tree-ops.c +1554 -0
- package/deps/libmdbx/src/tree-search.c +140 -0
- package/deps/libmdbx/src/txl.c +99 -0
- package/deps/libmdbx/src/txl.h +26 -0
- package/deps/libmdbx/src/txn.c +1083 -0
- package/deps/libmdbx/src/unaligned.h +205 -0
- package/deps/libmdbx/src/utils.c +32 -0
- package/deps/libmdbx/src/utils.h +76 -0
- package/deps/libmdbx/src/version.c.in +44 -0
- package/deps/libmdbx/src/walk.c +290 -0
- package/deps/libmdbx/src/walk.h +20 -0
- package/deps/libmdbx/src/windows-import.c +152 -0
- package/deps/libmdbx/src/windows-import.h +128 -0
- package/deps/libmdbx/test/CMakeLists.txt +317 -0
- package/deps/libmdbx/test/append.c++ +237 -0
- package/deps/libmdbx/test/base.h++ +92 -0
- package/deps/libmdbx/test/battery-tmux.sh +64 -0
- package/deps/libmdbx/test/cases.c++ +118 -0
- package/deps/libmdbx/test/chrono.c++ +134 -0
- package/deps/libmdbx/test/chrono.h++ +85 -0
- package/deps/libmdbx/test/config.c++ +643 -0
- package/deps/libmdbx/test/config.h++ +334 -0
- package/deps/libmdbx/test/copy.c++ +62 -0
- package/deps/libmdbx/test/dead.c++ +39 -0
- package/deps/libmdbx/test/dump-load.sh +40 -0
- package/deps/libmdbx/test/extra/crunched_delete.c++ +409 -0
- package/deps/libmdbx/test/extra/cursor_closing.c++ +410 -0
- package/deps/libmdbx/test/extra/dbi.c++ +229 -0
- package/deps/libmdbx/test/extra/doubtless_positioning.c++ +253 -0
- package/deps/libmdbx/test/extra/dupfix_addodd.c +94 -0
- package/deps/libmdbx/test/extra/dupfix_multiple.c++ +311 -0
- package/deps/libmdbx/test/extra/early_close_dbi.c++ +137 -0
- package/deps/libmdbx/test/extra/hex_base64_base58.c++ +118 -0
- package/deps/libmdbx/test/extra/maindb_ordinal.c++ +61 -0
- package/deps/libmdbx/test/extra/open.c++ +96 -0
- package/deps/libmdbx/test/extra/pcrf/README.md +2 -0
- package/deps/libmdbx/test/extra/pcrf/pcrf_test.c +380 -0
- package/deps/libmdbx/test/extra/probe.c++ +10 -0
- package/deps/libmdbx/test/extra/txn.c++ +407 -0
- package/deps/libmdbx/test/extra/upsert_alldups.c +193 -0
- package/deps/libmdbx/test/fork.c++ +263 -0
- package/deps/libmdbx/test/hill.c++ +447 -0
- package/deps/libmdbx/test/jitter.c++ +197 -0
- package/deps/libmdbx/test/keygen.c++ +393 -0
- package/deps/libmdbx/test/keygen.h++ +130 -0
- package/deps/libmdbx/test/log.c++ +358 -0
- package/deps/libmdbx/test/log.h++ +91 -0
- package/deps/libmdbx/test/main.c++ +706 -0
- package/deps/libmdbx/test/nested.c++ +318 -0
- package/deps/libmdbx/test/osal-unix.c++ +647 -0
- package/deps/libmdbx/test/osal-windows.c++ +440 -0
- package/deps/libmdbx/test/osal.h++ +41 -0
- package/deps/libmdbx/test/stochastic.sh +690 -0
- package/deps/libmdbx/test/stub/LICENSE +24 -0
- package/deps/libmdbx/test/stub/README.md +8 -0
- package/deps/libmdbx/test/stub/pthread_barrier.c +104 -0
- package/deps/libmdbx/test/stub/pthread_barrier.h +77 -0
- package/deps/libmdbx/test/test.c++ +1551 -0
- package/deps/libmdbx/test/test.h++ +298 -0
- package/deps/libmdbx/test/tmux.conf +3 -0
- package/deps/libmdbx/test/try.c++ +30 -0
- package/deps/libmdbx/test/ttl.c++ +240 -0
- package/deps/libmdbx/test/utils.c++ +203 -0
- package/deps/libmdbx/test/utils.h++ +326 -0
- package/deps/libmdbx/test/valgrind_suppress.txt +536 -0
- package/lib/mdbx_evn_async.js +211 -0
- package/lib/mdbx_worker.js +195 -0
- package/lib/nativemou.js +6 -0
- package/package.json +38 -0
- package/src/async/envmou_close.cpp +34 -0
- package/src/async/envmou_close.hpp +32 -0
- package/src/async/envmou_copy_to.cpp +29 -0
- package/src/async/envmou_copy_to.hpp +38 -0
- package/src/async/envmou_keys.cpp +201 -0
- package/src/async/envmou_keys.hpp +50 -0
- package/src/async/envmou_open.cpp +38 -0
- package/src/async/envmou_open.hpp +33 -0
- package/src/async/envmou_query.cpp +167 -0
- package/src/async/envmou_query.hpp +53 -0
- package/src/dbimou.cpp +522 -0
- package/src/dbimou.hpp +82 -0
- package/src/env_arg0.hpp +24 -0
- package/src/envmou.cpp +445 -0
- package/src/envmou.hpp +116 -0
- package/src/modulemou.cpp +113 -0
- package/src/querymou.cpp +177 -0
- package/src/querymou.hpp +93 -0
- package/src/txnmou.cpp +254 -0
- package/src/txnmou.hpp +122 -0
- package/src/typemou.hpp +239 -0
- package/src/valuemou.hpp +194 -0
- package/test/async.js +67 -0
- package/test/e3.js +38 -0
- package/test/e4.js +89 -0
- package/test/e5.js +162 -0
- package/test/test-batch-ops.js +243 -0
- package/test/test-cursor-mode.js +84 -0
- package/test/test-multi-mode.js +87 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "essentials.h"
|
|
7
|
+
|
|
8
|
+
static inline uint64_t meta_sign_calculate(const meta_t *meta) {
|
|
9
|
+
uint64_t sign = DATASIGN_NONE;
|
|
10
|
+
#if 0 /* TODO */
|
|
11
|
+
sign = hippeus_hash64(...);
|
|
12
|
+
#else
|
|
13
|
+
(void)meta;
|
|
14
|
+
#endif
|
|
15
|
+
/* LY: newer returns DATASIGN_NONE or DATASIGN_WEAK */
|
|
16
|
+
return (sign > DATASIGN_WEAK) ? sign : ~sign;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/* Reads the 64-bit signature field of a (possibly concurrently updated)
 * meta-page using the volatile unaligned 64-bit peek helper. */
static inline uint64_t meta_sign_get(const volatile meta_t *meta) {
  const uint64_t sign = unaligned_peek_u64_volatile(4, meta->sign);
  return sign;
}
|
|
20
|
+
|
|
21
|
+
/* Stores the signature produced by meta_sign_calculate() into meta->sign. */
static inline void meta_sign_as_steady(meta_t *meta) {
  const uint64_t sign = meta_sign_calculate(meta);
  unaligned_poke_u64(4, meta->sign, sign);
}
|
|
22
|
+
|
|
23
|
+
/* Returns true when the signature currently stored in the meta-page
 * satisfies SIGN_IS_STEADY(). */
static inline bool meta_is_steady(const volatile meta_t *meta) {
  const uint64_t sign = meta_sign_get(meta);
  return SIGN_IS_STEADY(sign);
}
|
|
24
|
+
|
|
25
|
+
MDBX_INTERNAL troika_t meta_tap(const MDBX_env *env);
|
|
26
|
+
MDBX_INTERNAL unsigned meta_eq_mask(const troika_t *troika);
|
|
27
|
+
MDBX_INTERNAL bool meta_should_retry(const MDBX_env *env, troika_t *troika);
|
|
28
|
+
MDBX_MAYBE_UNUSED MDBX_INTERNAL bool troika_verify_fsm(void);
|
|
29
|
+
|
|
30
|
+
struct meta_ptr {
|
|
31
|
+
txnid_t txnid;
|
|
32
|
+
union {
|
|
33
|
+
const volatile meta_t *ptr_v;
|
|
34
|
+
const meta_t *ptr_c;
|
|
35
|
+
};
|
|
36
|
+
size_t is_steady;
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
MDBX_INTERNAL meta_ptr_t meta_ptr(const MDBX_env *env, unsigned n);
|
|
40
|
+
MDBX_INTERNAL txnid_t meta_txnid(const volatile meta_t *meta);
|
|
41
|
+
MDBX_INTERNAL txnid_t recent_committed_txnid(const MDBX_env *env);
|
|
42
|
+
MDBX_INTERNAL int meta_sync(const MDBX_env *env, const meta_ptr_t head);
|
|
43
|
+
|
|
44
|
+
MDBX_INTERNAL const char *durable_caption(const meta_t *const meta);
|
|
45
|
+
MDBX_INTERNAL void meta_troika_dump(const MDBX_env *env, const troika_t *troika);
|
|
46
|
+
|
|
47
|
+
/* Pointer to the meta of the n-th page of the environment's mapping. */
#define METAPAGE(env, n) page_meta(pgno2page(env, n))
/* One-past-the-last meta: the meta position of page number NUM_METAS. */
#define METAPAGE_END(env) METAPAGE(env, NUM_METAS)
|
|
49
|
+
|
|
50
|
+
static inline meta_ptr_t meta_recent(const MDBX_env *env, const troika_t *troika) {
|
|
51
|
+
meta_ptr_t r;
|
|
52
|
+
r.txnid = troika->txnid[troika->recent];
|
|
53
|
+
r.ptr_v = METAPAGE(env, troika->recent);
|
|
54
|
+
r.is_steady = (troika->fsm >> troika->recent) & 1;
|
|
55
|
+
return r;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
static inline meta_ptr_t meta_prefer_steady(const MDBX_env *env, const troika_t *troika) {
|
|
59
|
+
meta_ptr_t r;
|
|
60
|
+
r.txnid = troika->txnid[troika->prefer_steady];
|
|
61
|
+
r.ptr_v = METAPAGE(env, troika->prefer_steady);
|
|
62
|
+
r.is_steady = (troika->fsm >> troika->prefer_steady) & 1;
|
|
63
|
+
return r;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
static inline meta_ptr_t meta_tail(const MDBX_env *env, const troika_t *troika) {
|
|
67
|
+
const uint8_t tail = troika->tail_and_flags & 3;
|
|
68
|
+
MDBX_ANALYSIS_ASSUME(tail < NUM_METAS);
|
|
69
|
+
meta_ptr_t r;
|
|
70
|
+
r.txnid = troika->txnid[tail];
|
|
71
|
+
r.ptr_v = METAPAGE(env, tail);
|
|
72
|
+
r.is_steady = (troika->fsm >> tail) & 1;
|
|
73
|
+
return r;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
static inline bool meta_is_used(const troika_t *troika, unsigned n) {
|
|
77
|
+
return n == troika->recent || n == troika->prefer_steady;
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
static inline bool meta_bootid_match(const meta_t *meta) {
|
|
81
|
+
|
|
82
|
+
return memcmp(&meta->bootid, &globals.bootid, 16) == 0 && (globals.bootid.x | globals.bootid.y) != 0;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
static inline bool meta_weak_acceptable(const MDBX_env *env, const meta_t *meta, const int lck_exclusive) {
|
|
86
|
+
return lck_exclusive
|
|
87
|
+
? /* exclusive lock */ meta_bootid_match(meta)
|
|
88
|
+
: /* db already opened */ env->lck_mmap.lck && (env->lck_mmap.lck->envmode.weak & MDBX_RDONLY) == 0;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline txnid_t constmeta_txnid(const meta_t *meta) {
|
|
92
|
+
const txnid_t a = unaligned_peek_u64(4, &meta->txnid_a);
|
|
93
|
+
const txnid_t b = unaligned_peek_u64(4, &meta->txnid_b);
|
|
94
|
+
return likely(a == b) ? a : 0;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/* Opens an in-place update of an actual meta-page: txnid_b is zeroed first
 * and only then txnid_a receives the new id, so the two copies disagree
 * until meta_update_end() is called. The order of the stores is significant.
 *
 * `meta` must lie within the meta-pages of the mapping and both stored ids
 * must be less than `txnid` (checked by assertions only). */
static inline void meta_update_begin(const MDBX_env *env, meta_t *meta, txnid_t txnid) {
  eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env));
  eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) < txnid);
  eASSERT(env, unaligned_peek_u64(4, meta->txnid_b) < txnid);
  (void)env;
#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && MDBX_UNALIGNED_OK >= 8
  atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, 0, mo_AcquireRelease);
  atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_a, txnid, mo_AcquireRelease);
#else
  /* indexes of the low and high 32-bit halves for the target byte order */
  enum {
    half_lo = (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__),
    half_hi = (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  };
  atomic_store32(&meta->txnid_b[half_lo], 0, mo_AcquireRelease);
  atomic_store32(&meta->txnid_b[half_hi], 0, mo_AcquireRelease);
  atomic_store32(&meta->txnid_a[half_lo], (uint32_t)txnid, mo_AcquireRelease);
  atomic_store32(&meta->txnid_a[half_hi], (uint32_t)(txnid >> 32), mo_AcquireRelease);
#endif
}
|
|
111
|
+
|
|
112
|
+
/* Completes an in-place update started by meta_update_begin(): copies the
 * current boot-id into the meta and then stores `txnid` into txnid_b, making
 * both id copies equal again.
 *
 * txnid_a must already hold `txnid` and txnid_b must still be less than it
 * (checked by assertions only). */
static inline void meta_update_end(const MDBX_env *env, meta_t *meta, txnid_t txnid) {
  eASSERT(env, meta >= METAPAGE(env, 0) && meta < METAPAGE_END(env));
  eASSERT(env, unaligned_peek_u64(4, meta->txnid_a) == txnid);
  eASSERT(env, unaligned_peek_u64(4, meta->txnid_b) < txnid);
  (void)env;
  jitter4testing(true);
  memcpy(&meta->bootid, &globals.bootid, 16);
#if (defined(__amd64__) || defined(__e2k__)) && !defined(ENABLE_UBSAN) && MDBX_UNALIGNED_OK >= 8
  atomic_store64((mdbx_atomic_uint64_t *)&meta->txnid_b, txnid, mo_AcquireRelease);
#else
  /* indexes of the low and high 32-bit halves for the target byte order */
  enum {
    half_lo = (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__),
    half_hi = (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
  };
  atomic_store32(&meta->txnid_b[half_lo], (uint32_t)txnid, mo_AcquireRelease);
  atomic_store32(&meta->txnid_b[half_hi], (uint32_t)(txnid >> 32), mo_AcquireRelease);
#endif
}
|
|
126
|
+
|
|
127
|
+
/* Fills the boot-id and both txnid copies of a meta image.
 *
 * The fields are written without any ordering guarantees, since this
 * function is used ONLY for preparing a meta-image for writing, never for
 * the actual meta-page (the assertion rejects pointers inside the mapped
 * meta-pages). */
static inline void meta_set_txnid(const MDBX_env *env, meta_t *meta, const txnid_t txnid) {
  eASSERT(env, !env->dxb_mmap.base || meta < METAPAGE(env, 0) || meta >= METAPAGE_END(env));
  (void)env;
  memcpy(&meta->bootid, &globals.bootid, 16);
  unaligned_poke_u64(4, meta->txnid_a, txnid);
  unaligned_poke_u64(4, meta->txnid_b, txnid);
}
|
|
136
|
+
|
|
137
|
+
/* Three-way comparison of two txnids scaled by `s`:
 * 0 when a < b, s when a == b, 2*s when a > b. */
static inline uint8_t meta_cmp2int(txnid_t a, txnid_t b, uint8_t s) {
  if (unlikely(a == b))
    return s;
  if (a > b)
    return (uint8_t)(2 * s);
  return 0;
}
|
|
140
|
+
|
|
141
|
+
/* Given the three-way txnid comparison of metas A and B (0: A < B, 1: equal,
 * 2: A > B), returns 1 when A should be treated as the more recent one:
 * either A has the greater txnid, or the txnids are equal and only A is
 * steady. Returns 0 otherwise. */
static inline uint8_t meta_cmp2recent(uint8_t ab_cmp2int, bool a_steady, bool b_steady) {
  assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */);
  if (ab_cmp2int > 1)
    return 1;
  return ab_cmp2int == 1 && a_steady && !b_steady;
}
|
|
145
|
+
|
|
146
|
+
/* Given the three-way txnid comparison of metas A and B (0: A < B, 1: equal,
 * 2: A > B), returns 1 when A should be preferred as the steady one:
 * steadiness wins first, and the greater txnid breaks a tie in steadiness.
 * Returns 0 otherwise. */
static inline uint8_t meta_cmp2steady(uint8_t ab_cmp2int, bool a_steady, bool b_steady) {
  assert(ab_cmp2int < 3 /* && a_steady< 2 && b_steady < 2 */);
  if (a_steady != b_steady)
    return a_steady; /* exactly one is steady: A wins only if it is A */
  return ab_cmp2int > 1;
}
|
|
150
|
+
|
|
151
|
+
/* Returns true when meta A (txnid + steady flag) should be chosen over
 * meta B as the most recent one; see meta_cmp2recent(). */
static inline bool meta_choice_recent(txnid_t a_txnid, bool a_steady, txnid_t b_txnid, bool b_steady) {
  const uint8_t order = meta_cmp2int(a_txnid, b_txnid, 1);
  return meta_cmp2recent(order, a_steady, b_steady);
}
|
|
154
|
+
|
|
155
|
+
/* Returns true when meta A (txnid + steady flag) should be chosen over
 * meta B as the preferred steady one; see meta_cmp2steady(). */
static inline bool meta_choice_steady(txnid_t a_txnid, bool a_steady, txnid_t b_txnid, bool b_steady) {
  const uint8_t order = meta_cmp2int(a_txnid, b_txnid, 1);
  return meta_cmp2steady(order, a_steady, b_steady);
}
|
|
158
|
+
|
|
159
|
+
MDBX_INTERNAL meta_t *meta_init_triplet(const MDBX_env *env, void *buffer);
|
|
160
|
+
|
|
161
|
+
MDBX_INTERNAL int meta_validate(MDBX_env *env, meta_t *const meta, const page_t *const page, const unsigned meta_number,
|
|
162
|
+
unsigned *guess_pagesize);
|
|
163
|
+
|
|
164
|
+
MDBX_INTERNAL int __must_check_result meta_validate_copy(MDBX_env *env, const meta_t *meta, meta_t *dest);
|
|
165
|
+
|
|
166
|
+
MDBX_INTERNAL int __must_check_result meta_override(MDBX_env *env, size_t target, txnid_t txnid, const meta_t *shape);
|
|
167
|
+
|
|
168
|
+
MDBX_INTERNAL int meta_wipe_steady(MDBX_env *env, txnid_t inclusive_upto);
|
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
|
|
3
|
+
|
|
4
|
+
#include "internals.h"
|
|
5
|
+
|
|
6
|
+
/* Binds a slot of the shared reader table to the calling reader.
 *
 * Takes the reader-table lock, registers the process pid if that was not
 * done yet, finds a free slot (pid == 0) or appends a new one, claims it and
 * releases the lock. When the table is full, dead readers are cleaned up and
 * the search is repeated if at least one slot was released.
 *
 * Returns a bsr_t with:
 *  - err == MDBX_SUCCESS and rslot pointing to the claimed slot, or
 *  - rslot == nullptr and err set to the locking/registration error,
 *    MDBX_PANIC (environment has a fatal error), MDBX_EPERM (no data
 *    mapping), MDBX_READERS_FULL (no free slot even after cleanup) or the
 *    error reported by mvcc_cleanup_dead(). */
bsr_t mvcc_bind_slot(MDBX_env *env) {
  eASSERT(env, env->lck_mmap.lck);
  eASSERT(env, env->lck->magic_and_version == MDBX_LOCK_MAGIC);
  eASSERT(env, env->lck->os_and_format == MDBX_LOCK_FORMAT);

  bsr_t result = {lck_rdt_lock(env), nullptr};
  if (unlikely(MDBX_IS_ERROR(result.err)))
    return result;
  if (unlikely(env->flags & ENV_FATAL_ERROR)) {
    result.err = MDBX_PANIC;
    goto bailout;
  }
  if (unlikely(!env->dxb_mmap.base)) {
    result.err = MDBX_EPERM;
    goto bailout;
  }

  if (unlikely(env->registered_reader_pid != env->pid)) {
    result.err = lck_rpid_set(env);
    if (unlikely(result.err != MDBX_SUCCESS))
      goto bailout;
    env->registered_reader_pid = env->pid;
  }

  size_t slot, nreaders;
  for (;;) {
    nreaders = env->lck->rdt_length.weak;
    for (slot = 0; slot < nreaders; slot++)
      if (!atomic_load32(&env->lck->rdt[slot].pid, mo_AcquireRelease))
        break;

    if (likely(slot < env->max_readers))
      break;

    /* The table is full: try to release slots of dead readers.
     * With the table already locked mvcc_cleanup_dead() reports success as
     * MDBX_SUCCESS, so the number of released slots is what tells whether
     * another search makes sense. */
    int dead = 0;
    result.err = mvcc_cleanup_dead(env, true, &dead);
    if (MDBX_IS_ERROR(result.err))
      goto bailout;
    if (result.err != MDBX_RESULT_TRUE && dead < 1) {
      result.err = MDBX_READERS_FULL;
      goto bailout;
    }
  }
  /* do not leak MDBX_RESULT_TRUE from the cleanup to the caller */
  result.err = MDBX_SUCCESS;

  result.rslot = &env->lck->rdt[slot];
  /* Claim the reader slot, carefully since other code
   * uses the reader table un-mutexed: First reset the
   * slot, next publish it in lck->rdt_length. After
   * that, it is safe for mdbx_env_close() to touch it.
   * When it will be closed, we can finally claim it. */
  atomic_store32(&result.rslot->pid, 0, mo_AcquireRelease);
  safe64_reset(&result.rslot->txnid, true);
  if (slot == nreaders)
    env->lck->rdt_length.weak = (uint32_t)++nreaders;
  result.rslot->tid.weak = (env->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self();
  atomic_store32(&result.rslot->pid, env->pid, mo_AcquireRelease);
  lck_rdt_unlock(env);

  if (likely(env->flags & ENV_TXKEY)) {
    eASSERT(env, env->registered_reader_pid == env->pid);
    thread_rthc_set(env->me_txkey, result.rslot);
  }
  return result;

bailout:
  /* failure after the reader-table lock was taken; rslot is still nullptr */
  lck_rdt_unlock(env);
  return result;
}
|
|
73
|
+
|
|
74
|
+
/* Determines the oldest txnid still used by any reader, bounded above by
 * `steady`, and caches it in lck->cached_oldest.
 *
 * The reader table is rescanned while lck->rdt_refresh_flag differs from the
 * "nothing changed" marker; the marker is set before each scan, so a change
 * signalled during the scan causes one more pass. Readers whose txnid is
 * below the previously cached oldest value are reset (kicked) rather than
 * taken into account.
 *
 * In the exclusive without-lck mode the result is just `steady`. */
__hot txnid_t mvcc_shapshot_oldest(MDBX_env *const env, const txnid_t steady) {
  const uint32_t nothing_changed = MDBX_STRING_TETRAD("None");
  eASSERT(env, steady <= env->basal_txn->txnid);

  lck_t *const lck = env->lck_mmap.lck;
  if (unlikely(!lck)) {
    /* exclusive without-lck mode */
    eASSERT(env, env->lck == lckless_stub(env));
    env->lck->rdt_refresh_flag.weak = nothing_changed;
    env->lck->cached_oldest.weak = steady;
    return steady;
  }

  const txnid_t prev_oldest = atomic_load64(&lck->cached_oldest, mo_AcquireRelease);
  eASSERT(env, steady >= prev_oldest);

  txnid_t new_oldest = prev_oldest;
  while (atomic_load32(&lck->rdt_refresh_flag, mo_AcquireRelease) != nothing_changed) {
    lck->rdt_refresh_flag.weak = nothing_changed;
    jitter4testing(false);
    const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
    new_oldest = steady;

    for (size_t n = 0; n < snap_nreaders; ++n) {
      reader_slot_t *const rslot = &lck->rdt[n];
      const uint32_t owner = atomic_load32(&rslot->pid, mo_AcquireRelease);
      if (owner == 0)
        continue;
      jitter4testing(true);

      const txnid_t reader_txnid = safe64_read(&rslot->txnid);
      if (unlikely(reader_txnid < prev_oldest)) {
        /* a reader behind the cached oldest: reset it, unless the table
         * has been changed in the meantime */
        if (unlikely(atomic_load32(&lck->rdt_refresh_flag, mo_AcquireRelease) == nothing_changed) &&
            safe64_reset_compare(&rslot->txnid, reader_txnid)) {
          NOTICE("kick stuck reader[%zu of %zu].pid_%u %" PRIaTXN " < prev-oldest %" PRIaTXN ", steady-txn %" PRIaTXN,
                 n, snap_nreaders, owner, reader_txnid, prev_oldest, steady);
        }
        continue;
      }

      if (reader_txnid >= new_oldest)
        continue;
      new_oldest = reader_txnid;
      /* cannot get lower than the previous value: stop early in release builds */
      if (!MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS && new_oldest == prev_oldest)
        break;
    }
  }

  if (new_oldest != prev_oldest) {
    VERBOSE("update oldest %" PRIaTXN " -> %" PRIaTXN, prev_oldest, new_oldest);
    eASSERT(env, new_oldest >= lck->cached_oldest.weak);
    atomic_store64(&lck->cached_oldest, new_oldest, mo_Relaxed);
  }
  return new_oldest;
}
|
|
126
|
+
|
|
127
|
+
/* Returns the largest number of used pages among `last_used_page` and the
 * snapshots of all active readers whose txnid does not exceed the txnid of
 * the basal transaction.
 *
 * Each slot is read twice; if the values changed between the reads the whole
 * scan is restarted. Without a lck (exclusive without-lck mode) the argument
 * is returned unchanged. */
pgno_t mvcc_snapshot_largest(const MDBX_env *env, pgno_t last_used_page) {
  lck_t *const lck = env->lck_mmap.lck;
  if (unlikely(!lck))
    return last_used_page;

rescan:;
  const size_t nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
  for (size_t n = 0; n < nreaders; ++n) {
    reader_slot_t *const rslot = &lck->rdt[n];
    if (!atomic_load32(&rslot->pid, mo_AcquireRelease))
      continue;

    /* jitter4testing(true); */
    const pgno_t pages = atomic_load32(&rslot->snapshot_pages_used, mo_Relaxed);
    const txnid_t txnid = safe64_read(&rslot->txnid);
    if (unlikely(pages != atomic_load32(&rslot->snapshot_pages_used, mo_AcquireRelease) ||
                 txnid != safe64_read(&rslot->txnid)))
      goto rescan;

    if (last_used_page < pages && txnid <= env->basal_txn->txnid)
      last_used_page = pages;
  }

  return last_used_page;
}
|
|
148
|
+
|
|
149
|
+
/* Find largest mvcc-snapshot still referenced by this process. */
|
|
150
|
+
pgno_t mvcc_largest_this(MDBX_env *env, pgno_t largest) {
|
|
151
|
+
lck_t *const lck = env->lck_mmap.lck;
|
|
152
|
+
if (likely(lck != nullptr /* exclusive mode */)) {
|
|
153
|
+
const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
|
|
154
|
+
for (size_t i = 0; i < snap_nreaders; ++i) {
|
|
155
|
+
retry:
|
|
156
|
+
if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease) == env->pid) {
|
|
157
|
+
/* jitter4testing(true); */
|
|
158
|
+
const pgno_t snap_pages = atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_Relaxed);
|
|
159
|
+
const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid);
|
|
160
|
+
if (unlikely(snap_pages != atomic_load32(&lck->rdt[i].snapshot_pages_used, mo_AcquireRelease) ||
|
|
161
|
+
snap_txnid != safe64_read(&lck->rdt[i].txnid)))
|
|
162
|
+
goto retry;
|
|
163
|
+
if (largest < snap_pages &&
|
|
164
|
+
atomic_load64(&lck->cached_oldest, mo_AcquireRelease) <=
|
|
165
|
+
/* ignore pending updates */ snap_txnid &&
|
|
166
|
+
snap_txnid <= MAX_TXNID)
|
|
167
|
+
largest = snap_pages;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
return largest;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/* Inserts `pid` into a sorted set of pids, keeping ascending order.
 *
 * `list[0]` holds the number of stored items and list[1..list[0]] hold the
 * pids; the caller must provide room for one more item.
 *
 * Returns true if the pid was inserted, false if it is already present (the
 * list is left untouched in that case).
 *
 * The pids are compared as unsigned values explicitly rather than by the
 * sign of their 32-bit difference, which gives a wrong order for values
 * that are more than 2^31 apart. */
static bool pid_insert(uint32_t *list, uint32_t pid) {
  /* binary search of pid in list */
  size_t base = 0;
  size_t cursor = 1;
  int cmp = 0;
  size_t n = /* length */ list[0];

  while (n > 0) {
    const size_t pivot = n >> 1;
    cursor = base + pivot + 1;
    cmp = (pid > list[cursor]) - (pid < list[cursor]);

    if (cmp < 0) {
      n = pivot;
    } else if (cmp > 0) {
      base = cursor;
      n -= pivot + 1;
    } else {
      /* found, so it's a duplicate */
      return false;
    }
  }

  /* the last probed item is less than pid: insert right after it */
  if (cmp > 0)
    ++cursor;

  list[0]++;
  for (n = list[0]; n > cursor; n--)
    list[n] = list[n - 1];
  list[n] = pid;
  return true;
}
|
|
206
|
+
|
|
207
|
+
/* Scans the reader table and releases the slots owned by dead processes.
 *
 * env         the environment, validated by check_env();
 * rdt_locked  non-zero if the caller already holds the reader-table lock;
 *             zero to let this function take the lock once a stale reader
 *             is found (it is released before returning);
 * dead        optional out-parameter receiving the number of released slots.
 *
 * Each distinct foreign pid is checked once via lck_rpid_check(). Returns
 * MDBX_SUCCESS, MDBX_RESULT_TRUE when taking the lock reported a recovered
 * mutex, MDBX_ENOMEM, or the error of check_env() / lck_rpid_check() /
 * lck_rdt_lock(). */
__cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, int *dead) {
  int rc = check_env(env, true);
  if (unlikely(rc != MDBX_SUCCESS))
    return rc;

  eASSERT(env, rdt_locked >= 0);
  lck_t *const lck = env->lck_mmap.lck;
  if (unlikely(!lck)) {
    /* exclusive mode */
    if (dead)
      *dead = 0;
    return MDBX_SUCCESS;
  }

  const size_t nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
  /* set of already examined pids: [0] is the count, items follow */
  uint32_t pids_onstack[142];
  uint32_t *pids = pids_onstack;
  if (nreaders >= ARRAY_LENGTH(pids_onstack)) {
    pids = osal_malloc((nreaders + 1) * sizeof(uint32_t));
    if (unlikely(!pids))
      return MDBX_ENOMEM;
  }

  pids[0] = 0;
  int released = 0;
  for (size_t n = 0; n < nreaders; n++) {
    const uint32_t pid = atomic_load32(&lck->rdt[n].pid, mo_AcquireRelease);
    /* skip empty slots, own slots and pids which were already processed */
    if (pid == 0 || pid == env->pid || !pid_insert(pids, pid))
      continue;

    int err = lck_rpid_check(env, pid);
    if (err == MDBX_RESULT_TRUE)
      continue /* reader is live */;
    if (err != MDBX_SUCCESS) {
      rc = err;
      break /* lck_rpid_check() failed */;
    }

    /* stale reader found */
    if (!rdt_locked) {
      err = lck_rdt_lock(env);
      if (MDBX_IS_ERROR(err)) {
        rc = err;
        break;
      }

      rdt_locked = -1; /* locked here, so must be unlocked on exit */
      if (err == MDBX_RESULT_TRUE) {
        /* mutex recovered, the mdbx_ipclock_failed() checked all readers */
        rc = MDBX_RESULT_TRUE;
        break;
      }

      /* another process may have cleaned and reused the slot, recheck */
      if (lck->rdt[n].pid.weak != pid)
        continue;

      err = lck_rpid_check(env, pid);
      if (MDBX_IS_ERROR(err)) {
        rc = err;
        break;
      }
      if (err != MDBX_SUCCESS)
        continue /* the race with other process, slot reused */;
    }

    /* release every slot of the dead process, starting from the current one */
    for (size_t k = n; k < nreaders; k++) {
      if (lck->rdt[k].pid.weak != pid)
        continue;
      DEBUG("clear stale reader pid %" PRIuPTR " txn %" PRIaTXN, (size_t)pid, lck->rdt[k].txnid.weak);
      atomic_store32(&lck->rdt[k].pid, 0, mo_Relaxed);
      atomic_store32(&lck->rdt_refresh_flag, true, mo_AcquireRelease);
      released++;
    }
  }

  if (likely(!MDBX_IS_ERROR(rc)))
    atomic_store64(&lck->readers_check_timestamp, osal_monotime(), mo_Relaxed);

  if (rdt_locked < 0)
    lck_rdt_unlock(env);

  if (pids != pids_onstack)
    osal_free(pids);

  if (dead)
    *dead = released;
  return rc;
}
|
|
302
|
+
|
|
303
|
+
/*
 * mvcc_kick_laggards() - try to get readers off the `straggler` snapshot.
 *
 * The debug message below says "DB size maxed out by reading #<straggler>",
 * so presumably this is called when a reader pinned to `straggler` prevents
 * further progress -- NOTE(review): confirm against the callers.
 *
 * Each attempt of the loop:
 *   1. forces a refresh and recomputes the oldest snapshot;
 *   2. calls mvcc_cleanup_dead() to drop slots of dead readers;
 *   3. ousts "parked" read transactions that sit on `straggler`;
 *   4. hands one remaining slot that still holds `straggler` to the
 *      Handle-Slow-Readers callback (env->hsr_callback), if one is set,
 *      and acts on its return value.
 * Attempts repeat until the oldest snapshot moves past `straggler`, there is
 * nothing (or nobody) left to notify, an error occurs, or the callback
 * returns a negative value.
 *
 * env       - environment; env->txn, env->lck and env->lck_mmap.lck are
 *             used here.
 * straggler - txnid of the read snapshot that readers should be moved off.
 *
 * Returns the oldest snapshot txnid as computed on the last attempt.
 */
__cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) {
|
|
304
|
+
DEBUG("DB size maxed out by reading #%" PRIaTXN, straggler);
|
|
305
|
+
osal_memory_fence(mo_AcquireRelease, false);
|
|
306
|
+
MDBX_hsr_func *const callback = env->hsr_callback;
|
|
307
|
+
txnid_t oldest = 0;
|
|
308
|
+
/* becomes true once the callback has returned 0; triggers the final
 * "end of loop" callback invocation after the loop */
bool notify_eof_of_loop = false;
|
|
309
|
+
/* attempt counter, passed to the callback as its last argument */
int retry = 0;
|
|
310
|
+
do {
|
|
311
|
+
const txnid_t steady = env->txn->tw.troika.txnid[env->txn->tw.troika.prefer_steady];
|
|
312
|
+
env->lck->rdt_refresh_flag.weak = /* force refresh */ true;
|
|
313
|
+
oldest = mvcc_shapshot_oldest(env, steady);
|
|
314
|
+
eASSERT(env, oldest < env->basal_txn->txnid);
|
|
315
|
+
eASSERT(env, oldest >= straggler);
|
|
316
|
+
eASSERT(env, oldest >= env->lck->cached_oldest.weak);
|
|
317
|
+
|
|
318
|
+
lck_t *const lck = env->lck_mmap.lck;
|
|
319
|
+
/* done if the oldest snapshot equals `steady`, has already moved past the
 * straggler, or there is no reader table to inspect */
if (oldest == steady || oldest > straggler || /* without-LCK mode */ !lck)
|
|
320
|
+
break;
|
|
321
|
+
|
|
322
|
+
/* drop slots of dead readers first; give up if that fails */
if (MDBX_IS_ERROR(mvcc_cleanup_dead(env, false, nullptr)))
|
|
323
|
+
break;
|
|
324
|
+
|
|
325
|
+
/* Scan the reader table for slots that hold the straggler snapshot and
 * have a non-zero pid. Parked ones are ousted right here; `stucked` ends up
 * pointing to the last matching slot that was not parked, and
 * `hold_retired` is that slot's snapshot_pages_retired value. */
reader_slot_t *stucked = nullptr;
|
|
326
|
+
uint64_t hold_retired = 0;
|
|
327
|
+
for (size_t i = 0; i < lck->rdt_length.weak; ++i) {
|
|
328
|
+
uint32_t pid;
|
|
329
|
+
reader_slot_t *const rslot = &lck->rdt[i];
|
|
330
|
+
txnid_t rtxn = safe64_read(&rslot->txnid);
|
|
331
|
+
/* the slot is re-evaluated from here with a freshly read rtxn when the
 * parked -> ousted CAS below fails */
retry:
|
|
332
|
+
if (rtxn == straggler && (pid = atomic_load32(&rslot->pid, mo_AcquireRelease)) != 0) {
|
|
333
|
+
const uint64_t tid = safe64_read(&rslot->tid);
|
|
334
|
+
if (tid == MDBX_TID_TXN_PARKED) {
|
|
335
|
+
/* The read transaction was marked by its owner as "parked",
|
|
336
|
+
* i.e. subject to asynchronous abort, or to being restored
|
|
337
|
+
* upon reader activity.
|
|
338
|
+
*
|
|
339
|
+
* If the first CAS(slot->tid) succeeds, then
|
|
340
|
+
* safe64_reset_compare() will safely clear txnid, or will refuse
|
|
341
|
+
* because the reader has reset and/or restarted the transaction.
|
|
342
|
+
* Meanwhile the reader may not notice the ousting if it proceeds
|
|
343
|
+
* to finish the transaction. All of these outcomes suit us.
|
|
344
|
+
*
|
|
345
|
+
* If the first CAS(slot->tid) does NOT succeed, then the reader
|
|
346
|
+
* has restored the transaction, or finished it, or even released the slot.
|
|
347
|
+
*/
|
|
348
|
+
bool ousted =
|
|
349
|
+
#if MDBX_64BIT_CAS
|
|
350
|
+
atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, MDBX_TID_TXN_OUSTED);
|
|
351
|
+
#else
|
|
352
|
+
atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)MDBX_TID_TXN_OUSTED);
|
|
353
|
+
#endif
|
|
354
|
+
if (likely(ousted)) {
|
|
355
|
+
/* from here on `ousted` tells whether txnid was actually cleared */
ousted = safe64_reset_compare(&rslot->txnid, rtxn);
|
|
356
|
+
NOTICE("ousted-%s parked read-txn %" PRIaTXN ", pid %u, tid 0x%" PRIx64, ousted ? "complete" : "half", rtxn,
|
|
357
|
+
pid, tid);
|
|
358
|
+
eASSERT(env, ousted || safe64_read(&rslot->txnid) > straggler);
|
|
359
|
+
/* this slot is handled, go on with the next one */
continue;
|
|
360
|
+
}
|
|
361
|
+
/* the CAS failed: the slot state has changed, re-read and re-check it */
rtxn = safe64_read(&rslot->txnid);
|
|
362
|
+
goto retry;
|
|
363
|
+
}
|
|
364
|
+
hold_retired = atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed);
|
|
365
|
+
stucked = rslot;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
/* nobody to notify, or nothing to notify about */
if (!callback || !stucked)
|
|
370
|
+
break;
|
|
371
|
+
|
|
372
|
+
/* Re-read the chosen slot: if it no longer holds the straggler or its pid
 * is zero, start the next attempt. Note that `continue` inside do-while
 * jumps to the `++retry < INT_MAX` condition. */
uint32_t pid = atomic_load32(&stucked->pid, mo_AcquireRelease);
|
|
373
|
+
uint64_t tid = safe64_read(&stucked->tid);
|
|
374
|
+
if (safe64_read(&stucked->txnid) != straggler || !pid)
|
|
375
|
+
continue;
|
|
376
|
+
|
|
377
|
+
/* Arguments for the callback:
 *   gap   - distance from the straggler to the txnid of the recent meta,
 *           in xMDBX_TXNID_STEP units (clamped to UINT_MAX when passed);
 *   space - head's pages_retired minus the slot's snapshot_pages_retired,
 *           converted by pgno2bytes(), or 0 when the difference is not
 *           positive. */
const meta_ptr_t head = meta_recent(env, &env->txn->tw.troika);
|
|
378
|
+
const txnid_t gap = (head.txnid - straggler) / xMDBX_TXNID_STEP;
|
|
379
|
+
const uint64_t head_retired = unaligned_peek_u64(4, head.ptr_c->pages_retired);
|
|
380
|
+
const size_t space = (head_retired > hold_retired) ? pgno2bytes(env, (pgno_t)(head_retired - hold_retired)) : 0;
|
|
381
|
+
int rc = callback(env, env->txn, pid, (mdbx_tid_t)((intptr_t)tid), straggler,
|
|
382
|
+
(gap < UINT_MAX) ? (unsigned)gap : UINT_MAX, space, retry);
|
|
383
|
+
if (rc < 0)
|
|
384
|
+
/* hsr returned an error and/or agreed to the MDBX_MAP_FULL error */
|
|
385
|
+
break;
|
|
386
|
+
|
|
387
|
+
if (rc > 0) {
|
|
388
|
+
if (rc == 1) {
|
|
389
|
+
/* hsr reported the transaction is (or will be) aborted asynchronously:
 * reset txnid only if it still equals the straggler */
|
|
390
|
+
safe64_reset_compare(&stucked->txnid, straggler);
|
|
391
|
+
} else {
|
|
392
|
+
/* hsr reported reader process was killed and slot should be cleared */
|
|
393
|
+
safe64_reset(&stucked->txnid, true);
|
|
394
|
+
atomic_store64(&stucked->tid, 0, mo_Relaxed);
|
|
395
|
+
atomic_store32(&stucked->pid, 0, mo_AcquireRelease);
|
|
396
|
+
}
|
|
397
|
+
} else if (!notify_eof_of_loop) {
|
|
398
|
+
/* hsr returned 0: remember (once) that the end-of-loop notification is
 * due, then go for another attempt */
#if MDBX_ENABLE_PROFGC
|
|
399
|
+
env->lck->pgops.gc_prof.kicks += 1;
|
|
400
|
+
#endif /* MDBX_ENABLE_PROFGC */
|
|
401
|
+
notify_eof_of_loop = true;
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
} while (++retry < INT_MAX);
|
|
405
|
+
|
|
406
|
+
if (notify_eof_of_loop) {
|
|
407
|
+
/* notify end of hsr-loop: pid and tid are passed as 0, the gap argument
 * carries how far `oldest` has moved from the straggler (clamped to
 * UINT_MAX), and the attempt counter is passed negated */
|
|
408
|
+
const txnid_t turn = oldest - straggler;
|
|
409
|
+
if (turn)
|
|
410
|
+
NOTICE("hsr-kick: done turn %" PRIaTXN " -> %" PRIaTXN " +%" PRIaTXN, straggler, oldest, turn);
|
|
411
|
+
callback(env, env->txn, 0, 0, straggler, (turn < UINT_MAX) ? (unsigned)turn : UINT_MAX, 0, -retry);
|
|
412
|
+
}
|
|
413
|
+
return oldest;
|
|
414
|
+
}
|