mdbxmou 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +32 -0
- package/.github/workflows/publish.yml +27 -0
- package/.gitmodules +3 -0
- package/CMakeLists.txt +53 -0
- package/LICENSE +201 -0
- package/README.md +639 -0
- package/build.js +11 -0
- package/deps/libmdbx/.clang-format +3 -0
- package/deps/libmdbx/.cmake-format.yaml +3 -0
- package/deps/libmdbx/.le.ini +40 -0
- package/deps/libmdbx/CMakeLists.txt +1269 -0
- package/deps/libmdbx/COPYRIGHT +159 -0
- package/deps/libmdbx/ChangeLog.md +2786 -0
- package/deps/libmdbx/GNUmakefile +950 -0
- package/deps/libmdbx/LICENSE +177 -0
- package/deps/libmdbx/Makefile +16 -0
- package/deps/libmdbx/NOTICE +39 -0
- package/deps/libmdbx/README.md +863 -0
- package/deps/libmdbx/TODO.md +43 -0
- package/deps/libmdbx/cmake/compiler.cmake +1221 -0
- package/deps/libmdbx/cmake/profile.cmake +58 -0
- package/deps/libmdbx/cmake/utils.cmake +524 -0
- package/deps/libmdbx/conanfile.py +323 -0
- package/deps/libmdbx/docs/Doxyfile.in +2734 -0
- package/deps/libmdbx/docs/_preface.md +47 -0
- package/deps/libmdbx/docs/_restrictions.md +248 -0
- package/deps/libmdbx/docs/_starting.md +245 -0
- package/deps/libmdbx/docs/_toc.md +34 -0
- package/deps/libmdbx/docs/header.html +96 -0
- package/deps/libmdbx/example/CMakeLists.txt +6 -0
- package/deps/libmdbx/example/README.md +1 -0
- package/deps/libmdbx/example/example-mdbx.c +154 -0
- package/deps/libmdbx/example/sample-bdb.txt +77 -0
- package/deps/libmdbx/mdbx.h +6655 -0
- package/deps/libmdbx/mdbx.h++ +6428 -0
- package/deps/libmdbx/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch +173 -0
- package/deps/libmdbx/src/alloy.c +54 -0
- package/deps/libmdbx/src/api-cold.c +543 -0
- package/deps/libmdbx/src/api-copy.c +912 -0
- package/deps/libmdbx/src/api-cursor.c +754 -0
- package/deps/libmdbx/src/api-dbi.c +315 -0
- package/deps/libmdbx/src/api-env.c +1434 -0
- package/deps/libmdbx/src/api-extra.c +165 -0
- package/deps/libmdbx/src/api-key-transform.c +197 -0
- package/deps/libmdbx/src/api-misc.c +286 -0
- package/deps/libmdbx/src/api-opts.c +575 -0
- package/deps/libmdbx/src/api-range-estimate.c +365 -0
- package/deps/libmdbx/src/api-txn-data.c +454 -0
- package/deps/libmdbx/src/api-txn.c +921 -0
- package/deps/libmdbx/src/atomics-ops.h +364 -0
- package/deps/libmdbx/src/atomics-types.h +97 -0
- package/deps/libmdbx/src/audit.c +109 -0
- package/deps/libmdbx/src/bits.md +34 -0
- package/deps/libmdbx/src/chk.c +1796 -0
- package/deps/libmdbx/src/cogs.c +309 -0
- package/deps/libmdbx/src/cogs.h +506 -0
- package/deps/libmdbx/src/coherency.c +170 -0
- package/deps/libmdbx/src/config.h.in +88 -0
- package/deps/libmdbx/src/cursor.c +2396 -0
- package/deps/libmdbx/src/cursor.h +391 -0
- package/deps/libmdbx/src/dbi.c +717 -0
- package/deps/libmdbx/src/dbi.h +142 -0
- package/deps/libmdbx/src/debug_begin.h +36 -0
- package/deps/libmdbx/src/debug_end.h +15 -0
- package/deps/libmdbx/src/dpl.c +486 -0
- package/deps/libmdbx/src/dpl.h +134 -0
- package/deps/libmdbx/src/dxb.c +1335 -0
- package/deps/libmdbx/src/env.c +607 -0
- package/deps/libmdbx/src/essentials.h +125 -0
- package/deps/libmdbx/src/gc-get.c +1345 -0
- package/deps/libmdbx/src/gc-put.c +970 -0
- package/deps/libmdbx/src/gc.h +40 -0
- package/deps/libmdbx/src/global.c +474 -0
- package/deps/libmdbx/src/internals.h +585 -0
- package/deps/libmdbx/src/layout-dxb.h +288 -0
- package/deps/libmdbx/src/layout-lck.h +289 -0
- package/deps/libmdbx/src/lck-posix.c +859 -0
- package/deps/libmdbx/src/lck-windows.c +607 -0
- package/deps/libmdbx/src/lck.c +174 -0
- package/deps/libmdbx/src/lck.h +110 -0
- package/deps/libmdbx/src/logging_and_debug.c +250 -0
- package/deps/libmdbx/src/logging_and_debug.h +159 -0
- package/deps/libmdbx/src/man1/mdbx_chk.1 +106 -0
- package/deps/libmdbx/src/man1/mdbx_copy.1 +95 -0
- package/deps/libmdbx/src/man1/mdbx_drop.1 +48 -0
- package/deps/libmdbx/src/man1/mdbx_dump.1 +101 -0
- package/deps/libmdbx/src/man1/mdbx_load.1 +105 -0
- package/deps/libmdbx/src/man1/mdbx_stat.1 +86 -0
- package/deps/libmdbx/src/mdbx.c++ +1837 -0
- package/deps/libmdbx/src/meta.c +656 -0
- package/deps/libmdbx/src/meta.h +168 -0
- package/deps/libmdbx/src/mvcc-readers.c +414 -0
- package/deps/libmdbx/src/node.c +365 -0
- package/deps/libmdbx/src/node.h +102 -0
- package/deps/libmdbx/src/ntdll.def +1246 -0
- package/deps/libmdbx/src/options.h +534 -0
- package/deps/libmdbx/src/osal.c +3485 -0
- package/deps/libmdbx/src/osal.h +587 -0
- package/deps/libmdbx/src/page-get.c +483 -0
- package/deps/libmdbx/src/page-iov.c +185 -0
- package/deps/libmdbx/src/page-iov.h +34 -0
- package/deps/libmdbx/src/page-ops.c +744 -0
- package/deps/libmdbx/src/page-ops.h +142 -0
- package/deps/libmdbx/src/pnl.c +236 -0
- package/deps/libmdbx/src/pnl.h +146 -0
- package/deps/libmdbx/src/preface.h +990 -0
- package/deps/libmdbx/src/proto.h +105 -0
- package/deps/libmdbx/src/refund.c +212 -0
- package/deps/libmdbx/src/sort.h +484 -0
- package/deps/libmdbx/src/spill.c +431 -0
- package/deps/libmdbx/src/spill.h +74 -0
- package/deps/libmdbx/src/table.c +107 -0
- package/deps/libmdbx/src/tls.c +551 -0
- package/deps/libmdbx/src/tls.h +43 -0
- package/deps/libmdbx/src/tools/chk.c +673 -0
- package/deps/libmdbx/src/tools/copy.c +166 -0
- package/deps/libmdbx/src/tools/drop.c +199 -0
- package/deps/libmdbx/src/tools/dump.c +515 -0
- package/deps/libmdbx/src/tools/load.c +831 -0
- package/deps/libmdbx/src/tools/stat.c +516 -0
- package/deps/libmdbx/src/tools/wingetopt.c +87 -0
- package/deps/libmdbx/src/tools/wingetopt.h +30 -0
- package/deps/libmdbx/src/tree-ops.c +1554 -0
- package/deps/libmdbx/src/tree-search.c +140 -0
- package/deps/libmdbx/src/txl.c +99 -0
- package/deps/libmdbx/src/txl.h +26 -0
- package/deps/libmdbx/src/txn.c +1083 -0
- package/deps/libmdbx/src/unaligned.h +205 -0
- package/deps/libmdbx/src/utils.c +32 -0
- package/deps/libmdbx/src/utils.h +76 -0
- package/deps/libmdbx/src/version.c.in +44 -0
- package/deps/libmdbx/src/walk.c +290 -0
- package/deps/libmdbx/src/walk.h +20 -0
- package/deps/libmdbx/src/windows-import.c +152 -0
- package/deps/libmdbx/src/windows-import.h +128 -0
- package/deps/libmdbx/test/CMakeLists.txt +317 -0
- package/deps/libmdbx/test/append.c++ +237 -0
- package/deps/libmdbx/test/base.h++ +92 -0
- package/deps/libmdbx/test/battery-tmux.sh +64 -0
- package/deps/libmdbx/test/cases.c++ +118 -0
- package/deps/libmdbx/test/chrono.c++ +134 -0
- package/deps/libmdbx/test/chrono.h++ +85 -0
- package/deps/libmdbx/test/config.c++ +643 -0
- package/deps/libmdbx/test/config.h++ +334 -0
- package/deps/libmdbx/test/copy.c++ +62 -0
- package/deps/libmdbx/test/dead.c++ +39 -0
- package/deps/libmdbx/test/dump-load.sh +40 -0
- package/deps/libmdbx/test/extra/crunched_delete.c++ +409 -0
- package/deps/libmdbx/test/extra/cursor_closing.c++ +410 -0
- package/deps/libmdbx/test/extra/dbi.c++ +229 -0
- package/deps/libmdbx/test/extra/doubtless_positioning.c++ +253 -0
- package/deps/libmdbx/test/extra/dupfix_addodd.c +94 -0
- package/deps/libmdbx/test/extra/dupfix_multiple.c++ +311 -0
- package/deps/libmdbx/test/extra/early_close_dbi.c++ +137 -0
- package/deps/libmdbx/test/extra/hex_base64_base58.c++ +118 -0
- package/deps/libmdbx/test/extra/maindb_ordinal.c++ +61 -0
- package/deps/libmdbx/test/extra/open.c++ +96 -0
- package/deps/libmdbx/test/extra/pcrf/README.md +2 -0
- package/deps/libmdbx/test/extra/pcrf/pcrf_test.c +380 -0
- package/deps/libmdbx/test/extra/probe.c++ +10 -0
- package/deps/libmdbx/test/extra/txn.c++ +407 -0
- package/deps/libmdbx/test/extra/upsert_alldups.c +193 -0
- package/deps/libmdbx/test/fork.c++ +263 -0
- package/deps/libmdbx/test/hill.c++ +447 -0
- package/deps/libmdbx/test/jitter.c++ +197 -0
- package/deps/libmdbx/test/keygen.c++ +393 -0
- package/deps/libmdbx/test/keygen.h++ +130 -0
- package/deps/libmdbx/test/log.c++ +358 -0
- package/deps/libmdbx/test/log.h++ +91 -0
- package/deps/libmdbx/test/main.c++ +706 -0
- package/deps/libmdbx/test/nested.c++ +318 -0
- package/deps/libmdbx/test/osal-unix.c++ +647 -0
- package/deps/libmdbx/test/osal-windows.c++ +440 -0
- package/deps/libmdbx/test/osal.h++ +41 -0
- package/deps/libmdbx/test/stochastic.sh +690 -0
- package/deps/libmdbx/test/stub/LICENSE +24 -0
- package/deps/libmdbx/test/stub/README.md +8 -0
- package/deps/libmdbx/test/stub/pthread_barrier.c +104 -0
- package/deps/libmdbx/test/stub/pthread_barrier.h +77 -0
- package/deps/libmdbx/test/test.c++ +1551 -0
- package/deps/libmdbx/test/test.h++ +298 -0
- package/deps/libmdbx/test/tmux.conf +3 -0
- package/deps/libmdbx/test/try.c++ +30 -0
- package/deps/libmdbx/test/ttl.c++ +240 -0
- package/deps/libmdbx/test/utils.c++ +203 -0
- package/deps/libmdbx/test/utils.h++ +326 -0
- package/deps/libmdbx/test/valgrind_suppress.txt +536 -0
- package/lib/mdbx_evn_async.js +211 -0
- package/lib/mdbx_worker.js +195 -0
- package/lib/nativemou.js +6 -0
- package/package.json +38 -0
- package/src/async/envmou_close.cpp +34 -0
- package/src/async/envmou_close.hpp +32 -0
- package/src/async/envmou_copy_to.cpp +29 -0
- package/src/async/envmou_copy_to.hpp +38 -0
- package/src/async/envmou_keys.cpp +201 -0
- package/src/async/envmou_keys.hpp +50 -0
- package/src/async/envmou_open.cpp +38 -0
- package/src/async/envmou_open.hpp +33 -0
- package/src/async/envmou_query.cpp +167 -0
- package/src/async/envmou_query.hpp +53 -0
- package/src/dbimou.cpp +522 -0
- package/src/dbimou.hpp +82 -0
- package/src/env_arg0.hpp +24 -0
- package/src/envmou.cpp +445 -0
- package/src/envmou.hpp +116 -0
- package/src/modulemou.cpp +113 -0
- package/src/querymou.cpp +177 -0
- package/src/querymou.hpp +93 -0
- package/src/txnmou.cpp +254 -0
- package/src/txnmou.hpp +122 -0
- package/src/typemou.hpp +239 -0
- package/src/valuemou.hpp +194 -0
- package/test/async.js +67 -0
- package/test/e3.js +38 -0
- package/test/e4.js +89 -0
- package/test/e5.js +162 -0
- package/test/test-batch-ops.js +243 -0
- package/test/test-cursor-mode.js +84 -0
- package/test/test-multi-mode.js +87 -0
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
|
|
3
|
+
|
|
4
|
+
#include "internals.h"
|
|
5
|
+
|
|
6
|
+
/* Marks `npages` consecutive pages, starting with the entry at position `idx`
 * of the transaction's spilled list, as removed.
 *
 * Entries are not physically deleted here: the lowest bit of an entry is set
 * as a "removed" flag (the page number itself is kept in the upper bits), and
 * the list is only shortened when the flagged entry happens to be the last
 * one. `least_removed` tracks the lowest flagged position.
 *
 * For a multi-page span the neighbouring entries are flagged too, but only
 * while they hold the expected successive page numbers; the walk stops
 * silently at the first mismatch or at the list boundary. */
void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) {
  const pnl_t sl = txn->tw.spilled.list;
  tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(sl) && txn->tw.spilled.least_removed > 0);

  if (idx < txn->tw.spilled.least_removed)
    txn->tw.spilled.least_removed = idx;
  sl[idx] |= 1;
  /* drop the tail entry right away when it is the one just flagged */
  MDBX_PNL_SETSIZE(sl, MDBX_PNL_GETSIZE(sl) - (idx == MDBX_PNL_GETSIZE(sl)));

  for (; unlikely(npages > 1); --npages) {
    /* page number that the next entry of the span must carry */
    const pgno_t expected = (sl[idx] >> 1) + 1;
    if (!MDBX_PNL_ASCENDING) {
      /* descending order: the next page number lives at a lower position */
      if (--idx < 1 || (sl[idx] >> 1) != expected)
        return;
      if (idx < txn->tw.spilled.least_removed)
        txn->tw.spilled.least_removed = idx;
    } else {
      /* ascending order: the next page number lives at a higher position */
      if (++idx > MDBX_PNL_GETSIZE(sl) || (sl[idx] >> 1) != expected)
        return;
    }
    sl[idx] |= 1;
    MDBX_PNL_SETSIZE(sl, MDBX_PNL_GETSIZE(sl) - (idx == MDBX_PNL_GETSIZE(sl)));
  }
}
|
|
29
|
+
|
|
30
|
+
/* Compacts the transaction's spilled list by squeezing out the entries that
 * were flagged as removed (lowest bit set), then resets `least_removed` to
 * the INT_MAX "nothing removed" marker.
 *
 * Returns the (possibly shortened) spilled list. */
pnl_t spill_purge(MDBX_txn *txn) {
  tASSERT(txn, txn->tw.spilled.least_removed > 0);
  const pnl_t sl = txn->tw.spilled.list;

  if (txn->tw.spilled.least_removed == INT_MAX) {
    /* nothing was flagged since the last purge: only verify in debug builds */
    for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i)
      tASSERT(txn, (sl[i] & 1) == 0);
    return sl;
  }

  /* Entries below `least_removed` are known to be live, so the compaction
   * starts from there: `dst` is the write position, `src` the read position. */
  const size_t len = MDBX_PNL_GETSIZE(sl);
  size_t dst = txn->tw.spilled.least_removed;
  for (size_t src = dst; src <= len; ++src) {
    const pgno_t entry = sl[src];
    sl[dst] = entry;
    if ((entry & 1) == 0)
      ++dst; /* keep live entries; flagged ones get overwritten */
  }

  for (size_t i = 1; i < dst; ++i)
    tASSERT(txn, (sl[i] & 1) == 0);
  MDBX_PNL_SETSIZE(sl, dst - 1);
  txn->tw.spilled.least_removed = INT_MAX;
  return sl;
}
|
|
49
|
+
|
|
50
|
+
/*----------------------------------------------------------------------------*/
|
|
51
|
+
|
|
52
|
+
/* Queues the dirty page `dp` (spanning `npages` pages) for writing through
 * the I/O vector context `ctx` and, on success, records the page span in the
 * transaction's spilled list.
 *
 * Returns MDBX_SUCCESS or the error code of iov_page()/spill_append_span(). */
static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, page_t *dp, const size_t npages) {
  tASSERT(txn, !(txn->flags & MDBX_WRITEMAP));
#if MDBX_ENABLE_PGOP_STAT
  txn->env->lck->pgops.spill.weak += npages;
#endif /* MDBX_ENABLE_PGOP_STAT */

  /* The page number is captured before `dp` is handed over to iov_page()
   * (presumably `dp` must not be touched afterwards — confirm). */
  const pgno_t pgno = dp->pgno;
  const int err = iov_page(txn, ctx, dp, npages);
  if (unlikely(err != MDBX_SUCCESS))
    return err;
  return spill_append_span(&txn->tw.spilled.list, pgno, npages);
}
|
|
63
|
+
|
|
64
|
+
/* Set unspillable LRU-label for dirty pages watched by txn.
|
|
65
|
+
* Returns the number of pages marked as unspillable. */
|
|
66
|
+
static size_t spill_cursor_keep(const MDBX_txn *const txn, const MDBX_cursor *mc) {
|
|
67
|
+
tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0);
|
|
68
|
+
size_t keep = 0;
|
|
69
|
+
while (!is_poor(mc)) {
|
|
70
|
+
tASSERT(txn, mc->top >= 0);
|
|
71
|
+
const page_t *mp;
|
|
72
|
+
intptr_t i = 0;
|
|
73
|
+
do {
|
|
74
|
+
mp = mc->pg[i];
|
|
75
|
+
tASSERT(txn, !is_subpage(mp));
|
|
76
|
+
if (is_modifable(txn, mp)) {
|
|
77
|
+
size_t const n = dpl_search(txn, mp->pgno);
|
|
78
|
+
if (txn->tw.dirtylist->items[n].pgno == mp->pgno &&
|
|
79
|
+
/* не считаем дважды */ dpl_age(txn, n)) {
|
|
80
|
+
size_t *const ptr = ptr_disp(txn->tw.dirtylist->items[n].ptr, -(ptrdiff_t)sizeof(size_t));
|
|
81
|
+
*ptr = txn->tw.dirtylru;
|
|
82
|
+
tASSERT(txn, dpl_age(txn, n) == 0);
|
|
83
|
+
++keep;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
} while (++i <= mc->top);
|
|
87
|
+
|
|
88
|
+
tASSERT(txn, is_leaf(mp));
|
|
89
|
+
if (!mc->subcur || mc->ki[mc->top] >= page_numkeys(mp))
|
|
90
|
+
break;
|
|
91
|
+
if (!(node_flags(page_node(mp, mc->ki[mc->top])) & N_TREE))
|
|
92
|
+
break;
|
|
93
|
+
mc = &mc->subcur->cursor;
|
|
94
|
+
}
|
|
95
|
+
return keep;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/* Advances the transaction's LRU counter via dpl_lru_turn(), then marks as
 * unspillable the pages referenced by `m0` (if given) and by every other
 * cursor tracked for the valid, dirty tables with a non-empty tree.
 *
 * Returns the total number of pages marked as unspillable. */
static size_t spill_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
  tASSERT(txn, (txn->flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0);
  dpl_lru_turn(txn);

  size_t total = 0;
  if (m0)
    total = spill_cursor_keep(txn, m0);

  TXN_FOREACH_DBI_ALL(txn, dbi) {
    if (F_ISSET(txn->dbi_state[dbi], DBI_DIRTY | DBI_VALID) && txn->dbs[dbi].root != P_INVALID) {
      MDBX_cursor *mc = txn->cursors[dbi];
      while (mc) {
        if (mc != m0) /* m0 has been handled above */
          total += spill_cursor_keep(txn, mc);
        mc = mc->next;
      }
    }
  }

  return total;
}
|
|
112
|
+
|
|
113
|
+
/* Returns the spilling priority (0..255) for a dirty page:
|
|
114
|
+
* 0 = should be spilled;
|
|
115
|
+
* ...
|
|
116
|
+
* > 255 = must not be spilled. */
|
|
117
|
+
MDBX_NOTHROW_PURE_FUNCTION static unsigned spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) {
|
|
118
|
+
dpl_t *const dl = txn->tw.dirtylist;
|
|
119
|
+
const uint32_t age = dpl_age(txn, i);
|
|
120
|
+
const size_t npages = dpl_npages(dl, i);
|
|
121
|
+
const pgno_t pgno = dl->items[i].pgno;
|
|
122
|
+
if (age == 0) {
|
|
123
|
+
DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno);
|
|
124
|
+
return 256;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
page_t *const dp = dl->items[i].ptr;
|
|
128
|
+
if (dp->flags & (P_LOOSE | P_SPILLED)) {
|
|
129
|
+
DEBUG("skip %s %zu page %" PRIaPGNO, (dp->flags & P_LOOSE) ? "loose" : "parent-spilled", npages, pgno);
|
|
130
|
+
return 256;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/* Can't spill twice,
|
|
134
|
+
* make sure it's not already in a parent's spill list(s). */
|
|
135
|
+
MDBX_txn *parent = txn->parent;
|
|
136
|
+
if (parent && (parent->flags & MDBX_TXN_SPILLS)) {
|
|
137
|
+
do
|
|
138
|
+
if (spill_intersect(parent, pgno, npages)) {
|
|
139
|
+
DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno);
|
|
140
|
+
dp->flags |= P_SPILLED;
|
|
141
|
+
return 256;
|
|
142
|
+
}
|
|
143
|
+
while ((parent = parent->parent) != nullptr);
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
tASSERT(txn, age * (uint64_t)reciprocal < UINT32_MAX);
|
|
147
|
+
unsigned prio = age * reciprocal >> 24;
|
|
148
|
+
tASSERT(txn, prio < 256);
|
|
149
|
+
if (likely(npages == 1))
|
|
150
|
+
return prio = 256 - prio;
|
|
151
|
+
|
|
152
|
+
/* make a large/overflow pages be likely to spill */
|
|
153
|
+
size_t factor = npages | npages >> 1;
|
|
154
|
+
factor |= factor >> 2;
|
|
155
|
+
factor |= factor >> 4;
|
|
156
|
+
factor |= factor >> 8;
|
|
157
|
+
factor |= factor >> 16;
|
|
158
|
+
factor = (size_t)prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157;
|
|
159
|
+
factor = (factor < 256) ? 255 - factor : 0;
|
|
160
|
+
tASSERT(txn, factor < 256 && factor < (256 - prio));
|
|
161
|
+
return prio = (unsigned)factor;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
static size_t spill_gate(const MDBX_env *env, intptr_t part, const size_t total) {
|
|
165
|
+
const intptr_t spill_min = env->options.spill_min_denominator
|
|
166
|
+
? (total + env->options.spill_min_denominator - 1) / env->options.spill_min_denominator
|
|
167
|
+
: 1;
|
|
168
|
+
const intptr_t spill_max =
|
|
169
|
+
total - (env->options.spill_max_denominator ? total / env->options.spill_max_denominator : 0);
|
|
170
|
+
part = (part < spill_max) ? part : spill_max;
|
|
171
|
+
part = (part > spill_min) ? part : spill_min;
|
|
172
|
+
eASSERT(env, part >= 0 && (size_t)part <= total);
|
|
173
|
+
return (size_t)part;
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
/* Spills part of the transaction's dirty pages to make room in the dirty list.
 *
 * txn                  write transaction whose dirty pages are spilled.
 * m0                   optional cursor whose pages must be kept (may be null).
 * wanna_spill_entries  wanted number of dirty-list entries to spill.
 * wanna_spill_npages   wanted number of pages to spill.
 * need                 amount of dirty room the caller needs.
 *
 * Both wanted amounts are clamped by spill_gate(). In MDBX_WRITEMAP mode the
 * dirty pages are flushed by kicking msync; otherwise the pages are selected
 * by an LRU-based priority and written out through an I/O vector, and their
 * numbers are added to the transaction's spilled list.
 *
 * Returns MDBX_SUCCESS when enough dirty room is available afterwards,
 * MDBX_TXN_FULL when it is not, or an error code, in which case the
 * transaction is also flagged with MDBX_TXN_ERROR. */
__cold int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, const intptr_t wanna_spill_entries,
                          const intptr_t wanna_spill_npages, const size_t need) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);

  int rc = MDBX_SUCCESS;
  /* nothing to spill when all the dirty pages are loose ones */
  if (unlikely(txn->tw.loose_count >=
               (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose : txn->tw.writemap_dirty_npages)))
    goto done;

  const size_t dirty_entries = txn->tw.dirtylist ? (txn->tw.dirtylist->length - txn->tw.loose_count) : 1;
  const size_t dirty_npages =
      (txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose : txn->tw.writemap_dirty_npages) -
      txn->tw.loose_count;
  const size_t need_spill_entries = spill_gate(txn->env, wanna_spill_entries, dirty_entries);
  const size_t need_spill_npages = spill_gate(txn->env, wanna_spill_npages, dirty_npages);

  const size_t need_spill = (need_spill_entries > need_spill_npages) ? need_spill_entries : need_spill_npages;
  if (!need_spill)
    goto done;

  if (txn->flags & MDBX_WRITEMAP) {
    /* WRITEMAP mode: no explicit writes, just kick msync for the used part
     * of the mapping and account the pages as unsynced */
    NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync", dirty_entries, dirty_npages);
    const MDBX_env *env = txn->env;
    tASSERT(txn, txn->tw.spilled.list == nullptr);
    rc = osal_msync(&txn->env->dxb_mmap, 0, pgno_align2os_bytes(env, txn->geo.first_unallocated), MDBX_SYNC_KICK);
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;
#if MDBX_AVOID_MSYNC
    MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr);
    tASSERT(txn, dpl_check(txn));
    env->lck->unsynced_pages.weak += txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count;
    dpl_clear(txn->tw.dirtylist);
    txn->tw.dirtyroom = env->options.dp_limit - txn->tw.loose_count;
    /* the dirty list was cleared, so the loose pages are put back into it */
    for (page_t *lp = txn->tw.loose_pages; lp != nullptr; lp = page_next(lp)) {
      tASSERT(txn, lp->flags == P_LOOSE);
      rc = dpl_append(txn, lp->pgno, lp, 1);
      if (unlikely(rc != MDBX_SUCCESS))
        goto bailout;
      MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
      VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
    }
    tASSERT(txn, dpl_check(txn));
#else
    tASSERT(txn, txn->tw.dirtylist == nullptr);
    env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages;
    txn->tw.writemap_spilled_npages += txn->tw.writemap_dirty_npages;
    txn->tw.writemap_dirty_npages = 0;
#endif /* MDBX_AVOID_MSYNC */
    goto done;
  }

  NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "write", need_spill_entries, need_spill_npages);
  MDBX_ANALYSIS_ASSUME(txn->tw.dirtylist != nullptr);
  tASSERT(txn, txn->tw.dirtylist->length - txn->tw.loose_count >= 1);
  tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >= need_spill_npages);
  if (!txn->tw.spilled.list) {
    txn->tw.spilled.least_removed = INT_MAX;
    txn->tw.spilled.list = pnl_alloc(need_spill);
    if (unlikely(!txn->tw.spilled.list)) {
      rc = MDBX_ENOMEM;
      goto bailout;
    }
  } else {
    /* purge deleted slots */
    spill_purge(txn);
    rc = pnl_reserve(&txn->tw.spilled.list, need_spill);
    (void)rc /* ignore since the resulting list may be shorter
     and pnl_append() will increase pnl on demand */
        ;
  }

  /* Sort so that the writes to disk are more sequential */
  dpl_t *const dl = dpl_sort(txn);

  /* Preserve pages which may soon be dirtied again */
  const size_t unspillable = spill_txn_keep(txn, m0);
  if (unspillable + txn->tw.loose_count >= dl->length) {
#if xMDBX_DEBUG_SPILLING == 1 /* avoid false failure in debug mode */
    if (likely(txn->tw.dirtyroom + txn->tw.loose_count >= need))
      return MDBX_SUCCESS;
#endif /* xMDBX_DEBUG_SPILLING */
    ERROR("all %zu dirty pages are unspillable since referenced "
          "by a cursor(s), use fewer cursors or increase "
          "MDBX_opt_txn_dp_limit",
          unspillable);
    goto done;
  }

  /* Sub-task: spill part of the pages to disk following the LRU order,
   * while taking important adjustments into account:
   *  - it is better to spill old large/overflow pages, as this frees more
   *    memory and because they (as currently understood) are modified again
   *    much less often;
   *  - other things being equal, it is better to spill adjacent pages,
   *    as this takes fewer I/O operations;
   *  - preferably spend less time on this than std::partial_sort_copy would;
   *
   * Solution:
   *  - Quantize the whole range of LRU labels to 256 values and use a single
   *    pass of an 8-bit radix sort. This yields 256 levels of "freshness",
   *    including the LRU-label value such that pages older than it
   *    must be spilled;
   *  - Move sequentially towards increasing page numbers and spill the pages
   *    whose LRU label is older than the cut-off value,
   *    until enough has been spilled;
   *  - On meeting pages adjacent to the ones being spilled, spill them too
   *    to reduce the number of I/O operations, provided they fall into the
   *    first half between the spilled and the freshest LRU labels;
   *  - additionally, large/overflow pages are deliberately aged during
   *    sorting, which raises their chances of being spilled. */

  /* get min/max of LRU-labels */
  uint32_t age_max = 0;
  for (size_t i = 1; i <= dl->length; ++i) {
    const uint32_t age = dpl_age(txn, i);
    age_max = (age_max >= age) ? age_max : age;
  }

  VERBOSE("lru-head %u, age-max %u", txn->tw.dirtylru, age_max);

  /* half of 8-bit radix-sort */
  pgno_t radix_entries[256], radix_npages[256];
  memset(&radix_entries, 0, sizeof(radix_entries));
  memset(&radix_npages, 0, sizeof(radix_npages));
  size_t spillable_entries = 0, spillable_npages = 0;
  const uint32_t reciprocal = (UINT32_C(255) << 24) / (age_max + 1);
  for (size_t i = 1; i <= dl->length; ++i) {
    const unsigned prio = spill_prio(txn, i, reciprocal);
    size_t *const ptr = ptr_disp(dl->items[i].ptr, -(ptrdiff_t)sizeof(size_t));
    TRACE("page %" PRIaPGNO ", lru %zu, is_multi %c, npages %u, age %u of %u, prio %u", dl->items[i].pgno, *ptr,
          (dl->items[i].npages > 1) ? 'Y' : 'N', dpl_npages(dl, i), dpl_age(txn, i), age_max, prio);
    if (prio < 256) {
      radix_entries[prio] += 1;
      spillable_entries += 1;
      const pgno_t npages = dpl_npages(dl, i);
      radix_npages[prio] += npages;
      spillable_npages += npages;
    }
  }

  tASSERT(txn, spillable_npages >= spillable_entries);
  pgno_t spilled_entries = 0, spilled_npages = 0;
  if (likely(spillable_entries > 0)) {
    /* Pick two thresholds from the histogram: pages with a priority up to
     * prio2spill are spilled unconditionally, pages below prio2adjacent are
     * spilled only when adjacent to a page being spilled. */
    size_t prio2spill = 0, prio2adjacent = 128, amount_entries = radix_entries[0], amount_npages = radix_npages[0];
    for (size_t i = 1; i < 256; i++) {
      if (amount_entries < need_spill_entries || amount_npages < need_spill_npages) {
        prio2spill = i;
        prio2adjacent = i + (257 - i) / 2;
        amount_entries += radix_entries[i];
        amount_npages += radix_npages[i];
      } else if (amount_entries + amount_entries < spillable_entries + need_spill_entries
                 /* EQUIVALENT TO: amount - need_spill < spillable - amount */
                 || amount_npages + amount_npages < spillable_npages + need_spill_npages) {
        prio2adjacent = i;
        amount_entries += radix_entries[i];
        amount_npages += radix_npages[i];
      } else
        break;
    }

    VERBOSE("prio2spill %zu, prio2adjacent %zu, spillable %zu/%zu,"
            " wanna-spill %zu/%zu, amount %zu/%zu",
            prio2spill, prio2adjacent, spillable_entries, spillable_npages, need_spill_entries, need_spill_npages,
            amount_entries, amount_npages);
    tASSERT(txn, prio2spill < prio2adjacent && prio2adjacent <= 256);

    iov_ctx_t ctx;
    rc = iov_init(txn, &ctx, amount_entries, amount_npages,
#if defined(_WIN32) || defined(_WIN64)
                  txn->env->ioring.overlapped_fd ? txn->env->ioring.overlapped_fd :
#endif
                                                 txn->env->lazy_fd,
                  true);
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    /* Single pass over the sorted dirty list: `r` is the read position and
     * `w` the write position, so the entries that stay are compacted in place
     * while the spilled ones are dropped. */
    size_t r = 0, w = 0;
    pgno_t last = 0;
    while (r < dl->length && (spilled_entries < need_spill_entries || spilled_npages < need_spill_npages)) {
      dl->items[++w] = dl->items[++r];
      unsigned prio = spill_prio(txn, w, reciprocal);
      /* keep the page unless it is old enough, or it directly follows
       * the last spilled span and is below the adjacency threshold */
      if (prio > prio2spill && (prio >= prio2adjacent || last != dl->items[w].pgno))
        continue;

      const size_t e = w;
      last = dpl_endpgno(dl, w);
      /* step back over the already kept entries which are adjacent
       * to this one and are below the adjacency threshold */
      while (--w && dpl_endpgno(dl, w) == dl->items[w + 1].pgno && spill_prio(txn, w, reciprocal) < prio2adjacent)
        ;

      for (size_t i = w; ++i <= e;) {
        const unsigned npages = dpl_npages(dl, i);
        prio = spill_prio(txn, i, reciprocal);
        /* the age is unsigned, hence %u (as in the other log messages here) */
        DEBUG("%sspill[%zu] %u page %" PRIaPGNO " (age %u, prio %u)", (prio > prio2spill) ? "co-" : "", i, npages,
              dl->items[i].pgno, dpl_age(txn, i), prio);
        tASSERT(txn, prio < 256);
        ++spilled_entries;
        spilled_npages += npages;
        rc = spill_page(txn, &ctx, dl->items[i].ptr, npages);
        if (unlikely(rc != MDBX_SUCCESS))
          goto failed;
      }
    }

    VERBOSE("spilled entries %u, spilled npages %u", spilled_entries, spilled_npages);
    tASSERT(txn, spillable_entries == 0 || spilled_entries > 0);
    tASSERT(txn, spilled_npages >= spilled_entries);

  failed:
    /* move the unprocessed tail down to complete the in-place compaction */
    while (r < dl->length)
      dl->items[++w] = dl->items[++r];
    tASSERT(txn, r - w == spilled_entries || rc != MDBX_SUCCESS);

    dl->sorted = dpl_setlen(dl, w);
    txn->tw.dirtyroom += spilled_entries;
    txn->tw.dirtylist->pages_including_loose -= spilled_npages;
    tASSERT(txn, dpl_check(txn));

    if (!iov_empty(&ctx)) {
      tASSERT(txn, rc == MDBX_SUCCESS);
      rc = iov_write(&ctx);
    }
    if (unlikely(rc != MDBX_SUCCESS))
      goto bailout;

    txn->env->lck->unsynced_pages.weak += spilled_npages;
    pnl_sort(txn->tw.spilled.list, (size_t)txn->geo.first_unallocated << 1);
    txn->flags |= MDBX_TXN_SPILLS;
    NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room", spilled_entries, spilled_npages,
           txn->tw.dirtyroom);
  } else {
    tASSERT(txn, rc == MDBX_SUCCESS);
    for (size_t i = 1; i <= dl->length; ++i) {
      page_t *dp = dl->items[i].ptr;
      VERBOSE("unspillable[%zu]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u", i, dp->pgno, dpl_npages(dl, i),
              dp->flags, dpl_age(txn, i), spill_prio(txn, i, reciprocal));
    }
  }

#if xMDBX_DEBUG_SPILLING == 2
  if (txn->tw.loose_count + txn->tw.dirtyroom <= need / 2 + 1)
    ERROR("dirty-list length: before %zu, after %zu, parent %zi, loose %zu; "
          "needed %zu, spillable %zu; "
          "spilled %u dirty-entries, now have %zu dirty-room",
          dl->length + spilled_entries, dl->length,
          (txn->parent && txn->parent->tw.dirtylist) ? (intptr_t)txn->parent->tw.dirtylist->length : -1,
          txn->tw.loose_count, need, spillable_entries, spilled_entries, txn->tw.dirtyroom);
  ENSURE(txn->env, txn->tw.loose_count + txn->tw.dirtyroom > need / 2);
#endif /* xMDBX_DEBUG_SPILLING */

done:
  /* success when the room available (loose pages count too) exceeds
   * the need, which is capped by CURSOR_STACK_SIZE */
  return likely(txn->tw.dirtyroom + txn->tw.loose_count > ((need > CURSOR_STACK_SIZE) ? CURSOR_STACK_SIZE : need))
             ? MDBX_SUCCESS
             : MDBX_TXN_FULL;

bailout:
  /* a hard failure: the transaction can't be continued */
  txn->flags |= MDBX_TXN_ERROR;
  return rc;
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "essentials.h"
|
|
7
|
+
|
|
8
|
+
MDBX_INTERNAL void spill_remove(MDBX_txn *txn, size_t idx, size_t npages);
|
|
9
|
+
MDBX_INTERNAL pnl_t spill_purge(MDBX_txn *txn);
|
|
10
|
+
MDBX_INTERNAL int spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, const intptr_t wanna_spill_entries,
|
|
11
|
+
const intptr_t wanna_spill_npages, const size_t need);
|
|
12
|
+
/*----------------------------------------------------------------------------*/
|
|
13
|
+
|
|
14
|
+
/* Looks up page `pgno` in the transaction's list of spilled pages.
 *
 * The list is searched for the value `pgno << 1`, i.e. entries are stored
 * shifted left by one bit.
 *
 * Returns the non-zero index of the exactly matching entry, or 0 when the
 * transaction has no spilled list or no entry equals the shifted page number. */
static inline size_t spill_search(const MDBX_txn *txn, pgno_t pgno) {
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);

  const pnl_t spilled = txn->tw.spilled.list;
  if (likely(!spilled))
    return 0;

  /* Key in the list's own (shifted) representation. */
  const pgno_t key = pgno << 1;
  const size_t pos = pnl_search(spilled, key, (size_t)MAX_PAGENO + MAX_PAGENO + 1);

  /* The search yields a position; accept it only on an exact hit. */
  if (pos > MDBX_PNL_GETSIZE(spilled) || spilled[pos] != key)
    return 0;
  return pos;
}
|
|
23
|
+
|
|
24
|
+
/* Tells whether any page of the range [pgno, pgno + npages) is present in the
 * transaction's list of spilled pages.
 *
 * Returns false immediately when the transaction has no spilled list.
 * With extra logging enabled the whole list is dumped first (entries with the
 * low bit set are printed negated). When assertions are enabled the result is
 * cross-checked against a page-by-page spill_search(). */
static inline bool spill_intersect(const MDBX_txn *txn, pgno_t pgno, size_t npages) {
  const pnl_t spilled = txn->tw.spilled.list;
  if (likely(!spilled))
    return false;

  const size_t count = MDBX_PNL_GETSIZE(spilled);
  if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
    DEBUG_EXTRA("PNL len %zu [", count);
    for (size_t k = 1; k <= count; ++k)
      DEBUG_EXTRA_PRINT(" %li", (spilled[k] & 1) ? -(long)(spilled[k] >> 1) : (long)(spilled[k] >> 1));
    DEBUG_EXTRA_PRINT("%s\n", "]");
  }

  /* Bounds of the requested range in the list's shifted representation. */
  const pgno_t range_first = pgno << 1;
  const pgno_t range_last = ((pgno + (pgno_t)npages) << 1) - 1;

#if MDBX_PNL_ASCENDING
  /* Ascending list: probe with the lower bound, then test the entry found
   * against the upper bound. */
  const size_t pos = pnl_search(spilled, range_first, (size_t)(MAX_PAGENO + 1) << 1);
  tASSERT(txn, pos && (pos == count + 1 || range_first <= spilled[pos]));
  const bool hit = pos <= count && spilled[pos] <= range_last;
#else
  /* Descending list: probe with the upper bound, then test the entry found
   * against the lower bound. */
  const size_t pos = pnl_search(spilled, range_last, (size_t)MAX_PAGENO + MAX_PAGENO + 1);
  tASSERT(txn, pos && (pos == count + 1 || range_last >= spilled[pos]));
  const bool hit = pos <= count && spilled[pos] >= range_first;
#endif

  if (ASSERT_ENABLED()) {
    /* Slow reference implementation used only to validate `hit`. */
    bool found = false;
    for (size_t off = 0; off < npages; ++off) {
      if (spill_search(txn, (pgno_t)(pgno + off)) != 0)
        found = true;
    }
    tASSERT(txn, found == hit);
  }
  return hit;
}
|
|
54
|
+
|
|
55
|
+
/* Fast-path gate in front of spill_slowpath().
 *
 * Computes two signed excess values for a request of `need` more pages:
 *  - entries: `need` minus the dirty-room and the loose-page count
 *    (0 when the transaction has no dirty list);
 *  - pages: `need` plus the currently dirty page count (taken from the dirty
 *    list when present, otherwise from the writemap counter) minus the
 *    loose-page count and the environment's dp_limit option.
 *
 * Returns MDBX_SUCCESS without spilling when both values are below 1,
 * otherwise returns whatever spill_slowpath() returns. */
static inline int txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0, const size_t need) {
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, !m0 || cursor_is_tracked(m0));

  intptr_t excess_entries;
  intptr_t excess_npages;
  if (txn->tw.dirtylist) {
    excess_entries = need - txn->tw.dirtyroom - txn->tw.loose_count;
    excess_npages =
        need + txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count - txn->env->options.dp_limit;
  } else {
    excess_entries = 0;
    excess_npages = need + txn->tw.writemap_dirty_npages - txn->tw.loose_count - txn->env->options.dp_limit;
  }

  /* production mode */
  bool nothing_to_spill = excess_npages < 1 && excess_entries < 1;
#if xMDBX_DEBUG_SPILLING == 1
  /* debug mode: always try to spill if xMDBX_DEBUG_SPILLING == 1 */
  nothing_to_spill = nothing_to_spill && txn->txnid % 23 > 11;
#endif
  if (likely(nothing_to_spill))
    return MDBX_SUCCESS;

  return spill_slowpath(txn, m0, excess_entries, excess_npages, need);
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
|
|
3
|
+
|
|
4
|
+
#include "internals.h"
|
|
5
|
+
|
|
6
|
+
int tbl_setup(const MDBX_env *env, volatile kvx_t *const kvx, const tree_t *const db) {
|
|
7
|
+
osal_memory_fence(mo_AcquireRelease, false);
|
|
8
|
+
|
|
9
|
+
if (unlikely(!check_table_flags(db->flags))) {
|
|
10
|
+
ERROR("incompatible or invalid db.flags (0x%x) ", db->flags);
|
|
11
|
+
return MDBX_INCOMPATIBLE;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
size_t v_lmin = valsize_min(db->flags);
|
|
15
|
+
size_t v_lmax = env_valsize_max(env, db->flags);
|
|
16
|
+
if ((db->flags & (MDBX_DUPFIXED | MDBX_INTEGERDUP)) != 0 && db->dupfix_size) {
|
|
17
|
+
if (!MDBX_DISABLE_VALIDATION && unlikely(db->dupfix_size < v_lmin || db->dupfix_size > v_lmax)) {
|
|
18
|
+
ERROR("db.dupfix_size (%u) <> min/max value-length (%zu/%zu)", db->dupfix_size, v_lmin, v_lmax);
|
|
19
|
+
return MDBX_CORRUPTED;
|
|
20
|
+
}
|
|
21
|
+
v_lmin = v_lmax = db->dupfix_size;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
kvx->clc.k.lmin = keysize_min(db->flags);
|
|
25
|
+
kvx->clc.k.lmax = env_keysize_max(env, db->flags);
|
|
26
|
+
if (unlikely(!kvx->clc.k.cmp)) {
|
|
27
|
+
kvx->clc.v.cmp = builtin_datacmp(db->flags);
|
|
28
|
+
kvx->clc.k.cmp = builtin_keycmp(db->flags);
|
|
29
|
+
}
|
|
30
|
+
kvx->clc.v.lmin = v_lmin;
|
|
31
|
+
osal_memory_fence(mo_Relaxed, true);
|
|
32
|
+
kvx->clc.v.lmax = v_lmax;
|
|
33
|
+
osal_memory_fence(mo_AcquireRelease, true);
|
|
34
|
+
|
|
35
|
+
eASSERT(env, kvx->clc.k.lmax >= kvx->clc.k.lmin);
|
|
36
|
+
eASSERT(env, kvx->clc.v.lmax >= kvx->clc.v.lmin);
|
|
37
|
+
return MDBX_SUCCESS;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/* Reloads the descriptor of the named table behind `dbi` into txn->dbs[dbi].
 *
 * The table name (kvs[dbi].name) is looked up through a cursor opened on
 * MAIN_DBI; the record found is validated, copied into txn->dbs[dbi], and
 * on success the DBI_STALE bit of txn->dbi_state[dbi] is cleared.
 *
 * Returns:
 *  - the error of cursor_init(), node_read() or tbl_setup_ifneed() as is;
 *  - MDBX_BAD_DBI when the name is not found, or when dbi_changed() reports
 *    a change after the descriptor was loaded;
 *  - any other tree_search() error as is;
 *  - MDBX_INCOMPATIBLE when the record is not a plain sub-tree node, has a
 *    size other than sizeof(tree_t), or carries persistent flags different
 *    from those currently held in txn->dbs[dbi];
 *  - MDBX_CORRUPTED when validation is enabled and the record's mod_txnid is
 *    greater than the txnid of the page holding it;
 *  - MDBX_SUCCESS otherwise. */
int tbl_fetch(MDBX_txn *txn, size_t dbi) {
  cursor_couple_t couple;
  int err = cursor_init(&couple.outer, txn, MAIN_DBI);
  if (unlikely(err != MDBX_SUCCESS))
    return err;

  kvx_t *const kvx = &txn->env->kvs[dbi];

  /* Position on the leaf page, then require an exact match of the name.
   * `found` is assigned only when tree_search() succeeded, and is used only
   * past the failure check below. */
  struct node_search_result found;
  err = tree_search(&couple.outer, &kvx->name, 0);
  if (likely(err == MDBX_SUCCESS)) {
    found = node_search(&couple.outer, &kvx->name);
    if (unlikely(!found.exact))
      err = MDBX_NOTFOUND;
  }
  if (unlikely(err != MDBX_SUCCESS)) {
    NOTICE("dbi %zu refs to inaccessible table `%.*s` for txn %" PRIaTXN " (err %d)", dbi, (int)kvx->name.iov_len,
           (const char *)kvx->name.iov_base, txn->txnid, err);
    return (err == MDBX_NOTFOUND) ? MDBX_BAD_DBI : err;
  }

  /* The node must carry N_TREE without N_DUP. */
  if (unlikely((node_flags(found.node) & (N_DUP | N_TREE)) != N_TREE)) {
    NOTICE("dbi %zu refs to not a named table `%.*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len,
           (const char *)kvx->name.iov_base, txn->txnid, "wrong flags");
    return MDBX_INCOMPATIBLE; /* not a named DB */
  }

  MDBX_val data;
  err = node_read(&couple.outer, found.node, &data, couple.outer.pg[couple.outer.top]);
  if (unlikely(err != MDBX_SUCCESS))
    return err;

  if (unlikely(data.iov_len != sizeof(tree_t))) {
    NOTICE("dbi %zu refs to not a named table `%.*s` for txn %" PRIaTXN " (%s)", dbi, (int)kvx->name.iov_len,
           (const char *)kvx->name.iov_base, txn->txnid, "wrong rec-size");
    return MDBX_INCOMPATIBLE; /* not a named DB */
  }

  /* The txn may not know this DBI, or another process may
   * have dropped and recreated the DB with other flags. */
  const uint16_t stored_flags = UNALIGNED_PEEK_16(data.iov_base, tree_t, flags);
  tree_t *const db = &txn->dbs[dbi];
  if (unlikely((db->flags & DB_PERSISTENT_FLAGS) != stored_flags)) {
    NOTICE("dbi %zu refs to the re-created table `%.*s` for txn %" PRIaTXN
           " with different flags (present 0x%X != wanna 0x%X)",
           dbi, (int)kvx->name.iov_len, (const char *)kvx->name.iov_base, txn->txnid, db->flags & DB_PERSISTENT_FLAGS,
           stored_flags);
    return MDBX_INCOMPATIBLE;
  }

  /* The record may be unaligned inside the page, hence a byte-wise copy. */
  memcpy(db, data.iov_base, sizeof(tree_t));

#if !MDBX_DISABLE_VALIDATION
  /* A table record cannot be modified later than the page that stores it. */
  const txnid_t page_txnid = couple.outer.pg[couple.outer.top]->txnid;
  tASSERT(txn, txn->front_txnid >= page_txnid);
  if (unlikely(db->mod_txnid > page_txnid)) {
    ERROR("db.mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN ")", db->mod_txnid, page_txnid);
    return MDBX_CORRUPTED;
  }
#endif /* !MDBX_DISABLE_VALIDATION */

  err = tbl_setup_ifneed(txn->env, kvx, db);
  if (unlikely(err != MDBX_SUCCESS))
    return err;

  if (unlikely(dbi_changed(txn, dbi)))
    return MDBX_BAD_DBI;

  txn->dbi_state[dbi] &= ~DBI_STALE;
  return MDBX_SUCCESS;
}
|