mdbxmou 0.1.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +32 -0
- package/.github/workflows/publish.yml +27 -0
- package/.gitmodules +3 -0
- package/CMakeLists.txt +53 -0
- package/LICENSE +201 -0
- package/README.md +639 -0
- package/build.js +11 -0
- package/deps/libmdbx/.clang-format +3 -0
- package/deps/libmdbx/.cmake-format.yaml +3 -0
- package/deps/libmdbx/.le.ini +40 -0
- package/deps/libmdbx/CMakeLists.txt +1269 -0
- package/deps/libmdbx/COPYRIGHT +159 -0
- package/deps/libmdbx/ChangeLog.md +2786 -0
- package/deps/libmdbx/GNUmakefile +950 -0
- package/deps/libmdbx/LICENSE +177 -0
- package/deps/libmdbx/Makefile +16 -0
- package/deps/libmdbx/NOTICE +39 -0
- package/deps/libmdbx/README.md +863 -0
- package/deps/libmdbx/TODO.md +43 -0
- package/deps/libmdbx/cmake/compiler.cmake +1221 -0
- package/deps/libmdbx/cmake/profile.cmake +58 -0
- package/deps/libmdbx/cmake/utils.cmake +524 -0
- package/deps/libmdbx/conanfile.py +323 -0
- package/deps/libmdbx/docs/Doxyfile.in +2734 -0
- package/deps/libmdbx/docs/_preface.md +47 -0
- package/deps/libmdbx/docs/_restrictions.md +248 -0
- package/deps/libmdbx/docs/_starting.md +245 -0
- package/deps/libmdbx/docs/_toc.md +34 -0
- package/deps/libmdbx/docs/header.html +96 -0
- package/deps/libmdbx/example/CMakeLists.txt +6 -0
- package/deps/libmdbx/example/README.md +1 -0
- package/deps/libmdbx/example/example-mdbx.c +154 -0
- package/deps/libmdbx/example/sample-bdb.txt +77 -0
- package/deps/libmdbx/mdbx.h +6655 -0
- package/deps/libmdbx/mdbx.h++ +6428 -0
- package/deps/libmdbx/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch +173 -0
- package/deps/libmdbx/src/alloy.c +54 -0
- package/deps/libmdbx/src/api-cold.c +543 -0
- package/deps/libmdbx/src/api-copy.c +912 -0
- package/deps/libmdbx/src/api-cursor.c +754 -0
- package/deps/libmdbx/src/api-dbi.c +315 -0
- package/deps/libmdbx/src/api-env.c +1434 -0
- package/deps/libmdbx/src/api-extra.c +165 -0
- package/deps/libmdbx/src/api-key-transform.c +197 -0
- package/deps/libmdbx/src/api-misc.c +286 -0
- package/deps/libmdbx/src/api-opts.c +575 -0
- package/deps/libmdbx/src/api-range-estimate.c +365 -0
- package/deps/libmdbx/src/api-txn-data.c +454 -0
- package/deps/libmdbx/src/api-txn.c +921 -0
- package/deps/libmdbx/src/atomics-ops.h +364 -0
- package/deps/libmdbx/src/atomics-types.h +97 -0
- package/deps/libmdbx/src/audit.c +109 -0
- package/deps/libmdbx/src/bits.md +34 -0
- package/deps/libmdbx/src/chk.c +1796 -0
- package/deps/libmdbx/src/cogs.c +309 -0
- package/deps/libmdbx/src/cogs.h +506 -0
- package/deps/libmdbx/src/coherency.c +170 -0
- package/deps/libmdbx/src/config.h.in +88 -0
- package/deps/libmdbx/src/cursor.c +2396 -0
- package/deps/libmdbx/src/cursor.h +391 -0
- package/deps/libmdbx/src/dbi.c +717 -0
- package/deps/libmdbx/src/dbi.h +142 -0
- package/deps/libmdbx/src/debug_begin.h +36 -0
- package/deps/libmdbx/src/debug_end.h +15 -0
- package/deps/libmdbx/src/dpl.c +486 -0
- package/deps/libmdbx/src/dpl.h +134 -0
- package/deps/libmdbx/src/dxb.c +1335 -0
- package/deps/libmdbx/src/env.c +607 -0
- package/deps/libmdbx/src/essentials.h +125 -0
- package/deps/libmdbx/src/gc-get.c +1345 -0
- package/deps/libmdbx/src/gc-put.c +970 -0
- package/deps/libmdbx/src/gc.h +40 -0
- package/deps/libmdbx/src/global.c +474 -0
- package/deps/libmdbx/src/internals.h +585 -0
- package/deps/libmdbx/src/layout-dxb.h +288 -0
- package/deps/libmdbx/src/layout-lck.h +289 -0
- package/deps/libmdbx/src/lck-posix.c +859 -0
- package/deps/libmdbx/src/lck-windows.c +607 -0
- package/deps/libmdbx/src/lck.c +174 -0
- package/deps/libmdbx/src/lck.h +110 -0
- package/deps/libmdbx/src/logging_and_debug.c +250 -0
- package/deps/libmdbx/src/logging_and_debug.h +159 -0
- package/deps/libmdbx/src/man1/mdbx_chk.1 +106 -0
- package/deps/libmdbx/src/man1/mdbx_copy.1 +95 -0
- package/deps/libmdbx/src/man1/mdbx_drop.1 +48 -0
- package/deps/libmdbx/src/man1/mdbx_dump.1 +101 -0
- package/deps/libmdbx/src/man1/mdbx_load.1 +105 -0
- package/deps/libmdbx/src/man1/mdbx_stat.1 +86 -0
- package/deps/libmdbx/src/mdbx.c++ +1837 -0
- package/deps/libmdbx/src/meta.c +656 -0
- package/deps/libmdbx/src/meta.h +168 -0
- package/deps/libmdbx/src/mvcc-readers.c +414 -0
- package/deps/libmdbx/src/node.c +365 -0
- package/deps/libmdbx/src/node.h +102 -0
- package/deps/libmdbx/src/ntdll.def +1246 -0
- package/deps/libmdbx/src/options.h +534 -0
- package/deps/libmdbx/src/osal.c +3485 -0
- package/deps/libmdbx/src/osal.h +587 -0
- package/deps/libmdbx/src/page-get.c +483 -0
- package/deps/libmdbx/src/page-iov.c +185 -0
- package/deps/libmdbx/src/page-iov.h +34 -0
- package/deps/libmdbx/src/page-ops.c +744 -0
- package/deps/libmdbx/src/page-ops.h +142 -0
- package/deps/libmdbx/src/pnl.c +236 -0
- package/deps/libmdbx/src/pnl.h +146 -0
- package/deps/libmdbx/src/preface.h +990 -0
- package/deps/libmdbx/src/proto.h +105 -0
- package/deps/libmdbx/src/refund.c +212 -0
- package/deps/libmdbx/src/sort.h +484 -0
- package/deps/libmdbx/src/spill.c +431 -0
- package/deps/libmdbx/src/spill.h +74 -0
- package/deps/libmdbx/src/table.c +107 -0
- package/deps/libmdbx/src/tls.c +551 -0
- package/deps/libmdbx/src/tls.h +43 -0
- package/deps/libmdbx/src/tools/chk.c +673 -0
- package/deps/libmdbx/src/tools/copy.c +166 -0
- package/deps/libmdbx/src/tools/drop.c +199 -0
- package/deps/libmdbx/src/tools/dump.c +515 -0
- package/deps/libmdbx/src/tools/load.c +831 -0
- package/deps/libmdbx/src/tools/stat.c +516 -0
- package/deps/libmdbx/src/tools/wingetopt.c +87 -0
- package/deps/libmdbx/src/tools/wingetopt.h +30 -0
- package/deps/libmdbx/src/tree-ops.c +1554 -0
- package/deps/libmdbx/src/tree-search.c +140 -0
- package/deps/libmdbx/src/txl.c +99 -0
- package/deps/libmdbx/src/txl.h +26 -0
- package/deps/libmdbx/src/txn.c +1083 -0
- package/deps/libmdbx/src/unaligned.h +205 -0
- package/deps/libmdbx/src/utils.c +32 -0
- package/deps/libmdbx/src/utils.h +76 -0
- package/deps/libmdbx/src/version.c.in +44 -0
- package/deps/libmdbx/src/walk.c +290 -0
- package/deps/libmdbx/src/walk.h +20 -0
- package/deps/libmdbx/src/windows-import.c +152 -0
- package/deps/libmdbx/src/windows-import.h +128 -0
- package/deps/libmdbx/test/CMakeLists.txt +317 -0
- package/deps/libmdbx/test/append.c++ +237 -0
- package/deps/libmdbx/test/base.h++ +92 -0
- package/deps/libmdbx/test/battery-tmux.sh +64 -0
- package/deps/libmdbx/test/cases.c++ +118 -0
- package/deps/libmdbx/test/chrono.c++ +134 -0
- package/deps/libmdbx/test/chrono.h++ +85 -0
- package/deps/libmdbx/test/config.c++ +643 -0
- package/deps/libmdbx/test/config.h++ +334 -0
- package/deps/libmdbx/test/copy.c++ +62 -0
- package/deps/libmdbx/test/dead.c++ +39 -0
- package/deps/libmdbx/test/dump-load.sh +40 -0
- package/deps/libmdbx/test/extra/crunched_delete.c++ +409 -0
- package/deps/libmdbx/test/extra/cursor_closing.c++ +410 -0
- package/deps/libmdbx/test/extra/dbi.c++ +229 -0
- package/deps/libmdbx/test/extra/doubtless_positioning.c++ +253 -0
- package/deps/libmdbx/test/extra/dupfix_addodd.c +94 -0
- package/deps/libmdbx/test/extra/dupfix_multiple.c++ +311 -0
- package/deps/libmdbx/test/extra/early_close_dbi.c++ +137 -0
- package/deps/libmdbx/test/extra/hex_base64_base58.c++ +118 -0
- package/deps/libmdbx/test/extra/maindb_ordinal.c++ +61 -0
- package/deps/libmdbx/test/extra/open.c++ +96 -0
- package/deps/libmdbx/test/extra/pcrf/README.md +2 -0
- package/deps/libmdbx/test/extra/pcrf/pcrf_test.c +380 -0
- package/deps/libmdbx/test/extra/probe.c++ +10 -0
- package/deps/libmdbx/test/extra/txn.c++ +407 -0
- package/deps/libmdbx/test/extra/upsert_alldups.c +193 -0
- package/deps/libmdbx/test/fork.c++ +263 -0
- package/deps/libmdbx/test/hill.c++ +447 -0
- package/deps/libmdbx/test/jitter.c++ +197 -0
- package/deps/libmdbx/test/keygen.c++ +393 -0
- package/deps/libmdbx/test/keygen.h++ +130 -0
- package/deps/libmdbx/test/log.c++ +358 -0
- package/deps/libmdbx/test/log.h++ +91 -0
- package/deps/libmdbx/test/main.c++ +706 -0
- package/deps/libmdbx/test/nested.c++ +318 -0
- package/deps/libmdbx/test/osal-unix.c++ +647 -0
- package/deps/libmdbx/test/osal-windows.c++ +440 -0
- package/deps/libmdbx/test/osal.h++ +41 -0
- package/deps/libmdbx/test/stochastic.sh +690 -0
- package/deps/libmdbx/test/stub/LICENSE +24 -0
- package/deps/libmdbx/test/stub/README.md +8 -0
- package/deps/libmdbx/test/stub/pthread_barrier.c +104 -0
- package/deps/libmdbx/test/stub/pthread_barrier.h +77 -0
- package/deps/libmdbx/test/test.c++ +1551 -0
- package/deps/libmdbx/test/test.h++ +298 -0
- package/deps/libmdbx/test/tmux.conf +3 -0
- package/deps/libmdbx/test/try.c++ +30 -0
- package/deps/libmdbx/test/ttl.c++ +240 -0
- package/deps/libmdbx/test/utils.c++ +203 -0
- package/deps/libmdbx/test/utils.h++ +326 -0
- package/deps/libmdbx/test/valgrind_suppress.txt +536 -0
- package/lib/mdbx_evn_async.js +211 -0
- package/lib/mdbx_worker.js +195 -0
- package/lib/nativemou.js +6 -0
- package/package.json +38 -0
- package/src/async/envmou_close.cpp +34 -0
- package/src/async/envmou_close.hpp +32 -0
- package/src/async/envmou_copy_to.cpp +29 -0
- package/src/async/envmou_copy_to.hpp +38 -0
- package/src/async/envmou_keys.cpp +201 -0
- package/src/async/envmou_keys.hpp +50 -0
- package/src/async/envmou_open.cpp +38 -0
- package/src/async/envmou_open.hpp +33 -0
- package/src/async/envmou_query.cpp +167 -0
- package/src/async/envmou_query.hpp +53 -0
- package/src/dbimou.cpp +522 -0
- package/src/dbimou.hpp +82 -0
- package/src/env_arg0.hpp +24 -0
- package/src/envmou.cpp +445 -0
- package/src/envmou.hpp +116 -0
- package/src/modulemou.cpp +113 -0
- package/src/querymou.cpp +177 -0
- package/src/querymou.hpp +93 -0
- package/src/txnmou.cpp +254 -0
- package/src/txnmou.hpp +122 -0
- package/src/typemou.hpp +239 -0
- package/src/valuemou.hpp +194 -0
- package/test/async.js +67 -0
- package/test/e3.js +38 -0
- package/test/e4.js +89 -0
- package/test/e5.js +162 -0
- package/test/test-batch-ops.js +243 -0
- package/test/test-cursor-mode.js +84 -0
- package/test/test-multi-mode.js +87 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
|
|
3
|
+
|
|
4
|
+
#pragma once
|
|
5
|
+
|
|
6
|
+
#include "essentials.h"
|
|
7
|
+
|
|
8
|
+
MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL pgno_t pv2pages(uint16_t pv);
|
|
9
|
+
|
|
10
|
+
MDBX_NOTHROW_CONST_FUNCTION MDBX_INTERNAL uint16_t pages2pv(size_t pages);
|
|
11
|
+
|
|
12
|
+
MDBX_MAYBE_UNUSED MDBX_INTERNAL bool pv2pages_verify(void);
|
|
13
|
+
|
|
14
|
+
/*------------------------------------------------------------------------------
|
|
15
|
+
* Nodes, Keys & Values length limitation factors:
|
|
16
|
+
*
|
|
17
|
+
* BRANCH_NODE_MAX
|
|
18
|
+
* Branch-page must contain at least two nodes, within each a key and a child
|
|
19
|
+
* page number. But a page can't be split if it contains fewer than 4 keys,
|
|
20
|
+
* i.e. a page should not overflow before adding the fourth key. Therefore,
|
|
21
|
+
* at least 3 branch-nodes should fit in a single branch-page. Further, the
|
|
22
|
+
* first node of a branch-page doesn't contain a key, i.e. the first node
|
|
23
|
+
* always requires space just for itself. Thus:
|
|
24
|
+
* PAGESPACE = pagesize - page_hdr_len;
|
|
25
|
+
* BRANCH_NODE_MAX = even_floor(
|
|
26
|
+
* (PAGESPACE - sizeof(indx_t) - NODESIZE) / (3 - 1) - sizeof(indx_t));
|
|
27
|
+
* KEYLEN_MAX = BRANCH_NODE_MAX - node_hdr_len;
|
|
28
|
+
*
|
|
29
|
+
* LEAF_NODE_MAX
|
|
30
|
+
* Leaf-node must fit into single leaf-page, where a value could be placed on
|
|
31
|
+
* a large/overflow page. However, it may be required to insert a nearly page-sized
|
|
32
|
+
* node between two large nodes that already fill up a page. In this case the
|
|
33
|
+
* page must be split to two if some pair of nodes fits on one page, or
|
|
34
|
+
* otherwise the page should be split to the THREE with a single node
|
|
35
|
+
* per each of ones. Such 1-into-3 page splitting is costly and complex since
|
|
36
|
+
* requires TWO insertion into the parent page, that could lead to split it
|
|
37
|
+
* and so on up to the root. Therefore double-splitting is avoided here and
|
|
38
|
+
* the maximum node size is half of a leaf page space:
|
|
39
|
+
* LEAF_NODE_MAX = even_floor(PAGESPACE / 2 - sizeof(indx_t));
|
|
40
|
+
* DATALEN_NO_OVERFLOW = LEAF_NODE_MAX - NODESIZE - KEYLEN_MAX;
|
|
41
|
+
*
|
|
42
|
+
* - Table-node must fit into one leaf-page:
|
|
43
|
+
* TABLE_NAME_MAX = LEAF_NODE_MAX - node_hdr_len - sizeof(tree_t);
|
|
44
|
+
*
|
|
45
|
+
* - Dupsort values itself are a keys in a dupsort-table and couldn't be longer
|
|
46
|
+
* than the KEYLEN_MAX. But a dupsort node must not be greater than LEAF_NODE_MAX,
|
|
47
|
+
* since dupsort value couldn't be placed on a large/overflow page:
|
|
48
|
+
* DUPSORT_DATALEN_MAX = min(KEYLEN_MAX,
|
|
49
|
+
* max(DATALEN_NO_OVERFLOW, sizeof(tree_t)));
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
#define PAGESPACE(pagesize) ((pagesize) - PAGEHDRSZ)
|
|
53
|
+
|
|
54
|
+
#define BRANCH_NODE_MAX(pagesize) \
|
|
55
|
+
(EVEN_FLOOR((PAGESPACE(pagesize) - sizeof(indx_t) - NODESIZE) / (3 - 1) - sizeof(indx_t)))
|
|
56
|
+
|
|
57
|
+
#define LEAF_NODE_MAX(pagesize) (EVEN_FLOOR(PAGESPACE(pagesize) / 2) - sizeof(indx_t))
|
|
58
|
+
|
|
59
|
+
#define MAX_GC1OVPAGE(pagesize) (PAGESPACE(pagesize) / sizeof(pgno_t) - 1)
|
|
60
|
+
|
|
61
|
+
/* Maximum key length, in bytes, for a table with the given flags on pages of
 * the given size.
 *
 *  - MDBX_INTEGERKEY tables: always 8 (a 64-bit integer).
 *  - Otherwise the key must fit into a branch node; for any of the
 *    dupsort-family flags it must additionally leave room for a tree_t
 *    inside a leaf node, so the smaller of the two limits is returned. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t keysize_max(size_t pagesize, MDBX_db_flags_t flags) {
  assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && is_powerof2(pagesize));
  STATIC_ASSERT(BRANCH_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE >= 8);
  STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE -
                    /* sizeof(uint64) as a key */ 8 >
                sizeof(tree_t));

  if (flags & MDBX_INTEGERKEY)
    return 8 /* sizeof(uint64_t) */;

  const intptr_t branch_limit = BRANCH_NODE_MAX(pagesize) - NODESIZE;
  if ((flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) == 0)
    return branch_limit;

  /* dupsort-family: the leaf node must also be able to hold a tree_t */
  const intptr_t dupsort_leaf_limit = LEAF_NODE_MAX(pagesize) - NODESIZE - sizeof(tree_t);
  return (dupsort_leaf_limit > branch_limit) ? branch_limit : dupsort_leaf_limit;
}
|
|
77
|
+
|
|
78
|
+
/* Same result as keysize_max(env->ps, flags), but computed from the node
 * limits already stored in the environment (env->branch_nodemax and
 * env->leaf_nodemax). The equality is verified by an assertion. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t env_keysize_max(const MDBX_env *env, MDBX_db_flags_t flags) {
  STATIC_ASSERT(LEAF_NODE_MAX(MDBX_MIN_PAGESIZE) - NODESIZE -
                    /* sizeof(uint64) as a key */ 8 >
                sizeof(tree_t));

  size_t size_max = 8 /* sizeof(uint64_t) */;
  if (!(flags & MDBX_INTEGERKEY)) {
    const intptr_t branch_limit = env->branch_nodemax - NODESIZE;
    intptr_t limit = branch_limit;
    if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP | MDBX_INTEGERDUP)) {
      /* dupsort-family: take the smaller of the branch and leaf limits */
      const intptr_t dupsort_leaf_limit = env->leaf_nodemax - NODESIZE - sizeof(tree_t);
      if (dupsort_leaf_limit <= branch_limit)
        limit = dupsort_leaf_limit;
    }
    size_max = limit;
  }

  eASSERT(env, size_max == keysize_max(env->ps, flags));
  return size_max;
}
|
|
96
|
+
|
|
97
|
+
/* Minimum key length, in bytes: 4 for MDBX_INTEGERKEY tables, otherwise 0. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t keysize_min(MDBX_db_flags_t flags) {
  if (flags & MDBX_INTEGERKEY)
    return 4 /* sizeof(uint32_t) */;
  return 0;
}
|
|
100
|
+
|
|
101
|
+
/* Minimum value length, in bytes:
 *  - 4 for MDBX_INTEGERDUP tables;
 *  - sizeof(indx_t) for other MDBX_DUPFIXED tables;
 *  - 0 otherwise. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t valsize_min(MDBX_db_flags_t flags) {
  return (flags & MDBX_INTEGERDUP) ? 4 /* sizeof(uint32_t) */
         : (flags & MDBX_DUPFIXED) ? sizeof(indx_t)
                                   : 0;
}
|
|
109
|
+
|
|
110
|
+
/* Maximum value length, in bytes, for a table with the given flags on pages
 * of the given size.
 *
 *  - MDBX_INTEGERDUP: always 8.
 *  - Other dupsort-family tables: same as the maximum key length of a plain
 *    table, i.e. keysize_max(pagesize, 0).
 *  - Plain tables: the smallest of 0x7FF00000 bytes, PAGELIST_LIMIT / 4
 *    pages, and MAX_MAPSIZE / 2. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t valsize_max(size_t pagesize, MDBX_db_flags_t flags) {
  assert(pagesize >= MDBX_MIN_PAGESIZE && pagesize <= MDBX_MAX_PAGESIZE && is_powerof2(pagesize));
  STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO);

  if (flags & MDBX_INTEGERDUP)
    return 8 /* sizeof(uint64_t) */;

  if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP))
    return keysize_max(pagesize, 0);

  const unsigned shift = log2n_powerof2(pagesize);
  const size_t hard_bytes = 0x7FF00000ul;
  const size_t pages_cap = PAGELIST_LIMIT / 4;

  /* Start from the hard byte limit and tighten it by the page-count cap. */
  size_t limit = hard_bytes;
  if ((hard_bytes >> shift) >= pages_cap)
    limit = pages_cap << shift;

  /* Never exceed half of the maximum map size. */
  if (!(limit < MAX_MAPSIZE / 2))
    limit = MAX_MAPSIZE / 2;
  return limit;
}
|
|
127
|
+
|
|
128
|
+
/* Same result as valsize_max(env->ps, flags), but uses the page-size shift
 * (env->ps2ln) and node limits stored in the environment. The equality is
 * verified by an assertion. */
MDBX_NOTHROW_CONST_FUNCTION static inline size_t env_valsize_max(const MDBX_env *env, MDBX_db_flags_t flags) {
  STATIC_ASSERT(PAGELIST_LIMIT <= MAX_PAGENO);

  size_t size_max;
  if (flags & MDBX_INTEGERDUP) {
    size_max = 8 /* sizeof(uint64_t) */;
  } else if (flags & (MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP)) {
    /* dupsort values are limited like keys of a plain table */
    size_max = env_keysize_max(env, 0);
  } else {
    const size_t hard_bytes = 0x7FF00000ul;
    const size_t pages_cap = PAGELIST_LIMIT / 4;

    size_t limit = hard_bytes;
    if ((hard_bytes >> env->ps2ln) >= pages_cap)
      limit = pages_cap << env->ps2ln;

    if (!(limit < MAX_MAPSIZE / 2))
      limit = MAX_MAPSIZE / 2;
    size_max = limit;
  }

  eASSERT(env, size_max == valsize_max(env->ps, flags));
  return size_max;
}
|
|
145
|
+
|
|
146
|
+
/*----------------------------------------------------------------------------*/
|
|
147
|
+
|
|
148
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline size_t leaf_size(const MDBX_env *env, const MDBX_val *key,
|
|
149
|
+
const MDBX_val *data) {
|
|
150
|
+
size_t node_bytes = node_size(key, data);
|
|
151
|
+
if (node_bytes > env->leaf_nodemax)
|
|
152
|
+
/* put on large/overflow page */
|
|
153
|
+
node_bytes = node_size_len(key->iov_len, 0) + sizeof(pgno_t);
|
|
154
|
+
|
|
155
|
+
return node_bytes + sizeof(indx_t);
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline size_t branch_size(const MDBX_env *env, const MDBX_val *key) {
|
|
159
|
+
/* Size of a node in a branch page with a given key.
|
|
160
|
+
* This is just the node header plus the key, there is no data. */
|
|
161
|
+
size_t node_bytes = node_size(key, nullptr);
|
|
162
|
+
if (unlikely(node_bytes > env->branch_nodemax)) {
|
|
163
|
+
/* put on large/overflow page, not implemented */
|
|
164
|
+
mdbx_panic("node_size(key) %zu > %u branch_nodemax", node_bytes, env->branch_nodemax);
|
|
165
|
+
node_bytes = node_size(key, nullptr) + sizeof(pgno_t);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
return node_bytes + sizeof(indx_t);
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
MDBX_NOTHROW_CONST_FUNCTION static inline uint16_t flags_db2sub(uint16_t db_flags) {
|
|
172
|
+
uint16_t sub_flags = db_flags & MDBX_DUPFIXED;
|
|
173
|
+
|
|
174
|
+
/* MDBX_INTEGERDUP => MDBX_INTEGERKEY */
|
|
175
|
+
#define SHIFT_INTEGERDUP_TO_INTEGERKEY 2
|
|
176
|
+
STATIC_ASSERT((MDBX_INTEGERDUP >> SHIFT_INTEGERDUP_TO_INTEGERKEY) == MDBX_INTEGERKEY);
|
|
177
|
+
sub_flags |= (db_flags & MDBX_INTEGERDUP) >> SHIFT_INTEGERDUP_TO_INTEGERKEY;
|
|
178
|
+
|
|
179
|
+
/* MDBX_REVERSEDUP => MDBX_REVERSEKEY */
|
|
180
|
+
#define SHIFT_REVERSEDUP_TO_REVERSEKEY 5
|
|
181
|
+
STATIC_ASSERT((MDBX_REVERSEDUP >> SHIFT_REVERSEDUP_TO_REVERSEKEY) == MDBX_REVERSEKEY);
|
|
182
|
+
sub_flags |= (db_flags & MDBX_REVERSEDUP) >> SHIFT_REVERSEDUP_TO_REVERSEKEY;
|
|
183
|
+
|
|
184
|
+
return sub_flags;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
static inline bool check_table_flags(unsigned flags) {
|
|
188
|
+
switch (flags & ~(MDBX_REVERSEKEY | MDBX_INTEGERKEY)) {
|
|
189
|
+
default:
|
|
190
|
+
NOTICE("invalid db-flags 0x%x", flags);
|
|
191
|
+
return false;
|
|
192
|
+
case MDBX_DUPSORT:
|
|
193
|
+
case MDBX_DUPSORT | MDBX_REVERSEDUP:
|
|
194
|
+
case MDBX_DUPSORT | MDBX_DUPFIXED:
|
|
195
|
+
case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP:
|
|
196
|
+
case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP:
|
|
197
|
+
case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP:
|
|
198
|
+
case MDBX_DB_DEFAULTS:
|
|
199
|
+
return (flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) != (MDBX_REVERSEKEY | MDBX_INTEGERKEY);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
static inline int tbl_setup_ifneed(const MDBX_env *env, volatile kvx_t *const kvx, const tree_t *const db) {
|
|
204
|
+
return likely(kvx->clc.v.lmax) ? MDBX_SUCCESS : tbl_setup(env, kvx, db);
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
/*----------------------------------------------------------------------------*/
|
|
208
|
+
|
|
209
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline size_t pgno2bytes(const MDBX_env *env, size_t pgno) {
|
|
210
|
+
eASSERT(env, (1u << env->ps2ln) == env->ps);
|
|
211
|
+
return ((size_t)pgno) << env->ps2ln;
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline page_t *pgno2page(const MDBX_env *env, size_t pgno) {
|
|
215
|
+
return ptr_disp(env->dxb_mmap.base, pgno2bytes(env, pgno));
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t bytes2pgno(const MDBX_env *env, size_t bytes) {
|
|
219
|
+
eASSERT(env, (env->ps >> env->ps2ln) == 1);
|
|
220
|
+
return (pgno_t)(bytes >> env->ps2ln);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t bytes_align2os_bytes(const MDBX_env *env, size_t bytes);
|
|
224
|
+
|
|
225
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL size_t pgno_align2os_bytes(const MDBX_env *env, size_t pgno);
|
|
226
|
+
|
|
227
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL pgno_t pgno_align2os_pgno(const MDBX_env *env, size_t pgno);
|
|
228
|
+
|
|
229
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t largechunk_npages(const MDBX_env *env, size_t bytes) {
|
|
230
|
+
return bytes2pgno(env, PAGEHDRSZ - 1 + bytes) + 1;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val get_key(const node_t *node) {
|
|
234
|
+
MDBX_val key;
|
|
235
|
+
key.iov_len = node_ks(node);
|
|
236
|
+
key.iov_base = node_key(node);
|
|
237
|
+
return key;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
/* Stores the node's key into *keyptr; does nothing when keyptr is null. */
static inline void get_key_optional(const node_t *node, MDBX_val *keyptr /* __may_null */) {
  if (!keyptr)
    return;
  *keyptr = get_key(node);
}
|
|
244
|
+
|
|
245
|
+
/* Returns the address PAGEHDRSZ bytes past the start of the page, i.e. the
 * first byte after the page header. */
MDBX_NOTHROW_PURE_FUNCTION static inline void *page_data(const page_t *mp) {
  void *const payload = ptr_disp(mp, PAGEHDRSZ);
  return payload;
}
|
|
246
|
+
|
|
247
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline const page_t *data_page(const void *data) {
|
|
248
|
+
return container_of(data, page_t, entries);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
/* Interprets the payload of the page (see page_data()) as a meta_t. */
MDBX_NOTHROW_PURE_FUNCTION static inline meta_t *page_meta(page_t *mp) {
  void *const payload = page_data(mp);
  return (meta_t *)payload;
}
|
|
252
|
+
|
|
253
|
+
/* Number of keys on the page, computed as mp->lower halved. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_numkeys(const page_t *mp) {
  const size_t nkeys = mp->lower >> 1;
  return nkeys;
}
|
|
254
|
+
|
|
255
|
+
/* Free space on the page: the gap between mp->upper and mp->lower. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_room(const page_t *mp) {
  const size_t gap = mp->upper - mp->lower;
  return gap;
}
|
|
256
|
+
|
|
257
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_space(const MDBX_env *env) {
|
|
258
|
+
STATIC_ASSERT(PAGEHDRSZ % 2 == 0);
|
|
259
|
+
return env->ps - PAGEHDRSZ;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
/* Bytes in use on the page: usable page space minus the free room. */
MDBX_NOTHROW_PURE_FUNCTION static inline size_t page_used(const MDBX_env *env, const page_t *mp) {
  const size_t total = page_space(env);
  const size_t unused = page_room(mp);
  return total - unused;
}
|
|
265
|
+
|
|
266
|
+
/* The percentage of space used in the page, in a percents. */
|
|
267
|
+
MDBX_MAYBE_UNUSED MDBX_NOTHROW_PURE_FUNCTION static inline unsigned page_fill_percentum_x10(const MDBX_env *env,
|
|
268
|
+
const page_t *mp) {
|
|
269
|
+
const size_t space = page_space(env);
|
|
270
|
+
return (unsigned)((page_used(env, mp) * 1000 + space / 2) / space);
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline node_t *page_node(const page_t *mp, size_t i) {
|
|
274
|
+
assert(page_type_compat(mp) == P_LEAF || page_type(mp) == P_BRANCH);
|
|
275
|
+
assert(page_numkeys(mp) > i);
|
|
276
|
+
assert(mp->entries[i] % 2 == 0);
|
|
277
|
+
return ptr_disp(mp, mp->entries[i] + PAGEHDRSZ);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline void *page_dupfix_ptr(const page_t *mp, size_t i, size_t keysize) {
|
|
281
|
+
assert(page_type_compat(mp) == (P_LEAF | P_DUPFIX) && i == (indx_t)i && mp->dupfix_ksize == keysize);
|
|
282
|
+
(void)keysize;
|
|
283
|
+
return ptr_disp(mp, PAGEHDRSZ + mp->dupfix_ksize * (indx_t)i);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline MDBX_val page_dupfix_key(const page_t *mp, size_t i, size_t keysize) {
|
|
287
|
+
MDBX_val r;
|
|
288
|
+
r.iov_base = page_dupfix_ptr(mp, i, keysize);
|
|
289
|
+
r.iov_len = mp->dupfix_ksize;
|
|
290
|
+
return r;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/*----------------------------------------------------------------------------*/
|
|
294
|
+
|
|
295
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_int_unaligned(const MDBX_val *a, const MDBX_val *b);
|
|
296
|
+
|
|
297
|
+
#if MDBX_UNALIGNED_OK < 2 || (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG))
|
|
298
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int
|
|
299
|
+
/* Compare two items pointing at 2-byte aligned unsigned int's. */
|
|
300
|
+
cmp_int_align2(const MDBX_val *a, const MDBX_val *b);
|
|
301
|
+
#else
|
|
302
|
+
#define cmp_int_align2 cmp_int_unaligned
|
|
303
|
+
#endif /* !MDBX_UNALIGNED_OK || debug */
|
|
304
|
+
|
|
305
|
+
#if MDBX_UNALIGNED_OK < 4 || (MDBX_DEBUG || MDBX_FORCE_ASSERTIONS || !defined(NDEBUG))
|
|
306
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int
|
|
307
|
+
/* Compare two items pointing at 4-byte aligned unsigned int's. */
|
|
308
|
+
cmp_int_align4(const MDBX_val *a, const MDBX_val *b);
|
|
309
|
+
#else
|
|
310
|
+
#define cmp_int_align4 cmp_int_unaligned
|
|
311
|
+
#endif /* !MDBX_UNALIGNED_OK || debug */
|
|
312
|
+
|
|
313
|
+
/* Compare two items lexically */
|
|
314
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lexical(const MDBX_val *a, const MDBX_val *b);
|
|
315
|
+
|
|
316
|
+
/* Compare two items in reverse byte order */
|
|
317
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_reverse(const MDBX_val *a, const MDBX_val *b);
|
|
318
|
+
|
|
319
|
+
/* Fast non-lexically comparator */
|
|
320
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_lenfast(const MDBX_val *a, const MDBX_val *b);
|
|
321
|
+
|
|
322
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL bool eq_fast_slowpath(const uint8_t *a, const uint8_t *b, size_t l);
|
|
323
|
+
|
|
324
|
+
MDBX_NOTHROW_PURE_FUNCTION static inline bool eq_fast(const MDBX_val *a, const MDBX_val *b) {
|
|
325
|
+
return unlikely(a->iov_len == b->iov_len) && eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len);
|
|
326
|
+
}
|
|
327
|
+
|
|
328
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b);
|
|
329
|
+
|
|
330
|
+
MDBX_NOTHROW_PURE_FUNCTION MDBX_INTERNAL int cmp_equal_or_wrong(const MDBX_val *a, const MDBX_val *b);
|
|
331
|
+
|
|
332
|
+
/* Selects the built-in key comparator for the given table flags.
 * MDBX_REVERSEKEY takes precedence over MDBX_INTEGERKEY; with neither flag
 * the lexical comparator is used. */
static inline MDBX_cmp_func *builtin_keycmp(MDBX_db_flags_t flags) {
  if (flags & MDBX_REVERSEKEY)
    return cmp_reverse;
  if (flags & MDBX_INTEGERKEY)
    return cmp_int_align2;
  return cmp_lexical;
}
|
|
335
|
+
|
|
336
|
+
/* Selects the built-in value comparator for the given table flags.
 * Without MDBX_DUPSORT the length-first comparator is used. For dupsort
 * tables MDBX_INTEGERDUP takes precedence over MDBX_REVERSEDUP, and the
 * lexical comparator is the fallback. */
static inline MDBX_cmp_func *builtin_datacmp(MDBX_db_flags_t flags) {
  if (!(flags & MDBX_DUPSORT))
    return cmp_lenfast;
  if (flags & MDBX_INTEGERDUP)
    return cmp_int_unaligned;
  if (flags & MDBX_REVERSEDUP)
    return cmp_reverse;
  return cmp_lexical;
}
|
|
342
|
+
|
|
343
|
+
/*----------------------------------------------------------------------------*/
|
|
344
|
+
|
|
345
|
+
MDBX_INTERNAL uint32_t combine_durability_flags(const uint32_t a, const uint32_t b);
|
|
346
|
+
|
|
347
|
+
MDBX_CONST_FUNCTION static inline lck_t *lckless_stub(const MDBX_env *env) {
|
|
348
|
+
uintptr_t stub = (uintptr_t)&env->lckless_placeholder;
|
|
349
|
+
/* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */
|
|
350
|
+
stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1);
|
|
351
|
+
return (lck_t *)stub;
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
#if !(defined(_WIN32) || defined(_WIN64))
|
|
355
|
+
MDBX_CONST_FUNCTION static inline int ignore_enosys(int err) {
|
|
356
|
+
#ifdef ENOSYS
|
|
357
|
+
if (err == ENOSYS)
|
|
358
|
+
return MDBX_RESULT_TRUE;
|
|
359
|
+
#endif /* ENOSYS */
|
|
360
|
+
#ifdef ENOIMPL
|
|
361
|
+
if (err == ENOIMPL)
|
|
362
|
+
return MDBX_RESULT_TRUE;
|
|
363
|
+
#endif /* ENOIMPL */
|
|
364
|
+
#ifdef ENOTSUP
|
|
365
|
+
if (err == ENOTSUP)
|
|
366
|
+
return MDBX_RESULT_TRUE;
|
|
367
|
+
#endif /* ENOTSUP */
|
|
368
|
+
#ifdef ENOSUPP
|
|
369
|
+
if (err == ENOSUPP)
|
|
370
|
+
return MDBX_RESULT_TRUE;
|
|
371
|
+
#endif /* ENOSUPP */
|
|
372
|
+
#ifdef EOPNOTSUPP
|
|
373
|
+
if (err == EOPNOTSUPP)
|
|
374
|
+
return MDBX_RESULT_TRUE;
|
|
375
|
+
#endif /* EOPNOTSUPP */
|
|
376
|
+
return err;
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
MDBX_MAYBE_UNUSED MDBX_CONST_FUNCTION static inline int ignore_enosys_and_eagain(int err) {
|
|
380
|
+
return (err == EAGAIN) ? MDBX_RESULT_TRUE : ignore_enosys(err);
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
MDBX_MAYBE_UNUSED MDBX_CONST_FUNCTION static inline int ignore_enosys_and_einval(int err) {
|
|
384
|
+
return (err == EINVAL) ? MDBX_RESULT_TRUE : ignore_enosys(err);
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
MDBX_MAYBE_UNUSED MDBX_CONST_FUNCTION static inline int ignore_enosys_and_eremote(int err) {
|
|
388
|
+
return (err == MDBX_EREMOTE) ? MDBX_RESULT_TRUE : ignore_enosys(err);
|
|
389
|
+
}
|
|
390
|
+
|
|
391
|
+
#endif /* defined(_WIN32) || defined(_WIN64) */
|
|
392
|
+
|
|
393
|
+
static inline int check_env(const MDBX_env *env, const bool wanna_active) {
|
|
394
|
+
if (unlikely(!env))
|
|
395
|
+
return MDBX_EINVAL;
|
|
396
|
+
|
|
397
|
+
if (unlikely(env->signature.weak != env_signature))
|
|
398
|
+
return MDBX_EBADSIGN;
|
|
399
|
+
|
|
400
|
+
if (unlikely(env->flags & ENV_FATAL_ERROR))
|
|
401
|
+
return MDBX_PANIC;
|
|
402
|
+
|
|
403
|
+
if (wanna_active) {
|
|
404
|
+
#if MDBX_ENV_CHECKPID
|
|
405
|
+
if (unlikely(env->pid != osal_getpid()) && env->pid) {
|
|
406
|
+
((MDBX_env *)env)->flags |= ENV_FATAL_ERROR;
|
|
407
|
+
return MDBX_PANIC;
|
|
408
|
+
}
|
|
409
|
+
#endif /* MDBX_ENV_CHECKPID */
|
|
410
|
+
if (unlikely((env->flags & ENV_ACTIVE) == 0))
|
|
411
|
+
return MDBX_EPERM;
|
|
412
|
+
eASSERT(env, env->dxb_mmap.base != nullptr);
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
return MDBX_SUCCESS;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) {
|
|
419
|
+
if (unlikely(!txn))
|
|
420
|
+
return MDBX_EINVAL;
|
|
421
|
+
|
|
422
|
+
if (unlikely(txn->signature != txn_signature))
|
|
423
|
+
return MDBX_EBADSIGN;
|
|
424
|
+
|
|
425
|
+
if (bad_bits) {
|
|
426
|
+
if (unlikely(!txn->env->dxb_mmap.base))
|
|
427
|
+
return MDBX_EPERM;
|
|
428
|
+
|
|
429
|
+
if (unlikely(txn->flags & bad_bits)) {
|
|
430
|
+
if ((bad_bits & MDBX_TXN_RDONLY) && unlikely(txn->flags & MDBX_TXN_RDONLY))
|
|
431
|
+
return MDBX_EACCESS;
|
|
432
|
+
if ((bad_bits & MDBX_TXN_PARKED) == 0)
|
|
433
|
+
return MDBX_BAD_TXN;
|
|
434
|
+
return txn_check_badbits_parked(txn, bad_bits);
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
tASSERT(txn, (txn->flags & MDBX_TXN_FINISHED) ||
|
|
439
|
+
(txn->flags & MDBX_NOSTICKYTHREADS) == (txn->env->flags & MDBX_NOSTICKYTHREADS));
|
|
440
|
+
#if MDBX_TXN_CHECKOWNER
|
|
441
|
+
if ((txn->flags & (MDBX_NOSTICKYTHREADS | MDBX_TXN_FINISHED)) != MDBX_NOSTICKYTHREADS &&
|
|
442
|
+
!(bad_bits /* abort/reset/txn-break */ == 0 &&
|
|
443
|
+
((txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED)) == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED))) &&
|
|
444
|
+
unlikely(txn->owner != osal_thread_self()))
|
|
445
|
+
return txn->owner ? MDBX_THREAD_MISMATCH : MDBX_BAD_TXN;
|
|
446
|
+
#endif /* MDBX_TXN_CHECKOWNER */
|
|
447
|
+
|
|
448
|
+
return MDBX_SUCCESS;
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
static inline int check_txn_rw(const MDBX_txn *txn, int bad_bits) {
|
|
452
|
+
return check_txn(txn, (bad_bits | MDBX_TXN_RDONLY) & ~MDBX_TXN_PARKED);
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
/*----------------------------------------------------------------------------*/
|
|
456
|
+
|
|
457
|
+
MDBX_INTERNAL void mincore_clean_cache(const MDBX_env *const env);
|
|
458
|
+
|
|
459
|
+
MDBX_INTERNAL void update_mlcnt(const MDBX_env *env, const pgno_t new_aligned_mlocked_pgno,
|
|
460
|
+
const bool lock_not_release);
|
|
461
|
+
|
|
462
|
+
MDBX_INTERNAL void munlock_after(const MDBX_env *env, const pgno_t aligned_pgno, const size_t end_bytes);
|
|
463
|
+
|
|
464
|
+
MDBX_INTERNAL void munlock_all(const MDBX_env *env);
|
|
465
|
+
|
|
466
|
+
/*----------------------------------------------------------------------------*/
|
|
467
|
+
/* Cache coherence and mmap invalidation */
|
|
468
|
+
#ifndef MDBX_CPU_WRITEBACK_INCOHERENT
|
|
469
|
+
#error "The MDBX_CPU_WRITEBACK_INCOHERENT must be defined before"
|
|
470
|
+
#elif MDBX_CPU_WRITEBACK_INCOHERENT
|
|
471
|
+
#define osal_flush_incoherent_cpu_writeback() osal_memory_barrier()
|
|
472
|
+
#else
|
|
473
|
+
#define osal_flush_incoherent_cpu_writeback() osal_compiler_barrier()
|
|
474
|
+
#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
|
|
475
|
+
|
|
476
|
+
/* Flushes/invalidates a memory-mapped range on platforms where the mapping is
 * not coherent; compiles to nothing when neither incoherence macro is enabled.
 *
 *   addr, nbytes - the range to be made coherent;
 *   pagesize     - used only for rounding the msync() range to page bounds
 *                  (the -pagesize mask assumes a power of two - TODO confirm).
 *
 * With MDBX_MMAP_INCOHERENT_FILE_WRITE the page-aligned range is passed to
 * msync(MS_SYNC | MS_INVALIDATE); a failure is only asserted, not returned.
 * With MDBX_MMAP_INCOHERENT_CPU_CACHE the range is passed to cacheflush(). */
MDBX_MAYBE_UNUSED static inline void osal_flush_incoherent_mmap(const void *addr, size_t nbytes,
                                                                const intptr_t pagesize) {
#ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE
#error "The MDBX_MMAP_INCOHERENT_FILE_WRITE must be defined before"
#elif MDBX_MMAP_INCOHERENT_FILE_WRITE
  /* Round the start down and the end up to page boundaries. */
  const intptr_t page_mask = -pagesize;
  char *const first = (char *)(page_mask & (intptr_t)addr);
  char *const last = (char *)(page_mask & (intptr_t)((char *)addr + nbytes + pagesize - 1));
  int err = 0;
  if (msync(first, last - first, MS_SYNC | MS_INVALIDATE))
    err = errno;
  eASSERT(nullptr, err == 0);
  (void)err;
#else
  (void)pagesize;
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */

#ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE
#error "The MDBX_MMAP_INCOHERENT_CPU_CACHE must be defined before"
#elif MDBX_MMAP_INCOHERENT_CPU_CACHE
#ifdef DCACHE
  /* MIPS has cache coherency issues.
   * Note: for any nbytes >= on-chip cache size, entire is flushed. */
  cacheflush((void *)addr, nbytes, DCACHE);
#else
#error "Oops, cacheflush() not available"
#endif /* DCACHE */
#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */

#if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE
  (void)addr;
  (void)nbytes;
#endif
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
|
|
3
|
+
|
|
4
|
+
#include "internals.h"
|
|
5
|
+
|
|
6
|
+
/* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
|
7
|
+
static bool coherency_check(const MDBX_env *env, const txnid_t txnid, const volatile tree_t *trees,
|
|
8
|
+
const volatile meta_t *meta, bool report) {
|
|
9
|
+
const txnid_t freedb_mod_txnid = trees[FREE_DBI].mod_txnid;
|
|
10
|
+
const txnid_t maindb_mod_txnid = trees[MAIN_DBI].mod_txnid;
|
|
11
|
+
const pgno_t last_pgno = meta->geometry.now;
|
|
12
|
+
|
|
13
|
+
const pgno_t freedb_root_pgno = trees[FREE_DBI].root;
|
|
14
|
+
const page_t *freedb_root =
|
|
15
|
+
(env->dxb_mmap.base && freedb_root_pgno < last_pgno) ? pgno2page(env, freedb_root_pgno) : nullptr;
|
|
16
|
+
|
|
17
|
+
const pgno_t maindb_root_pgno = trees[MAIN_DBI].root;
|
|
18
|
+
const page_t *maindb_root =
|
|
19
|
+
(env->dxb_mmap.base && maindb_root_pgno < last_pgno) ? pgno2page(env, maindb_root_pgno) : nullptr;
|
|
20
|
+
const uint64_t magic_and_version = unaligned_peek_u64_volatile(4, &meta->magic_and_version);
|
|
21
|
+
|
|
22
|
+
bool ok = true;
|
|
23
|
+
if (freedb_root_pgno != P_INVALID && unlikely(freedb_root_pgno >= last_pgno)) {
|
|
24
|
+
if (report)
|
|
25
|
+
WARNING("catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "free", freedb_root_pgno, txnid,
|
|
26
|
+
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)"
|
|
27
|
+
: "(wagering meta)");
|
|
28
|
+
ok = false;
|
|
29
|
+
}
|
|
30
|
+
if (maindb_root_pgno != P_INVALID && unlikely(maindb_root_pgno >= last_pgno)) {
|
|
31
|
+
if (report)
|
|
32
|
+
WARNING("catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN " %s", "main", maindb_root_pgno, txnid,
|
|
33
|
+
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)"
|
|
34
|
+
: "(wagering meta)");
|
|
35
|
+
ok = false;
|
|
36
|
+
}
|
|
37
|
+
if (unlikely(txnid < freedb_mod_txnid ||
|
|
38
|
+
(!freedb_mod_txnid && freedb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) {
|
|
39
|
+
if (report)
|
|
40
|
+
WARNING(
|
|
41
|
+
"catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "free", freedb_mod_txnid, txnid,
|
|
42
|
+
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)" : "(wagering meta)");
|
|
43
|
+
ok = false;
|
|
44
|
+
}
|
|
45
|
+
if (unlikely(txnid < maindb_mod_txnid ||
|
|
46
|
+
(!maindb_mod_txnid && maindb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) {
|
|
47
|
+
if (report)
|
|
48
|
+
WARNING(
|
|
49
|
+
"catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN " %s", "main", maindb_mod_txnid, txnid,
|
|
50
|
+
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of unified page/buffer cache)" : "(wagering meta)");
|
|
51
|
+
ok = false;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/* Проверяем отметки внутри корневых страниц только если сами страницы
|
|
55
|
+
* в пределах текущего отображения. Иначе возможны SIGSEGV до переноса
|
|
56
|
+
* вызова coherency_check_head() после dxb_resize() внутри txn_renew(). */
|
|
57
|
+
if (likely(freedb_root && freedb_mod_txnid &&
|
|
58
|
+
(size_t)ptr_dist(env->dxb_mmap.base, freedb_root) < env->dxb_mmap.limit)) {
|
|
59
|
+
VALGRIND_MAKE_MEM_DEFINED(freedb_root, sizeof(freedb_root->txnid));
|
|
60
|
+
MDBX_ASAN_UNPOISON_MEMORY_REGION(freedb_root, sizeof(freedb_root->txnid));
|
|
61
|
+
const txnid_t root_txnid = freedb_root->txnid;
|
|
62
|
+
if (unlikely(root_txnid != freedb_mod_txnid)) {
|
|
63
|
+
if (report)
|
|
64
|
+
WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN " for %s-db.mod_txnid %" PRIaTXN " %s",
|
|
65
|
+
freedb_root_pgno, root_txnid, "free", freedb_mod_txnid,
|
|
66
|
+
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of "
|
|
67
|
+
"unified page/buffer cache)"
|
|
68
|
+
: "(wagering meta)");
|
|
69
|
+
ok = false;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
if (likely(maindb_root && maindb_mod_txnid &&
|
|
73
|
+
(size_t)ptr_dist(env->dxb_mmap.base, maindb_root) < env->dxb_mmap.limit)) {
|
|
74
|
+
VALGRIND_MAKE_MEM_DEFINED(maindb_root, sizeof(maindb_root->txnid));
|
|
75
|
+
MDBX_ASAN_UNPOISON_MEMORY_REGION(maindb_root, sizeof(maindb_root->txnid));
|
|
76
|
+
const txnid_t root_txnid = maindb_root->txnid;
|
|
77
|
+
if (unlikely(root_txnid != maindb_mod_txnid)) {
|
|
78
|
+
if (report)
|
|
79
|
+
WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN " for %s-db.mod_txnid %" PRIaTXN " %s",
|
|
80
|
+
maindb_root_pgno, root_txnid, "main", maindb_mod_txnid,
|
|
81
|
+
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of "
|
|
82
|
+
"unified page/buffer cache)"
|
|
83
|
+
: "(wagering meta)");
|
|
84
|
+
ok = false;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
if (unlikely(!ok) && report)
|
|
88
|
+
env->lck->pgops.incoherence.weak =
|
|
89
|
+
(env->lck->pgops.incoherence.weak >= INT32_MAX) ? INT32_MAX : env->lck->pgops.incoherence.weak + 1;
|
|
90
|
+
return ok;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
__cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno, const MDBX_env *env) {
|
|
94
|
+
if (likely(timestamp && *timestamp == 0))
|
|
95
|
+
*timestamp = osal_monotime();
|
|
96
|
+
else if (unlikely(!timestamp || osal_monotime() - *timestamp > osal_16dot16_to_monotime(65536 / 10))) {
|
|
97
|
+
if (pgno >= 0 && pgno != env->stuck_meta)
|
|
98
|
+
ERROR("bailout waiting for %" PRIuSIZE " page arrival %s", pgno,
|
|
99
|
+
"(workaround for incoherent flaw of unified page/buffer cache)");
|
|
100
|
+
else if (env->stuck_meta < 0)
|
|
101
|
+
ERROR("bailout waiting for valid snapshot (%s)", "workaround for incoherent flaw of unified page/buffer cache");
|
|
102
|
+
return MDBX_PROBLEM;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
osal_memory_fence(mo_AcquireRelease, true);
|
|
106
|
+
#if defined(_WIN32) || defined(_WIN64)
|
|
107
|
+
SwitchToThread();
|
|
108
|
+
#elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE)
|
|
109
|
+
sched_yield();
|
|
110
|
+
#elif (defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 1)) || defined(_OPEN_THREADS)
|
|
111
|
+
pthread_yield();
|
|
112
|
+
#else
|
|
113
|
+
usleep(42);
|
|
114
|
+
#endif
|
|
115
|
+
return MDBX_RESULT_TRUE;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/* check with timeout as the workaround
|
|
119
|
+
* for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
|
120
|
+
/* Loads the head meta snapshot into a transaction and verifies it.
 *
 *   txn       - receives txnid, geometry, the core tree descriptors and the
 *               canary copied from the head meta;
 *   head      - the head meta (txnid plus const and volatile views of it);
 *   timestamp - wait marker shared with coherency_timeout(); it is
 *               dereferenced unconditionally, so it must not be null.
 *
 * Returns MDBX_SUCCESS when the snapshot is coherent, the result of
 * coherency_timeout() when it is not (or the meta txnid has changed while
 * copying), and MDBX_INCOMPATIBLE for unexpected GC/FreeDB flags. */
__hot int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head, uint64_t *timestamp) {
  /* Take a private copy of the head meta: txnid, geometry, trees, canary. */
  txn->txnid = head.txnid;
  txn->geo = head.ptr_c->geometry;
  STATIC_ASSERT(sizeof(head.ptr_c->trees) == CORE_DBS * sizeof(tree_t));
  memcpy(txn->dbs, &head.ptr_c->trees, sizeof(head.ptr_c->trees));
  /* NOTE(review): the length passed here is an element count, not a byte
   * count - confirm whether the whole tail of txn->dbs was meant. */
  VALGRIND_MAKE_MEM_UNDEFINED(txn->dbs + CORE_DBS, txn->env->max_dbi - CORE_DBS);
  txn->canary = head.ptr_c->canary;

  /* Mismatches are reported only while the wait marker is still zero. */
  const bool report = (*timestamp == 0);
  if (unlikely(!coherency_check(txn->env, head.txnid, txn->dbs, head.ptr_v, report) ||
               txn->txnid != meta_txnid(head.ptr_v)))
    return coherency_timeout(timestamp, -1, txn->env);

  tree_t *const gc = &txn->dbs[FREE_DBI];
  if (unlikely(gc->flags != MDBX_INTEGERKEY)) {
    /* Extra non-persistent bits are stripped only when the persistent part
     * is right and the magic is not the current data format one. */
    const bool persistent_mismatch = (gc->flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY;
    if (persistent_mismatch || unaligned_peek_u64(4, &head.ptr_c->magic_and_version) == MDBX_DATA_MAGIC) {
      ERROR("unexpected/invalid db-flags 0x%x for %s", gc->flags, "GC/FreeDB");
      return MDBX_INCOMPATIBLE;
    }
    gc->flags &= DB_PERSISTENT_FLAGS;
  }

  tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY);
  tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags));
  return MDBX_SUCCESS;
}
|
|
145
|
+
|
|
146
|
+
int coherency_check_written(const MDBX_env *env, const txnid_t txnid, const volatile meta_t *meta, const intptr_t pgno,
|
|
147
|
+
uint64_t *timestamp) {
|
|
148
|
+
const bool report = !(timestamp && *timestamp);
|
|
149
|
+
const txnid_t head_txnid = meta_txnid(meta);
|
|
150
|
+
if (likely(head_txnid >= MIN_TXNID && head_txnid >= txnid)) {
|
|
151
|
+
if (likely(coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) {
|
|
152
|
+
eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY);
|
|
153
|
+
eASSERT(env, check_table_flags(meta->trees.main.flags));
|
|
154
|
+
return MDBX_SUCCESS;
|
|
155
|
+
}
|
|
156
|
+
} else if (report) {
|
|
157
|
+
env->lck->pgops.incoherence.weak =
|
|
158
|
+
(env->lck->pgops.incoherence.weak >= INT32_MAX) ? INT32_MAX : env->lck->pgops.incoherence.weak + 1;
|
|
159
|
+
WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s",
|
|
160
|
+
(head_txnid < MIN_TXNID) ? "invalid" : "unexpected", head_txnid,
|
|
161
|
+
bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)),
|
|
162
|
+
"(workaround for incoherent flaw of unified page/buffer cache)");
|
|
163
|
+
}
|
|
164
|
+
return coherency_timeout(timestamp, pgno, env);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta, bool report) {
|
|
168
|
+
uint64_t timestamp = 0;
|
|
169
|
+
return coherency_check_written(env, 0, meta, -1, report ? ×tamp : nullptr) == MDBX_SUCCESS;
|
|
170
|
+
}
|