mdbxmou 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220) hide show
  1. package/.github/workflows/ci.yml +32 -0
  2. package/.github/workflows/publish.yml +27 -0
  3. package/.gitmodules +3 -0
  4. package/CMakeLists.txt +53 -0
  5. package/LICENSE +201 -0
  6. package/README.md +639 -0
  7. package/build.js +11 -0
  8. package/deps/libmdbx/.clang-format +3 -0
  9. package/deps/libmdbx/.cmake-format.yaml +3 -0
  10. package/deps/libmdbx/.le.ini +40 -0
  11. package/deps/libmdbx/CMakeLists.txt +1269 -0
  12. package/deps/libmdbx/COPYRIGHT +159 -0
  13. package/deps/libmdbx/ChangeLog.md +2786 -0
  14. package/deps/libmdbx/GNUmakefile +950 -0
  15. package/deps/libmdbx/LICENSE +177 -0
  16. package/deps/libmdbx/Makefile +16 -0
  17. package/deps/libmdbx/NOTICE +39 -0
  18. package/deps/libmdbx/README.md +863 -0
  19. package/deps/libmdbx/TODO.md +43 -0
  20. package/deps/libmdbx/cmake/compiler.cmake +1221 -0
  21. package/deps/libmdbx/cmake/profile.cmake +58 -0
  22. package/deps/libmdbx/cmake/utils.cmake +524 -0
  23. package/deps/libmdbx/conanfile.py +323 -0
  24. package/deps/libmdbx/docs/Doxyfile.in +2734 -0
  25. package/deps/libmdbx/docs/_preface.md +47 -0
  26. package/deps/libmdbx/docs/_restrictions.md +248 -0
  27. package/deps/libmdbx/docs/_starting.md +245 -0
  28. package/deps/libmdbx/docs/_toc.md +34 -0
  29. package/deps/libmdbx/docs/header.html +96 -0
  30. package/deps/libmdbx/example/CMakeLists.txt +6 -0
  31. package/deps/libmdbx/example/README.md +1 -0
  32. package/deps/libmdbx/example/example-mdbx.c +154 -0
  33. package/deps/libmdbx/example/sample-bdb.txt +77 -0
  34. package/deps/libmdbx/mdbx.h +6655 -0
  35. package/deps/libmdbx/mdbx.h++ +6428 -0
  36. package/deps/libmdbx/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch +173 -0
  37. package/deps/libmdbx/src/alloy.c +54 -0
  38. package/deps/libmdbx/src/api-cold.c +543 -0
  39. package/deps/libmdbx/src/api-copy.c +912 -0
  40. package/deps/libmdbx/src/api-cursor.c +754 -0
  41. package/deps/libmdbx/src/api-dbi.c +315 -0
  42. package/deps/libmdbx/src/api-env.c +1434 -0
  43. package/deps/libmdbx/src/api-extra.c +165 -0
  44. package/deps/libmdbx/src/api-key-transform.c +197 -0
  45. package/deps/libmdbx/src/api-misc.c +286 -0
  46. package/deps/libmdbx/src/api-opts.c +575 -0
  47. package/deps/libmdbx/src/api-range-estimate.c +365 -0
  48. package/deps/libmdbx/src/api-txn-data.c +454 -0
  49. package/deps/libmdbx/src/api-txn.c +921 -0
  50. package/deps/libmdbx/src/atomics-ops.h +364 -0
  51. package/deps/libmdbx/src/atomics-types.h +97 -0
  52. package/deps/libmdbx/src/audit.c +109 -0
  53. package/deps/libmdbx/src/bits.md +34 -0
  54. package/deps/libmdbx/src/chk.c +1796 -0
  55. package/deps/libmdbx/src/cogs.c +309 -0
  56. package/deps/libmdbx/src/cogs.h +506 -0
  57. package/deps/libmdbx/src/coherency.c +170 -0
  58. package/deps/libmdbx/src/config.h.in +88 -0
  59. package/deps/libmdbx/src/cursor.c +2396 -0
  60. package/deps/libmdbx/src/cursor.h +391 -0
  61. package/deps/libmdbx/src/dbi.c +717 -0
  62. package/deps/libmdbx/src/dbi.h +142 -0
  63. package/deps/libmdbx/src/debug_begin.h +36 -0
  64. package/deps/libmdbx/src/debug_end.h +15 -0
  65. package/deps/libmdbx/src/dpl.c +486 -0
  66. package/deps/libmdbx/src/dpl.h +134 -0
  67. package/deps/libmdbx/src/dxb.c +1335 -0
  68. package/deps/libmdbx/src/env.c +607 -0
  69. package/deps/libmdbx/src/essentials.h +125 -0
  70. package/deps/libmdbx/src/gc-get.c +1345 -0
  71. package/deps/libmdbx/src/gc-put.c +970 -0
  72. package/deps/libmdbx/src/gc.h +40 -0
  73. package/deps/libmdbx/src/global.c +474 -0
  74. package/deps/libmdbx/src/internals.h +585 -0
  75. package/deps/libmdbx/src/layout-dxb.h +288 -0
  76. package/deps/libmdbx/src/layout-lck.h +289 -0
  77. package/deps/libmdbx/src/lck-posix.c +859 -0
  78. package/deps/libmdbx/src/lck-windows.c +607 -0
  79. package/deps/libmdbx/src/lck.c +174 -0
  80. package/deps/libmdbx/src/lck.h +110 -0
  81. package/deps/libmdbx/src/logging_and_debug.c +250 -0
  82. package/deps/libmdbx/src/logging_and_debug.h +159 -0
  83. package/deps/libmdbx/src/man1/mdbx_chk.1 +106 -0
  84. package/deps/libmdbx/src/man1/mdbx_copy.1 +95 -0
  85. package/deps/libmdbx/src/man1/mdbx_drop.1 +48 -0
  86. package/deps/libmdbx/src/man1/mdbx_dump.1 +101 -0
  87. package/deps/libmdbx/src/man1/mdbx_load.1 +105 -0
  88. package/deps/libmdbx/src/man1/mdbx_stat.1 +86 -0
  89. package/deps/libmdbx/src/mdbx.c++ +1837 -0
  90. package/deps/libmdbx/src/meta.c +656 -0
  91. package/deps/libmdbx/src/meta.h +168 -0
  92. package/deps/libmdbx/src/mvcc-readers.c +414 -0
  93. package/deps/libmdbx/src/node.c +365 -0
  94. package/deps/libmdbx/src/node.h +102 -0
  95. package/deps/libmdbx/src/ntdll.def +1246 -0
  96. package/deps/libmdbx/src/options.h +534 -0
  97. package/deps/libmdbx/src/osal.c +3485 -0
  98. package/deps/libmdbx/src/osal.h +587 -0
  99. package/deps/libmdbx/src/page-get.c +483 -0
  100. package/deps/libmdbx/src/page-iov.c +185 -0
  101. package/deps/libmdbx/src/page-iov.h +34 -0
  102. package/deps/libmdbx/src/page-ops.c +744 -0
  103. package/deps/libmdbx/src/page-ops.h +142 -0
  104. package/deps/libmdbx/src/pnl.c +236 -0
  105. package/deps/libmdbx/src/pnl.h +146 -0
  106. package/deps/libmdbx/src/preface.h +990 -0
  107. package/deps/libmdbx/src/proto.h +105 -0
  108. package/deps/libmdbx/src/refund.c +212 -0
  109. package/deps/libmdbx/src/sort.h +484 -0
  110. package/deps/libmdbx/src/spill.c +431 -0
  111. package/deps/libmdbx/src/spill.h +74 -0
  112. package/deps/libmdbx/src/table.c +107 -0
  113. package/deps/libmdbx/src/tls.c +551 -0
  114. package/deps/libmdbx/src/tls.h +43 -0
  115. package/deps/libmdbx/src/tools/chk.c +673 -0
  116. package/deps/libmdbx/src/tools/copy.c +166 -0
  117. package/deps/libmdbx/src/tools/drop.c +199 -0
  118. package/deps/libmdbx/src/tools/dump.c +515 -0
  119. package/deps/libmdbx/src/tools/load.c +831 -0
  120. package/deps/libmdbx/src/tools/stat.c +516 -0
  121. package/deps/libmdbx/src/tools/wingetopt.c +87 -0
  122. package/deps/libmdbx/src/tools/wingetopt.h +30 -0
  123. package/deps/libmdbx/src/tree-ops.c +1554 -0
  124. package/deps/libmdbx/src/tree-search.c +140 -0
  125. package/deps/libmdbx/src/txl.c +99 -0
  126. package/deps/libmdbx/src/txl.h +26 -0
  127. package/deps/libmdbx/src/txn.c +1083 -0
  128. package/deps/libmdbx/src/unaligned.h +205 -0
  129. package/deps/libmdbx/src/utils.c +32 -0
  130. package/deps/libmdbx/src/utils.h +76 -0
  131. package/deps/libmdbx/src/version.c.in +44 -0
  132. package/deps/libmdbx/src/walk.c +290 -0
  133. package/deps/libmdbx/src/walk.h +20 -0
  134. package/deps/libmdbx/src/windows-import.c +152 -0
  135. package/deps/libmdbx/src/windows-import.h +128 -0
  136. package/deps/libmdbx/test/CMakeLists.txt +317 -0
  137. package/deps/libmdbx/test/append.c++ +237 -0
  138. package/deps/libmdbx/test/base.h++ +92 -0
  139. package/deps/libmdbx/test/battery-tmux.sh +64 -0
  140. package/deps/libmdbx/test/cases.c++ +118 -0
  141. package/deps/libmdbx/test/chrono.c++ +134 -0
  142. package/deps/libmdbx/test/chrono.h++ +85 -0
  143. package/deps/libmdbx/test/config.c++ +643 -0
  144. package/deps/libmdbx/test/config.h++ +334 -0
  145. package/deps/libmdbx/test/copy.c++ +62 -0
  146. package/deps/libmdbx/test/dead.c++ +39 -0
  147. package/deps/libmdbx/test/dump-load.sh +40 -0
  148. package/deps/libmdbx/test/extra/crunched_delete.c++ +409 -0
  149. package/deps/libmdbx/test/extra/cursor_closing.c++ +410 -0
  150. package/deps/libmdbx/test/extra/dbi.c++ +229 -0
  151. package/deps/libmdbx/test/extra/doubtless_positioning.c++ +253 -0
  152. package/deps/libmdbx/test/extra/dupfix_addodd.c +94 -0
  153. package/deps/libmdbx/test/extra/dupfix_multiple.c++ +311 -0
  154. package/deps/libmdbx/test/extra/early_close_dbi.c++ +137 -0
  155. package/deps/libmdbx/test/extra/hex_base64_base58.c++ +118 -0
  156. package/deps/libmdbx/test/extra/maindb_ordinal.c++ +61 -0
  157. package/deps/libmdbx/test/extra/open.c++ +96 -0
  158. package/deps/libmdbx/test/extra/pcrf/README.md +2 -0
  159. package/deps/libmdbx/test/extra/pcrf/pcrf_test.c +380 -0
  160. package/deps/libmdbx/test/extra/probe.c++ +10 -0
  161. package/deps/libmdbx/test/extra/txn.c++ +407 -0
  162. package/deps/libmdbx/test/extra/upsert_alldups.c +193 -0
  163. package/deps/libmdbx/test/fork.c++ +263 -0
  164. package/deps/libmdbx/test/hill.c++ +447 -0
  165. package/deps/libmdbx/test/jitter.c++ +197 -0
  166. package/deps/libmdbx/test/keygen.c++ +393 -0
  167. package/deps/libmdbx/test/keygen.h++ +130 -0
  168. package/deps/libmdbx/test/log.c++ +358 -0
  169. package/deps/libmdbx/test/log.h++ +91 -0
  170. package/deps/libmdbx/test/main.c++ +706 -0
  171. package/deps/libmdbx/test/nested.c++ +318 -0
  172. package/deps/libmdbx/test/osal-unix.c++ +647 -0
  173. package/deps/libmdbx/test/osal-windows.c++ +440 -0
  174. package/deps/libmdbx/test/osal.h++ +41 -0
  175. package/deps/libmdbx/test/stochastic.sh +690 -0
  176. package/deps/libmdbx/test/stub/LICENSE +24 -0
  177. package/deps/libmdbx/test/stub/README.md +8 -0
  178. package/deps/libmdbx/test/stub/pthread_barrier.c +104 -0
  179. package/deps/libmdbx/test/stub/pthread_barrier.h +77 -0
  180. package/deps/libmdbx/test/test.c++ +1551 -0
  181. package/deps/libmdbx/test/test.h++ +298 -0
  182. package/deps/libmdbx/test/tmux.conf +3 -0
  183. package/deps/libmdbx/test/try.c++ +30 -0
  184. package/deps/libmdbx/test/ttl.c++ +240 -0
  185. package/deps/libmdbx/test/utils.c++ +203 -0
  186. package/deps/libmdbx/test/utils.h++ +326 -0
  187. package/deps/libmdbx/test/valgrind_suppress.txt +536 -0
  188. package/lib/mdbx_evn_async.js +211 -0
  189. package/lib/mdbx_worker.js +195 -0
  190. package/lib/nativemou.js +6 -0
  191. package/package.json +38 -0
  192. package/src/async/envmou_close.cpp +34 -0
  193. package/src/async/envmou_close.hpp +32 -0
  194. package/src/async/envmou_copy_to.cpp +29 -0
  195. package/src/async/envmou_copy_to.hpp +38 -0
  196. package/src/async/envmou_keys.cpp +201 -0
  197. package/src/async/envmou_keys.hpp +50 -0
  198. package/src/async/envmou_open.cpp +38 -0
  199. package/src/async/envmou_open.hpp +33 -0
  200. package/src/async/envmou_query.cpp +167 -0
  201. package/src/async/envmou_query.hpp +53 -0
  202. package/src/dbimou.cpp +522 -0
  203. package/src/dbimou.hpp +82 -0
  204. package/src/env_arg0.hpp +24 -0
  205. package/src/envmou.cpp +445 -0
  206. package/src/envmou.hpp +116 -0
  207. package/src/modulemou.cpp +113 -0
  208. package/src/querymou.cpp +177 -0
  209. package/src/querymou.hpp +93 -0
  210. package/src/txnmou.cpp +254 -0
  211. package/src/txnmou.hpp +122 -0
  212. package/src/typemou.hpp +239 -0
  213. package/src/valuemou.hpp +194 -0
  214. package/test/async.js +67 -0
  215. package/test/e3.js +38 -0
  216. package/test/e4.js +89 -0
  217. package/test/e5.js +162 -0
  218. package/test/test-batch-ops.js +243 -0
  219. package/test/test-cursor-mode.js +84 -0
  220. package/test/test-multi-mode.js +87 -0
@@ -0,0 +1,1335 @@
1
+ /// \copyright SPDX-License-Identifier: Apache-2.0
2
+ /// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
3
+
4
+ #include "internals.h"
5
+
6
+ __cold int dxb_read_header(MDBX_env *env, meta_t *dest, const int lck_exclusive, const mdbx_mode_t mode_bits) {
7
+ memset(dest, 0, sizeof(meta_t));
8
+ int rc = osal_filesize(env->lazy_fd, &env->dxb_mmap.filesize);
9
+ if (unlikely(rc != MDBX_SUCCESS))
10
+ return rc;
11
+
12
+ unaligned_poke_u64(4, dest->sign, DATASIGN_WEAK);
13
+ rc = MDBX_CORRUPTED;
14
+
15
+ /* Read twice all meta pages so we can find the latest one. */
16
+ unsigned loop_limit = NUM_METAS * 2;
17
+ /* We don't know the page size on first time. So, just guess it. */
18
+ unsigned guess_pagesize = 0;
19
+ for (unsigned loop_count = 0; loop_count < loop_limit; ++loop_count) {
20
+ const unsigned meta_number = loop_count % NUM_METAS;
21
+ const unsigned offset = (guess_pagesize ? guess_pagesize
22
+ : (loop_count > NUM_METAS) ? env->ps
23
+ : globals.sys_pagesize) *
24
+ meta_number;
25
+
26
+ char buffer[MDBX_MIN_PAGESIZE];
27
+ unsigned retryleft = 42;
28
+ while (1) {
29
+ TRACE("reading meta[%d]: offset %u, bytes %u, retry-left %u", meta_number, offset, MDBX_MIN_PAGESIZE, retryleft);
30
+ int err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset);
31
+ if (err == MDBX_ENODATA && offset == 0 && loop_count == 0 && env->dxb_mmap.filesize == 0 &&
32
+ mode_bits /* non-zero for DB creation */ != 0) {
33
+ NOTICE("read meta: empty file (%d, %s)", err, mdbx_strerror(err));
34
+ return err;
35
+ }
36
+ #if defined(_WIN32) || defined(_WIN64)
37
+ if (err == ERROR_LOCK_VIOLATION) {
38
+ SleepEx(0, true);
39
+ err = osal_pread(env->lazy_fd, buffer, MDBX_MIN_PAGESIZE, offset);
40
+ if (err == ERROR_LOCK_VIOLATION && --retryleft) {
41
+ WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
42
+ continue;
43
+ }
44
+ }
45
+ #endif /* Windows */
46
+ if (err != MDBX_SUCCESS) {
47
+ ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
48
+ return err;
49
+ }
50
+
51
+ char again[MDBX_MIN_PAGESIZE];
52
+ err = osal_pread(env->lazy_fd, again, MDBX_MIN_PAGESIZE, offset);
53
+ #if defined(_WIN32) || defined(_WIN64)
54
+ if (err == ERROR_LOCK_VIOLATION) {
55
+ SleepEx(0, true);
56
+ err = osal_pread(env->lazy_fd, again, MDBX_MIN_PAGESIZE, offset);
57
+ if (err == ERROR_LOCK_VIOLATION && --retryleft) {
58
+ WARNING("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
59
+ continue;
60
+ }
61
+ }
62
+ #endif /* Windows */
63
+ if (err != MDBX_SUCCESS) {
64
+ ERROR("read meta[%u,%u]: %i, %s", offset, MDBX_MIN_PAGESIZE, err, mdbx_strerror(err));
65
+ return err;
66
+ }
67
+
68
+ if (memcmp(buffer, again, MDBX_MIN_PAGESIZE) == 0 || --retryleft == 0)
69
+ break;
70
+
71
+ VERBOSE("meta[%u] was updated, re-read it", meta_number);
72
+ }
73
+
74
+ if (!retryleft) {
75
+ ERROR("meta[%u] is too volatile, skip it", meta_number);
76
+ continue;
77
+ }
78
+
79
+ page_t *const page = (page_t *)buffer;
80
+ meta_t *const meta = page_meta(page);
81
+ rc = meta_validate(env, meta, page, meta_number, &guess_pagesize);
82
+ if (rc != MDBX_SUCCESS)
83
+ continue;
84
+
85
+ bool latch;
86
+ if (env->stuck_meta >= 0)
87
+ latch = (meta_number == (unsigned)env->stuck_meta);
88
+ else if (meta_bootid_match(meta))
89
+ latch = meta_choice_recent(meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), dest->unsafe_txnid,
90
+ SIGN_IS_STEADY(dest->unsafe_sign));
91
+ else
92
+ latch = meta_choice_steady(meta->unsafe_txnid, SIGN_IS_STEADY(meta->unsafe_sign), dest->unsafe_txnid,
93
+ SIGN_IS_STEADY(dest->unsafe_sign));
94
+ if (latch) {
95
+ *dest = *meta;
96
+ if (!lck_exclusive && !meta_is_steady(dest))
97
+ loop_limit += 1; /* LY: should re-read to hush race with update */
98
+ VERBOSE("latch meta[%u]", meta_number);
99
+ }
100
+ }
101
+
102
+ if (dest->pagesize == 0 ||
103
+ (env->stuck_meta < 0 && !(meta_is_steady(dest) || meta_weak_acceptable(env, dest, lck_exclusive)))) {
104
+ ERROR("%s", "no usable meta-pages, database is corrupted");
105
+ if (rc == MDBX_SUCCESS) {
106
+ /* TODO: try to restore the database by fully checking b-tree structure
107
+ * for the each meta page, if the corresponding option was given */
108
+ return MDBX_CORRUPTED;
109
+ }
110
+ return rc;
111
+ }
112
+
113
+ return MDBX_SUCCESS;
114
+ }
115
+
116
+ __cold int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, const pgno_t size_pgno, pgno_t limit_pgno,
117
+ const enum resize_mode mode) {
118
+ /* Acquire guard to avoid collision between read and write txns
119
+ * around geo_in_bytes and dxb_mmap */
120
+ #if defined(_WIN32) || defined(_WIN64)
121
+ imports.srwl_AcquireExclusive(&env->remap_guard);
122
+ int rc = MDBX_SUCCESS;
123
+ mdbx_handle_array_t *suspended = nullptr;
124
+ mdbx_handle_array_t array_onstack;
125
+ #else
126
+ int rc = osal_fastmutex_acquire(&env->remap_guard);
127
+ if (unlikely(rc != MDBX_SUCCESS))
128
+ return rc;
129
+ #endif
130
+
131
+ const size_t prev_size = env->dxb_mmap.current;
132
+ const size_t prev_limit = env->dxb_mmap.limit;
133
+ const pgno_t prev_limit_pgno = bytes2pgno(env, prev_limit);
134
+ eASSERT(env, limit_pgno >= size_pgno);
135
+ eASSERT(env, size_pgno >= used_pgno);
136
+ if (mode < explicit_resize && size_pgno <= prev_limit_pgno) {
137
+ /* The actual mapsize may be less since the geo.upper may be changed
138
+ * by other process. Avoids remapping until it necessary. */
139
+ limit_pgno = prev_limit_pgno;
140
+ }
141
+ const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno);
142
+ const size_t size_bytes = pgno_align2os_bytes(env, size_pgno);
143
+ const void *const prev_map = env->dxb_mmap.base;
144
+
145
+ VERBOSE("resize(env-flags 0x%x, mode %d) datafile/mapping: "
146
+ "present %" PRIuPTR " -> %" PRIuPTR ", "
147
+ "limit %" PRIuPTR " -> %" PRIuPTR,
148
+ env->flags, mode, prev_size, size_bytes, prev_limit, limit_bytes);
149
+
150
+ eASSERT(env, limit_bytes >= size_bytes);
151
+ eASSERT(env, bytes2pgno(env, size_bytes) >= size_pgno);
152
+ eASSERT(env, bytes2pgno(env, limit_bytes) >= limit_pgno);
153
+
154
+ unsigned mresize_flags = env->flags & (MDBX_RDONLY | MDBX_WRITEMAP | MDBX_UTTERLY_NOSYNC);
155
+ if (mode >= impilict_shrink)
156
+ mresize_flags |= txn_shrink_allowed;
157
+
158
+ if (limit_bytes == env->dxb_mmap.limit && size_bytes == env->dxb_mmap.current && size_bytes == env->dxb_mmap.filesize)
159
+ goto bailout;
160
+
161
+ /* При использовании MDBX_NOSTICKYTHREADS с транзакциями могут работать любые
162
+ * потоки и у нас нет информации о том, какие именно. Поэтому нет возможности
163
+ * выполнить remap-действия требующие приостановки работающих с БД потоков. */
164
+ if ((env->flags & MDBX_NOSTICKYTHREADS) == 0) {
165
+ #if defined(_WIN32) || defined(_WIN64)
166
+ if ((size_bytes < env->dxb_mmap.current && mode > implicit_grow) || limit_bytes != env->dxb_mmap.limit) {
167
+ /* 1) Windows allows only extending a read-write section, but not a
168
+ * corresponding mapped view. Therefore in other cases we must suspend
169
+ * the local threads for safe remap.
170
+ * 2) At least on Windows 10 1803 the entire mapped section is unavailable
171
+ * for short time during NtExtendSection() or VirtualAlloc() execution.
172
+ * 3) Under Wine runtime environment on Linux a section extending is not
173
+ * supported.
174
+ *
175
+ * THEREFORE LOCAL THREADS SUSPENDING IS ALWAYS REQUIRED! */
176
+ array_onstack.limit = ARRAY_LENGTH(array_onstack.handles);
177
+ array_onstack.count = 0;
178
+ suspended = &array_onstack;
179
+ rc = osal_suspend_threads_before_remap(env, &suspended);
180
+ if (rc != MDBX_SUCCESS) {
181
+ ERROR("failed suspend-for-remap: errcode %d", rc);
182
+ goto bailout;
183
+ }
184
+ mresize_flags |=
185
+ (mode < explicit_resize) ? MDBX_MRESIZE_MAY_UNMAP : MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
186
+ }
187
+ #else /* Windows */
188
+ lck_t *const lck = env->lck_mmap.lck;
189
+ if (mode == explicit_resize && limit_bytes != env->dxb_mmap.limit) {
190
+ mresize_flags |= MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE;
191
+ if (lck) {
192
+ int err = lck_rdt_lock(env) /* lock readers table until remap done */;
193
+ if (unlikely(MDBX_IS_ERROR(err))) {
194
+ rc = err;
195
+ goto bailout;
196
+ }
197
+
198
+ /* looking for readers from this process */
199
+ const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
200
+ eASSERT(env, mode == explicit_resize);
201
+ for (size_t i = 0; i < snap_nreaders; ++i) {
202
+ if (lck->rdt[i].pid.weak == env->pid && lck->rdt[i].tid.weak != osal_thread_self()) {
203
+ /* the base address of the mapping can't be changed since
204
+ * the other reader thread from this process exists. */
205
+ lck_rdt_unlock(env);
206
+ mresize_flags &= ~(MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE);
207
+ break;
208
+ }
209
+ }
210
+ }
211
+ }
212
+ #endif /* ! Windows */
213
+ }
214
+
215
+ const pgno_t aligned_munlock_pgno =
216
+ (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) ? 0 : bytes2pgno(env, size_bytes);
217
+ if (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) {
218
+ mincore_clean_cache(env);
219
+ if ((env->flags & MDBX_WRITEMAP) && env->lck->unsynced_pages.weak) {
220
+ #if MDBX_ENABLE_PGOP_STAT
221
+ env->lck->pgops.msync.weak += 1;
222
+ #endif /* MDBX_ENABLE_PGOP_STAT */
223
+ rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, used_pgno), MDBX_SYNC_NONE);
224
+ if (unlikely(rc != MDBX_SUCCESS))
225
+ goto bailout;
226
+ }
227
+ }
228
+ munlock_after(env, aligned_munlock_pgno, size_bytes);
229
+
230
+ if (size_bytes < prev_size && mode > implicit_grow) {
231
+ NOTICE("resize-MADV_%s %u..%u", (env->flags & MDBX_WRITEMAP) ? "REMOVE" : "DONTNEED", size_pgno,
232
+ bytes2pgno(env, prev_size));
233
+ const uint32_t munlocks_before = atomic_load32(&env->lck->mlcnt[1], mo_Relaxed);
234
+ rc = MDBX_RESULT_TRUE;
235
+ #if defined(MADV_REMOVE)
236
+ if (env->flags & MDBX_WRITEMAP)
237
+ rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, MADV_REMOVE)
238
+ ? ignore_enosys_and_eagain(errno)
239
+ : MDBX_SUCCESS;
240
+ #endif /* MADV_REMOVE */
241
+ #if defined(MADV_DONTNEED)
242
+ if (rc == MDBX_RESULT_TRUE)
243
+ rc = madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, MADV_DONTNEED)
244
+ ? ignore_enosys_and_eagain(errno)
245
+ : MDBX_SUCCESS;
246
+ #elif defined(POSIX_MADV_DONTNEED)
247
+ if (rc == MDBX_RESULT_TRUE)
248
+ rc = ignore_enosys(
249
+ posix_madvise(ptr_disp(env->dxb_mmap.base, size_bytes), prev_size - size_bytes, POSIX_MADV_DONTNEED));
250
+ #elif defined(POSIX_FADV_DONTNEED)
251
+ if (rc == MDBX_RESULT_TRUE)
252
+ rc = ignore_enosys(posix_fadvise(env->lazy_fd, size_bytes, prev_size - size_bytes, POSIX_FADV_DONTNEED));
253
+ #endif /* MADV_DONTNEED */
254
+ if (unlikely(MDBX_IS_ERROR(rc))) {
255
+ const uint32_t mlocks_after = atomic_load32(&env->lck->mlcnt[0], mo_Relaxed);
256
+ if (rc == MDBX_EINVAL) {
257
+ const int severity = (mlocks_after - munlocks_before) ? MDBX_LOG_NOTICE : MDBX_LOG_WARN;
258
+ if (LOG_ENABLED(severity))
259
+ debug_log(severity, __func__, __LINE__,
260
+ "%s-madvise: ignore EINVAL (%d) since some pages maybe "
261
+ "locked (%u/%u mlcnt-processes)",
262
+ "resize", rc, mlocks_after, munlocks_before);
263
+ } else {
264
+ ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", "mresize", "DONTNEED", size_bytes,
265
+ prev_size - size_bytes, mlocks_after, munlocks_before, rc);
266
+ goto bailout;
267
+ }
268
+ } else
269
+ env->lck->discarded_tail.weak = size_pgno;
270
+ }
271
+
272
+ rc = osal_mresize(mresize_flags, &env->dxb_mmap, size_bytes, limit_bytes);
273
+ eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
274
+
275
+ if (rc == MDBX_SUCCESS) {
276
+ eASSERT(env, limit_bytes == env->dxb_mmap.limit);
277
+ eASSERT(env, size_bytes <= env->dxb_mmap.filesize);
278
+ if (mode == explicit_resize)
279
+ eASSERT(env, size_bytes == env->dxb_mmap.current);
280
+ else
281
+ eASSERT(env, size_bytes <= env->dxb_mmap.current);
282
+ env->lck->discarded_tail.weak = size_pgno;
283
+ const bool readahead =
284
+ !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(size_bytes, -(intptr_t)prev_size);
285
+ const bool force = limit_bytes != prev_limit || env->dxb_mmap.base != prev_map
286
+ #if defined(_WIN32) || defined(_WIN64)
287
+ || prev_size > size_bytes
288
+ #endif /* Windows */
289
+ ;
290
+ rc = dxb_set_readahead(env, size_pgno, readahead, force);
291
+ }
292
+
293
+ bailout:
294
+ if (rc == MDBX_SUCCESS) {
295
+ eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
296
+ eASSERT(env, limit_bytes == env->dxb_mmap.limit);
297
+ eASSERT(env, size_bytes <= env->dxb_mmap.filesize);
298
+ if (mode == explicit_resize)
299
+ eASSERT(env, size_bytes == env->dxb_mmap.current);
300
+ else
301
+ eASSERT(env, size_bytes <= env->dxb_mmap.current);
302
+ /* update env-geo to avoid influences */
303
+ env->geo_in_bytes.now = env->dxb_mmap.current;
304
+ env->geo_in_bytes.upper = env->dxb_mmap.limit;
305
+ env_options_adjust_defaults(env);
306
+ #ifdef ENABLE_MEMCHECK
307
+ if (prev_limit != env->dxb_mmap.limit || prev_map != env->dxb_mmap.base) {
308
+ VALGRIND_DISCARD(env->valgrind_handle);
309
+ env->valgrind_handle = 0;
310
+ if (env->dxb_mmap.limit)
311
+ env->valgrind_handle = VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx");
312
+ }
313
+ #endif /* ENABLE_MEMCHECK */
314
+ } else {
315
+ if (rc != MDBX_UNABLE_EXTEND_MAPSIZE && rc != MDBX_EPERM) {
316
+ ERROR("failed resize datafile/mapping: "
317
+ "present %" PRIuPTR " -> %" PRIuPTR ", "
318
+ "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
319
+ prev_size, size_bytes, prev_limit, limit_bytes, rc);
320
+ } else {
321
+ WARNING("unable resize datafile/mapping: "
322
+ "present %" PRIuPTR " -> %" PRIuPTR ", "
323
+ "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d",
324
+ prev_size, size_bytes, prev_limit, limit_bytes, rc);
325
+ eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
326
+ }
327
+ if (!env->dxb_mmap.base) {
328
+ env->flags |= ENV_FATAL_ERROR;
329
+ if (env->txn)
330
+ env->txn->flags |= MDBX_TXN_ERROR;
331
+ rc = MDBX_PANIC;
332
+ }
333
+ }
334
+
335
+ #if defined(_WIN32) || defined(_WIN64)
336
+ int err = MDBX_SUCCESS;
337
+ imports.srwl_ReleaseExclusive(&env->remap_guard);
338
+ if (suspended) {
339
+ err = osal_resume_threads_after_remap(suspended);
340
+ if (suspended != &array_onstack)
341
+ osal_free(suspended);
342
+ }
343
+ #else
344
+ if (env->lck_mmap.lck && (mresize_flags & (MDBX_MRESIZE_MAY_UNMAP | MDBX_MRESIZE_MAY_MOVE)) != 0)
345
+ lck_rdt_unlock(env);
346
+ int err = osal_fastmutex_release(&env->remap_guard);
347
+ #endif /* Windows */
348
+ if (err != MDBX_SUCCESS) {
349
+ FATAL("failed resume-after-remap: errcode %d", err);
350
+ return MDBX_PANIC;
351
+ }
352
+ return rc;
353
+ }
354
#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
/* Maintains the Valgrind/ASAN accessibility marks for the tail of the map.
 *
 * With a non-null txn (transaction start) the region up to the transaction's
 * first unallocated page is made accessible and env->poison_edge is raised
 * to it if needed. With a null txn (transaction end) the range between the
 * largest page still in use and env->poison_edge is marked inaccessible and
 * env->poison_edge is lowered accordingly. */
void dxb_sanitize_tail(MDBX_env *env, MDBX_txn *txn) {
#if !defined(__SANITIZE_ADDRESS__)
  if (!RUNNING_ON_VALGRIND)
    return;
#endif

  if (txn) {
    /* transaction start */
    if (env->poison_edge < txn->geo.first_unallocated)
      env->poison_edge = txn->geo.first_unallocated;
    VALGRIND_MAKE_MEM_DEFINED(env->dxb_mmap.base, pgno2bytes(env, txn->geo.first_unallocated));
    MDBX_ASAN_UNPOISON_MEMORY_REGION(env->dxb_mmap.base, pgno2bytes(env, txn->geo.first_unallocated));
    /* don't touch more, it should be already poisoned */
    return;
  }

  /* transaction end */
  if (env->pid != osal_getpid()) {
    /* resurrect after fork */
    return;
  }

  bool locked_here = false;
  pgno_t tail = MAX_PAGENO + 1;
  if (env_owned_wrtxn(env)) {
    /* inside write-txn */
    tail = meta_recent(env, &env->basal_txn->tw.troika).ptr_v->geometry.first_unallocated;
  } else if (env->flags & MDBX_RDONLY) {
    /* read-only mode, no write-txn, no wlock mutex */
    tail = NUM_METAS;
  } else if (lck_txn_lock(env, true) == MDBX_SUCCESS) {
    /* no write-txn */
    tail = NUM_METAS;
    locked_here = true;
  } else {
    /* write txn is running, therefore shouldn't poison any memory range */
    return;
  }

  tail = mvcc_largest_this(env, tail);
  const pgno_t poisoned_from = env->poison_edge;
  if (tail < poisoned_from) {
    eASSERT(env, tail >= NUM_METAS);
    env->poison_edge = tail;
    VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, tail)),
                               pgno2bytes(env, poisoned_from - tail));
    MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, tail)),
                                   pgno2bytes(env, poisoned_from - tail));
  }

  /* Release the txn lock only if it was taken by this call. */
  if (locked_here)
    lck_txn_unlock(env);
}
#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */
400
+
401
+ /* Turn on/off readahead. It's harmful when the DB is larger than RAM. */
402
+ __cold int dxb_set_readahead(const MDBX_env *env, const pgno_t edge, const bool enable, const bool force_whole) {
403
+ eASSERT(env, edge >= NUM_METAS && edge <= MAX_PAGENO + 1);
404
+ eASSERT(env, (enable & 1) == (enable != 0));
405
+ const bool toggle = force_whole || ((enable ^ env->lck->readahead_anchor) & 1) || !env->lck->readahead_anchor;
406
+ const pgno_t prev_edge = env->lck->readahead_anchor >> 1;
407
+ const size_t limit = env->dxb_mmap.limit;
408
+ size_t offset = toggle ? 0 : pgno_align2os_bytes(env, (prev_edge < edge) ? prev_edge : edge);
409
+ offset = (offset < limit) ? offset : limit;
410
+
411
+ size_t length = pgno_align2os_bytes(env, (prev_edge < edge) ? edge : prev_edge);
412
+ length = (length < limit) ? length : limit;
413
+ length -= offset;
414
+
415
+ eASSERT(env, 0 <= (intptr_t)length);
416
+ if (length == 0)
417
+ return MDBX_SUCCESS;
418
+
419
+ NOTICE("readahead %s %u..%u", enable ? "ON" : "OFF", bytes2pgno(env, offset), bytes2pgno(env, offset + length));
420
+
421
+ #if defined(F_RDAHEAD)
422
+ if (toggle && unlikely(fcntl(env->lazy_fd, F_RDAHEAD, enable) == -1))
423
+ return errno;
424
+ #endif /* F_RDAHEAD */
425
+
426
+ int err;
427
+ void *const ptr = ptr_disp(env->dxb_mmap.base, offset);
428
+ if (enable) {
429
+ #if defined(MADV_NORMAL)
430
+ err = madvise(ptr, length, MADV_NORMAL) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
431
+ if (unlikely(MDBX_IS_ERROR(err)))
432
+ return err;
433
+ #elif defined(POSIX_MADV_NORMAL)
434
+ err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_NORMAL));
435
+ if (unlikely(MDBX_IS_ERROR(err)))
436
+ return err;
437
+ #elif defined(POSIX_FADV_NORMAL) && defined(POSIX_FADV_WILLNEED)
438
+ err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_NORMAL));
439
+ if (unlikely(MDBX_IS_ERROR(err)))
440
+ return err;
441
+ #elif defined(_WIN32) || defined(_WIN64)
442
+ /* no madvise on Windows */
443
+ #else
444
+ #warning "FIXME"
445
+ #endif
446
+ if (toggle) {
447
+ /* NOTE: Seems there is a bug in the Mach/Darwin/OSX kernel,
448
+ * because MADV_WILLNEED with offset != 0 may cause SIGBUS
449
+ * on following access to the hinted region.
450
+ * 19.6.0 Darwin Kernel Version 19.6.0: Tue Jan 12 22:13:05 PST 2021;
451
+ * root:xnu-6153.141.16~1/RELEASE_X86_64 x86_64 */
452
+ #if defined(F_RDADVISE)
453
+ struct radvisory hint;
454
+ hint.ra_offset = offset;
455
+ hint.ra_count = unlikely(length > INT_MAX && sizeof(length) > sizeof(hint.ra_count)) ? INT_MAX : (int)length;
456
+ (void)/* Ignore ENOTTY for DB on the ram-disk and so on */ fcntl(env->lazy_fd, F_RDADVISE, &hint);
457
+ #elif defined(MADV_WILLNEED)
458
+ err = madvise(ptr, length, MADV_WILLNEED) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
459
+ if (unlikely(MDBX_IS_ERROR(err)))
460
+ return err;
461
+ #elif defined(POSIX_MADV_WILLNEED)
462
+ err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_WILLNEED));
463
+ if (unlikely(MDBX_IS_ERROR(err)))
464
+ return err;
465
+ #elif defined(_WIN32) || defined(_WIN64)
466
+ if (imports.PrefetchVirtualMemory) {
467
+ WIN32_MEMORY_RANGE_ENTRY hint;
468
+ hint.VirtualAddress = ptr;
469
+ hint.NumberOfBytes = length;
470
+ (void)imports.PrefetchVirtualMemory(GetCurrentProcess(), 1, &hint, 0);
471
+ }
472
+ #elif defined(POSIX_FADV_WILLNEED)
473
+ err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_WILLNEED));
474
+ if (unlikely(MDBX_IS_ERROR(err)))
475
+ return err;
476
+ #else
477
+ #warning "FIXME"
478
+ #endif
479
+ }
480
+ } else {
481
+ mincore_clean_cache(env);
482
+ #if defined(MADV_RANDOM)
483
+ err = madvise(ptr, length, MADV_RANDOM) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
484
+ if (unlikely(MDBX_IS_ERROR(err)))
485
+ return err;
486
+ #elif defined(POSIX_MADV_RANDOM)
487
+ err = ignore_enosys(posix_madvise(ptr, length, POSIX_MADV_RANDOM));
488
+ if (unlikely(MDBX_IS_ERROR(err)))
489
+ return err;
490
+ #elif defined(POSIX_FADV_RANDOM)
491
+ err = ignore_enosys(posix_fadvise(env->lazy_fd, offset, length, POSIX_FADV_RANDOM));
492
+ if (unlikely(MDBX_IS_ERROR(err)))
493
+ return err;
494
+ #elif defined(_WIN32) || defined(_WIN64)
495
+ /* no madvise on Windows */
496
+ #else
497
+ #warning "FIXME"
498
+ #endif /* MADV_RANDOM */
499
+ }
500
+
501
+ env->lck->readahead_anchor = (enable & 1) + (edge << 1);
502
+ err = MDBX_SUCCESS;
503
+ return err;
504
+ }
505
+
506
+ __cold int dxb_setup(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits) {
507
+ meta_t header;
508
+ eASSERT(env, !(env->flags & ENV_ACTIVE));
509
+ int rc = MDBX_RESULT_FALSE;
510
+ int err = dxb_read_header(env, &header, lck_rc, mode_bits);
511
+ if (unlikely(err != MDBX_SUCCESS)) {
512
+ if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA || (env->flags & MDBX_RDONLY) != 0 ||
513
+ /* recovery mode */ env->stuck_meta >= 0)
514
+ return err;
515
+
516
+ DEBUG("%s", "create new database");
517
+ rc = /* new database */ MDBX_RESULT_TRUE;
518
+
519
+ if (!env->geo_in_bytes.now) {
520
+ /* set defaults if not configured */
521
+ err = mdbx_env_set_geometry(env, 0, -1, -1, -1, -1, -1);
522
+ if (unlikely(err != MDBX_SUCCESS))
523
+ return err;
524
+ }
525
+
526
+ err = env_page_auxbuffer(env);
527
+ if (unlikely(err != MDBX_SUCCESS))
528
+ return err;
529
+
530
+ header = *meta_init_triplet(env, env->page_auxbuf);
531
+ err = osal_pwrite(env->lazy_fd, env->page_auxbuf, env->ps * (size_t)NUM_METAS, 0);
532
+ if (unlikely(err != MDBX_SUCCESS))
533
+ return err;
534
+
535
+ err = osal_ftruncate(env->lazy_fd, env->dxb_mmap.filesize = env->dxb_mmap.current = env->geo_in_bytes.now);
536
+ if (unlikely(err != MDBX_SUCCESS))
537
+ return err;
538
+
539
+ #ifndef NDEBUG /* just for checking */
540
+ err = dxb_read_header(env, &header, lck_rc, mode_bits);
541
+ if (unlikely(err != MDBX_SUCCESS))
542
+ return err;
543
+ #endif
544
+ }
545
+
546
+ VERBOSE("header: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
547
+ " +%u -%u, txn_id %" PRIaTXN ", %s",
548
+ header.trees.main.root, header.trees.gc.root, header.geometry.lower, header.geometry.first_unallocated,
549
+ header.geometry.now, header.geometry.upper, pv2pages(header.geometry.grow_pv),
550
+ pv2pages(header.geometry.shrink_pv), unaligned_peek_u64(4, header.txnid_a), durable_caption(&header));
551
+
552
+ if (unlikely((header.trees.gc.flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY)) {
553
+ ERROR("unexpected/invalid db-flags 0x%x for %s", header.trees.gc.flags, "GC/FreeDB");
554
+ return MDBX_INCOMPATIBLE;
555
+ }
556
+ env->dbs_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY;
557
+ env->kvs[FREE_DBI].clc.k.cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */
558
+ env->kvs[FREE_DBI].clc.k.lmax = env->kvs[FREE_DBI].clc.k.lmin = 8;
559
+ env->kvs[FREE_DBI].clc.v.cmp = cmp_lenfast;
560
+ env->kvs[FREE_DBI].clc.v.lmin = 4;
561
+ env->kvs[FREE_DBI].clc.v.lmax = mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY);
562
+
563
+ if (env->ps != header.pagesize)
564
+ env_setup_pagesize(env, header.pagesize);
565
+ if ((env->flags & MDBX_RDONLY) == 0) {
566
+ err = env_page_auxbuffer(env);
567
+ if (unlikely(err != MDBX_SUCCESS))
568
+ return err;
569
+ }
570
+
571
+ size_t expected_filesize = 0;
572
+ const size_t used_bytes = pgno2bytes(env, header.geometry.first_unallocated);
573
+ const size_t used_aligned2os_bytes = ceil_powerof2(used_bytes, globals.sys_pagesize);
574
+ if ((env->flags & MDBX_RDONLY) /* readonly */
575
+ || lck_rc != MDBX_RESULT_TRUE /* not exclusive */
576
+ || /* recovery mode */ env->stuck_meta >= 0) {
577
+ /* use present params from db */
578
+ const size_t pagesize = header.pagesize;
579
+ err = mdbx_env_set_geometry(env, header.geometry.lower * pagesize, header.geometry.now * pagesize,
580
+ header.geometry.upper * pagesize, pv2pages(header.geometry.grow_pv) * pagesize,
581
+ pv2pages(header.geometry.shrink_pv) * pagesize, header.pagesize);
582
+ if (unlikely(err != MDBX_SUCCESS)) {
583
+ ERROR("%s: err %d", "could not apply geometry from db", err);
584
+ return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err;
585
+ }
586
+ } else if (env->geo_in_bytes.now) {
587
+ /* silently growth to last used page */
588
+ if (env->geo_in_bytes.now < used_aligned2os_bytes)
589
+ env->geo_in_bytes.now = used_aligned2os_bytes;
590
+ if (env->geo_in_bytes.upper < used_aligned2os_bytes)
591
+ env->geo_in_bytes.upper = used_aligned2os_bytes;
592
+
593
+ /* apply preconfigured params, but only if substantial changes:
594
+ * - upper or lower limit changes
595
+ * - shrink threshold or growth step
596
+ * But ignore change just a 'now/current' size. */
597
+ if (bytes_align2os_bytes(env, env->geo_in_bytes.upper) != pgno2bytes(env, header.geometry.upper) ||
598
+ bytes_align2os_bytes(env, env->geo_in_bytes.lower) != pgno2bytes(env, header.geometry.lower) ||
599
+ bytes_align2os_bytes(env, env->geo_in_bytes.shrink) != pgno2bytes(env, pv2pages(header.geometry.shrink_pv)) ||
600
+ bytes_align2os_bytes(env, env->geo_in_bytes.grow) != pgno2bytes(env, pv2pages(header.geometry.grow_pv))) {
601
+
602
+ if (env->geo_in_bytes.shrink && env->geo_in_bytes.now > used_bytes)
603
+ /* pre-shrink if enabled */
604
+ env->geo_in_bytes.now = used_bytes + env->geo_in_bytes.shrink - used_bytes % env->geo_in_bytes.shrink;
605
+
606
+ /* сейчас БД еще не открыта, поэтому этот вызов не изменит геометрию, но проверит и скорректирует параметры
607
+ * с учетом реального размера страницы. */
608
+ err = mdbx_env_set_geometry(env, env->geo_in_bytes.lower, env->geo_in_bytes.now, env->geo_in_bytes.upper,
609
+ env->geo_in_bytes.grow, env->geo_in_bytes.shrink, header.pagesize);
610
+ if (unlikely(err != MDBX_SUCCESS)) {
611
+ ERROR("%s: err %d", "could not apply preconfigured db-geometry", err);
612
+ return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err;
613
+ }
614
+
615
+ /* altering fields to match geometry given from user */
616
+ expected_filesize = pgno_align2os_bytes(env, header.geometry.now);
617
+ header.geometry.now = bytes2pgno(env, env->geo_in_bytes.now);
618
+ header.geometry.lower = bytes2pgno(env, env->geo_in_bytes.lower);
619
+ header.geometry.upper = bytes2pgno(env, env->geo_in_bytes.upper);
620
+ header.geometry.grow_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.grow));
621
+ header.geometry.shrink_pv = pages2pv(bytes2pgno(env, env->geo_in_bytes.shrink));
622
+
623
+ VERBOSE("amending: root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO
624
+ " +%u -%u, txn_id %" PRIaTXN ", %s",
625
+ header.trees.main.root, header.trees.gc.root, header.geometry.lower, header.geometry.first_unallocated,
626
+ header.geometry.now, header.geometry.upper, pv2pages(header.geometry.grow_pv),
627
+ pv2pages(header.geometry.shrink_pv), unaligned_peek_u64(4, header.txnid_a), durable_caption(&header));
628
+ } else {
629
+ /* fetch back 'now/current' size, since it was ignored during comparison and may differ. */
630
+ env->geo_in_bytes.now = pgno_align2os_bytes(env, header.geometry.now);
631
+ }
632
+ ENSURE(env, header.geometry.now >= header.geometry.first_unallocated);
633
+ } else {
634
+ /* geo-params are not pre-configured by user, get current values from the meta. */
635
+ env->geo_in_bytes.now = pgno2bytes(env, header.geometry.now);
636
+ env->geo_in_bytes.lower = pgno2bytes(env, header.geometry.lower);
637
+ env->geo_in_bytes.upper = pgno2bytes(env, header.geometry.upper);
638
+ env->geo_in_bytes.grow = pgno2bytes(env, pv2pages(header.geometry.grow_pv));
639
+ env->geo_in_bytes.shrink = pgno2bytes(env, pv2pages(header.geometry.shrink_pv));
640
+ }
641
+
642
+ ENSURE(env, pgno_align2os_bytes(env, header.geometry.now) == env->geo_in_bytes.now);
643
+ ENSURE(env, env->geo_in_bytes.now >= used_bytes);
644
+ if (!expected_filesize)
645
+ expected_filesize = env->geo_in_bytes.now;
646
+ const uint64_t filesize_before = env->dxb_mmap.filesize;
647
+ if (unlikely(filesize_before != env->geo_in_bytes.now)) {
648
+ if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) {
649
+ VERBOSE("filesize mismatch (expect %" PRIuPTR "b/%" PRIaPGNO "p, have %" PRIu64 "b/%" PRIu64
650
+ "p), assume other process working",
651
+ env->geo_in_bytes.now, bytes2pgno(env, env->geo_in_bytes.now), filesize_before,
652
+ filesize_before >> env->ps2ln);
653
+ } else {
654
+ if (filesize_before != expected_filesize)
655
+ WARNING("filesize mismatch (expect %" PRIuSIZE "b/%" PRIaPGNO "p, have %" PRIu64 "b/%" PRIu64 "p)",
656
+ expected_filesize, bytes2pgno(env, expected_filesize), filesize_before, filesize_before >> env->ps2ln);
657
+ if (filesize_before < used_bytes) {
658
+ ERROR("last-page beyond end-of-file (last %" PRIaPGNO ", have %" PRIaPGNO ")",
659
+ header.geometry.first_unallocated, bytes2pgno(env, (size_t)filesize_before));
660
+ return MDBX_CORRUPTED;
661
+ }
662
+
663
+ if (env->flags & MDBX_RDONLY) {
664
+ if (filesize_before & (globals.sys_allocation_granularity - 1)) {
665
+ ERROR("filesize should be rounded-up to system allocation granularity %u",
666
+ globals.sys_allocation_granularity);
667
+ return MDBX_WANNA_RECOVERY;
668
+ }
669
+ WARNING("%s", "ignore filesize mismatch in readonly-mode");
670
+ } else {
671
+ VERBOSE("will resize datafile to %" PRIuSIZE " bytes, %" PRIaPGNO " pages", env->geo_in_bytes.now,
672
+ bytes2pgno(env, env->geo_in_bytes.now));
673
+ }
674
+ }
675
+ }
676
+
677
+ VERBOSE("current boot-id %" PRIx64 "-%" PRIx64 " (%savailable)", globals.bootid.x, globals.bootid.y,
678
+ (globals.bootid.x | globals.bootid.y) ? "" : "not-");
679
+
680
+ /* calculate readahead hint before mmap with zero redundant pages */
681
+ const bool readahead =
682
+ !(env->flags & MDBX_NORDAHEAD) && mdbx_is_readahead_reasonable(used_bytes, 0) == MDBX_RESULT_TRUE;
683
+
684
+ err = osal_mmap(env->flags, &env->dxb_mmap, env->geo_in_bytes.now, env->geo_in_bytes.upper,
685
+ (lck_rc && env->stuck_meta < 0) ? MMAP_OPTION_TRUNCATE : 0, env->pathname.dxb);
686
+ if (unlikely(err != MDBX_SUCCESS))
687
+ return err;
688
+
689
+ #if defined(MADV_DONTDUMP)
690
+ err =
691
+ madvise(env->dxb_mmap.base, env->dxb_mmap.limit, MADV_DONTDUMP) ? ignore_enosys_and_eagain(errno) : MDBX_SUCCESS;
692
+ if (unlikely(MDBX_IS_ERROR(err)))
693
+ return err;
694
+ #endif /* MADV_DONTDUMP */
695
+ #if defined(MADV_DODUMP)
696
+ if (globals.runtime_flags & MDBX_DBG_DUMP) {
697
+ const size_t meta_length_aligned2os = pgno_align2os_bytes(env, NUM_METAS);
698
+ err = madvise(env->dxb_mmap.base, meta_length_aligned2os, MADV_DODUMP) ? ignore_enosys_and_eagain(errno)
699
+ : MDBX_SUCCESS;
700
+ if (unlikely(MDBX_IS_ERROR(err)))
701
+ return err;
702
+ }
703
+ #endif /* MADV_DODUMP */
704
+
705
+ #ifdef ENABLE_MEMCHECK
706
+ env->valgrind_handle = VALGRIND_CREATE_BLOCK(env->dxb_mmap.base, env->dxb_mmap.limit, "mdbx");
707
+ #endif /* ENABLE_MEMCHECK */
708
+
709
+ eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && used_bytes <= env->dxb_mmap.limit);
710
+ #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
711
+ if (env->dxb_mmap.filesize > used_bytes && env->dxb_mmap.filesize < env->dxb_mmap.limit) {
712
+ VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, used_bytes), env->dxb_mmap.filesize - used_bytes);
713
+ MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, used_bytes), env->dxb_mmap.filesize - used_bytes);
714
+ }
715
+ env->poison_edge =
716
+ bytes2pgno(env, (env->dxb_mmap.filesize < env->dxb_mmap.limit) ? env->dxb_mmap.filesize : env->dxb_mmap.limit);
717
+ #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */
718
+
719
+ troika_t troika = meta_tap(env);
720
+ #if MDBX_DEBUG
721
+ meta_troika_dump(env, &troika);
722
+ #endif
723
+ //-------------------------------- validate/rollback head & steady meta-pages
724
+ if (unlikely(env->stuck_meta >= 0)) {
725
+ /* recovery mode */
726
+ meta_t clone;
727
+ meta_t const *const target = METAPAGE(env, env->stuck_meta);
728
+ err = meta_validate_copy(env, target, &clone);
729
+ if (unlikely(err != MDBX_SUCCESS)) {
730
+ ERROR("target meta[%u] is corrupted", bytes2pgno(env, ptr_dist(data_page(target), env->dxb_mmap.base)));
731
+ meta_troika_dump(env, &troika);
732
+ return MDBX_CORRUPTED;
733
+ }
734
+ } else /* not recovery mode */
735
+ while (1) {
736
+ const unsigned meta_clash_mask = meta_eq_mask(&troika);
737
+ if (unlikely(meta_clash_mask)) {
738
+ ERROR("meta-pages are clashed: mask 0x%d", meta_clash_mask);
739
+ meta_troika_dump(env, &troika);
740
+ return MDBX_CORRUPTED;
741
+ }
742
+
743
+ if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) {
744
+ /* non-exclusive mode,
745
+ * meta-pages should be validated by a first process opened the DB */
746
+ if (troika.recent == troika.prefer_steady)
747
+ break;
748
+
749
+ if (!env->lck_mmap.lck) {
750
+ /* LY: without-lck (read-only) mode, so it is impossible that other
751
+ * process made weak checkpoint. */
752
+ ERROR("%s", "without-lck, unable recovery/rollback");
753
+ meta_troika_dump(env, &troika);
754
+ return MDBX_WANNA_RECOVERY;
755
+ }
756
+
757
+ /* LY: assume just have a collision with other running process,
758
+ * or someone make a weak checkpoint */
759
+ VERBOSE("%s", "assume collision or online weak checkpoint");
760
+ break;
761
+ }
762
+ eASSERT(env, lck_rc == MDBX_RESULT_TRUE);
763
+ /* exclusive mode */
764
+
765
+ const meta_ptr_t recent = meta_recent(env, &troika);
766
+ const meta_ptr_t prefer_steady = meta_prefer_steady(env, &troika);
767
+ meta_t clone;
768
+ if (prefer_steady.is_steady) {
769
+ err = meta_validate_copy(env, prefer_steady.ptr_c, &clone);
770
+ if (unlikely(err != MDBX_SUCCESS)) {
771
+ ERROR("meta[%u] with %s txnid %" PRIaTXN " is corrupted, %s needed",
772
+ bytes2pgno(env, ptr_dist(prefer_steady.ptr_c, env->dxb_mmap.base)), "steady", prefer_steady.txnid,
773
+ "manual recovery");
774
+ meta_troika_dump(env, &troika);
775
+ return MDBX_CORRUPTED;
776
+ }
777
+ if (prefer_steady.ptr_c == recent.ptr_c)
778
+ break;
779
+ }
780
+
781
+ const pgno_t pgno = bytes2pgno(env, ptr_dist(recent.ptr_c, env->dxb_mmap.base));
782
+ const bool last_valid = meta_validate_copy(env, recent.ptr_c, &clone) == MDBX_SUCCESS;
783
+ eASSERT(env, !prefer_steady.is_steady || recent.txnid != prefer_steady.txnid);
784
+ if (unlikely(!last_valid)) {
785
+ if (unlikely(!prefer_steady.is_steady)) {
786
+ ERROR("%s for open or automatic rollback, %s", "there are no suitable meta-pages",
787
+ "manual recovery is required");
788
+ meta_troika_dump(env, &troika);
789
+ return MDBX_CORRUPTED;
790
+ }
791
+ WARNING("meta[%u] with last txnid %" PRIaTXN " is corrupted, rollback needed", pgno, recent.txnid);
792
+ meta_troika_dump(env, &troika);
793
+ goto purge_meta_head;
794
+ }
795
+
796
+ if (meta_bootid_match(recent.ptr_c)) {
797
+ if (env->flags & MDBX_RDONLY) {
798
+ ERROR("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: "
799
+ "rollback NOT needed, steady-sync NEEDED%s",
800
+ "opening after an unclean shutdown", globals.bootid.x, globals.bootid.y,
801
+ ", but unable in read-only mode");
802
+ meta_troika_dump(env, &troika);
803
+ return MDBX_WANNA_RECOVERY;
804
+ }
805
+ WARNING("%s, but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH: "
806
+ "rollback NOT needed, steady-sync NEEDED%s",
807
+ "opening after an unclean shutdown", globals.bootid.x, globals.bootid.y, "");
808
+ header = clone;
809
+ env->lck->unsynced_pages.weak = header.geometry.first_unallocated;
810
+ if (!env->lck->eoos_timestamp.weak)
811
+ env->lck->eoos_timestamp.weak = osal_monotime();
812
+ break;
813
+ }
814
+ if (unlikely(!prefer_steady.is_steady)) {
815
+ ERROR("%s, but %s for automatic rollback: %s", "opening after an unclean shutdown",
816
+ "there are no suitable meta-pages", "manual recovery is required");
817
+ meta_troika_dump(env, &troika);
818
+ return MDBX_CORRUPTED;
819
+ }
820
+ if (env->flags & MDBX_RDONLY) {
821
+ ERROR("%s and rollback needed: (from head %" PRIaTXN " to steady %" PRIaTXN ")%s",
822
+ "opening after an unclean shutdown", recent.txnid, prefer_steady.txnid, ", but unable in read-only mode");
823
+ meta_troika_dump(env, &troika);
824
+ return MDBX_WANNA_RECOVERY;
825
+ }
826
+
827
+ purge_meta_head:
828
+ NOTICE("%s and doing automatic rollback: "
829
+ "purge%s meta[%u] with%s txnid %" PRIaTXN,
830
+ "opening after an unclean shutdown", last_valid ? "" : " invalid", pgno, last_valid ? " weak" : "",
831
+ recent.txnid);
832
+ meta_troika_dump(env, &troika);
833
+ ENSURE(env, prefer_steady.is_steady);
834
+ err = meta_override(env, pgno, 0, last_valid ? recent.ptr_c : prefer_steady.ptr_c);
835
+ if (err) {
836
+ ERROR("rollback: overwrite meta[%u] with txnid %" PRIaTXN ", error %d", pgno, recent.txnid, err);
837
+ return err;
838
+ }
839
+ troika = meta_tap(env);
840
+ ENSURE(env, 0 == meta_txnid(recent.ptr_v));
841
+ ENSURE(env, 0 == meta_eq_mask(&troika));
842
+ }
843
+
844
+ if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
845
+ //-------------------------------------------------- shrink DB & update geo
846
+ /* re-check size after mmap */
847
+ if ((env->dxb_mmap.current & (globals.sys_pagesize - 1)) != 0 || env->dxb_mmap.current < used_bytes) {
848
+ ERROR("unacceptable/unexpected datafile size %" PRIuPTR, env->dxb_mmap.current);
849
+ return MDBX_PROBLEM;
850
+ }
851
+ if (env->dxb_mmap.current != env->geo_in_bytes.now) {
852
+ header.geometry.now = bytes2pgno(env, env->dxb_mmap.current);
853
+ NOTICE("need update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO " pages", env->dxb_mmap.current,
854
+ header.geometry.now);
855
+ }
856
+
857
+ const meta_ptr_t recent = meta_recent(env, &troika);
858
+ if (/* не учитываем различия в geo.first_unallocated */
859
+ header.geometry.grow_pv != recent.ptr_c->geometry.grow_pv ||
860
+ header.geometry.shrink_pv != recent.ptr_c->geometry.shrink_pv ||
861
+ header.geometry.lower != recent.ptr_c->geometry.lower ||
862
+ header.geometry.upper != recent.ptr_c->geometry.upper || header.geometry.now != recent.ptr_c->geometry.now) {
863
+ if ((env->flags & MDBX_RDONLY) != 0 ||
864
+ /* recovery mode */ env->stuck_meta >= 0) {
865
+ WARNING("skipped update meta.geo in %s mode: from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO
866
+ "/s%u-g%u, to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u",
867
+ (env->stuck_meta < 0) ? "read-only" : "recovery", recent.ptr_c->geometry.lower,
868
+ recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper, pv2pages(recent.ptr_c->geometry.shrink_pv),
869
+ pv2pages(recent.ptr_c->geometry.grow_pv), header.geometry.lower, header.geometry.now,
870
+ header.geometry.upper, pv2pages(header.geometry.shrink_pv), pv2pages(header.geometry.grow_pv));
871
+ } else {
872
+ const txnid_t next_txnid = safe64_txnid_next(recent.txnid);
873
+ if (unlikely(next_txnid > MAX_TXNID)) {
874
+ ERROR("txnid overflow, raise %d", MDBX_TXN_FULL);
875
+ return MDBX_TXN_FULL;
876
+ }
877
+ NOTICE("updating meta.geo: "
878
+ "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN "), "
879
+ "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN ")",
880
+ recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper,
881
+ pv2pages(recent.ptr_c->geometry.shrink_pv), pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid,
882
+ header.geometry.lower, header.geometry.now, header.geometry.upper, pv2pages(header.geometry.shrink_pv),
883
+ pv2pages(header.geometry.grow_pv), next_txnid);
884
+
885
+ ENSURE(env, header.unsafe_txnid == recent.txnid);
886
+ meta_set_txnid(env, &header, next_txnid);
887
+ err = dxb_sync_locked(env, env->flags | txn_shrink_allowed, &header, &troika);
888
+ if (err) {
889
+ ERROR("error %d, while updating meta.geo: "
890
+ "from l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN "), "
891
+ "to l%" PRIaPGNO "-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u (txn#%" PRIaTXN ")",
892
+ err, recent.ptr_c->geometry.lower, recent.ptr_c->geometry.now, recent.ptr_c->geometry.upper,
893
+ pv2pages(recent.ptr_c->geometry.shrink_pv), pv2pages(recent.ptr_c->geometry.grow_pv), recent.txnid,
894
+ header.geometry.lower, header.geometry.now, header.geometry.upper, pv2pages(header.geometry.shrink_pv),
895
+ pv2pages(header.geometry.grow_pv), header.unsafe_txnid);
896
+ return err;
897
+ }
898
+ }
899
+ }
900
+
901
+ atomic_store32(&env->lck->discarded_tail, bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed);
902
+
903
+ if ((env->flags & MDBX_RDONLY) == 0 && env->stuck_meta < 0 &&
904
+ (globals.runtime_flags & MDBX_DBG_DONT_UPGRADE) == 0) {
905
+ for (unsigned n = 0; n < NUM_METAS; ++n) {
906
+ meta_t *const meta = METAPAGE(env, n);
907
+ if (unlikely(unaligned_peek_u64(4, &meta->magic_and_version) != MDBX_DATA_MAGIC) ||
908
+ (meta->dxbid.x | meta->dxbid.y) == 0 || (meta->gc_flags & ~DB_PERSISTENT_FLAGS)) {
909
+ const txnid_t txnid = meta_is_used(&troika, n) ? constmeta_txnid(meta) : 0;
910
+ NOTICE("%s %s"
911
+ "meta[%u], txnid %" PRIaTXN,
912
+ "updating db-format/guid signature for", meta_is_steady(meta) ? "stead-" : "weak-", n, txnid);
913
+ err = meta_override(env, n, txnid, meta);
914
+ if (unlikely(err != MDBX_SUCCESS) &&
915
+ /* Just ignore the MDBX_PROBLEM error, since here it is
916
+ * returned only in case of the attempt to upgrade an obsolete
917
+ * meta-page that is invalid for current state of a DB,
918
+ * e.g. after shrinking DB file */
919
+ err != MDBX_PROBLEM) {
920
+ ERROR("%s meta[%u], txnid %" PRIaTXN ", error %d", "updating db-format signature for", n, txnid, err);
921
+ return err;
922
+ }
923
+ troika = meta_tap(env);
924
+ }
925
+ }
926
+ }
927
+ } /* lck exclusive, lck_rc == MDBX_RESULT_TRUE */
928
+
929
+ //---------------------------------------------------- setup madvise/readahead
930
+ if (used_aligned2os_bytes < env->dxb_mmap.current) {
931
+ #if defined(MADV_REMOVE)
932
+ if (lck_rc && (env->flags & MDBX_WRITEMAP) != 0 &&
933
+ /* not recovery mode */ env->stuck_meta < 0) {
934
+ NOTICE("open-MADV_%s %u..%u", "REMOVE (deallocate file space)", env->lck->discarded_tail.weak,
935
+ bytes2pgno(env, env->dxb_mmap.current));
936
+ err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), env->dxb_mmap.current - used_aligned2os_bytes,
937
+ MADV_REMOVE)
938
+ ? ignore_enosys_and_eagain(errno)
939
+ : MDBX_SUCCESS;
940
+ if (unlikely(MDBX_IS_ERROR(err)))
941
+ return err;
942
+ }
943
+ #endif /* MADV_REMOVE */
944
+ #if defined(MADV_DONTNEED)
945
+ NOTICE("open-MADV_%s %u..%u", "DONTNEED", env->lck->discarded_tail.weak, bytes2pgno(env, env->dxb_mmap.current));
946
+ err = madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes), env->dxb_mmap.current - used_aligned2os_bytes,
947
+ MADV_DONTNEED)
948
+ ? ignore_enosys_and_eagain(errno)
949
+ : MDBX_SUCCESS;
950
+ if (unlikely(MDBX_IS_ERROR(err)))
951
+ return err;
952
+ #elif defined(POSIX_MADV_DONTNEED)
953
+ err = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, used_aligned2os_bytes),
954
+ env->dxb_mmap.current - used_aligned2os_bytes, POSIX_MADV_DONTNEED));
955
+ if (unlikely(MDBX_IS_ERROR(err)))
956
+ return err;
957
+ #elif defined(POSIX_FADV_DONTNEED)
958
+ err = ignore_enosys(posix_fadvise(env->lazy_fd, used_aligned2os_bytes,
959
+ env->dxb_mmap.current - used_aligned2os_bytes, POSIX_FADV_DONTNEED));
960
+ if (unlikely(MDBX_IS_ERROR(err)))
961
+ return err;
962
+ #endif /* MADV_DONTNEED */
963
+ }
964
+
965
+ err = dxb_set_readahead(env, bytes2pgno(env, used_bytes), readahead, true);
966
+ if (unlikely(err != MDBX_SUCCESS))
967
+ return err;
968
+
969
+ return rc;
970
+ }
971
+
972
+ int dxb_sync_locked(MDBX_env *env, unsigned flags, meta_t *const pending, troika_t *const troika) {
973
+ eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0);
974
+ eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY);
975
+ eASSERT(env, check_table_flags(pending->trees.main.flags));
976
+ const meta_t *const meta0 = METAPAGE(env, 0);
977
+ const meta_t *const meta1 = METAPAGE(env, 1);
978
+ const meta_t *const meta2 = METAPAGE(env, 2);
979
+ const meta_ptr_t head = meta_recent(env, troika);
980
+ int rc;
981
+
982
+ eASSERT(env, pending < METAPAGE(env, 0) || pending > METAPAGE(env, NUM_METAS));
983
+ eASSERT(env, (env->flags & (MDBX_RDONLY | ENV_FATAL_ERROR)) == 0);
984
+ eASSERT(env, pending->geometry.first_unallocated <= pending->geometry.now);
985
+
986
+ if (flags & MDBX_SAFE_NOSYNC) {
987
+ /* Check auto-sync conditions */
988
+ const pgno_t autosync_threshold = atomic_load32(&env->lck->autosync_threshold, mo_Relaxed);
989
+ const uint64_t autosync_period = atomic_load64(&env->lck->autosync_period, mo_Relaxed);
990
+ uint64_t eoos_timestamp;
991
+ if ((autosync_threshold && atomic_load64(&env->lck->unsynced_pages, mo_Relaxed) >= autosync_threshold) ||
992
+ (autosync_period && (eoos_timestamp = atomic_load64(&env->lck->eoos_timestamp, mo_Relaxed)) &&
993
+ osal_monotime() - eoos_timestamp >= autosync_period))
994
+ flags &= MDBX_WRITEMAP | txn_shrink_allowed; /* force steady */
995
+ }
996
+
997
+ pgno_t shrink = 0;
998
+ if (flags & txn_shrink_allowed) {
999
+ const size_t prev_discarded_pgno = atomic_load32(&env->lck->discarded_tail, mo_Relaxed);
1000
+ if (prev_discarded_pgno < pending->geometry.first_unallocated)
1001
+ env->lck->discarded_tail.weak = pending->geometry.first_unallocated;
1002
+ else if (prev_discarded_pgno >= pending->geometry.first_unallocated + env->madv_threshold) {
1003
+ /* LY: check conditions to discard unused pages */
1004
+ const pgno_t largest_pgno =
1005
+ mvcc_snapshot_largest(env, (head.ptr_c->geometry.first_unallocated > pending->geometry.first_unallocated)
1006
+ ? head.ptr_c->geometry.first_unallocated
1007
+ : pending->geometry.first_unallocated);
1008
+ eASSERT(env, largest_pgno >= NUM_METAS);
1009
+
1010
+ #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)
1011
+ const pgno_t edge = env->poison_edge;
1012
+ if (edge > largest_pgno) {
1013
+ env->poison_edge = largest_pgno;
1014
+ VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)),
1015
+ pgno2bytes(env, edge - largest_pgno));
1016
+ MDBX_ASAN_POISON_MEMORY_REGION(ptr_disp(env->dxb_mmap.base, pgno2bytes(env, largest_pgno)),
1017
+ pgno2bytes(env, edge - largest_pgno));
1018
+ }
1019
+ #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */
1020
+
1021
+ #if defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED)
1022
+ const size_t discard_edge_pgno = pgno_align2os_pgno(env, largest_pgno);
1023
+ if (prev_discarded_pgno >= discard_edge_pgno + env->madv_threshold) {
1024
+ const size_t prev_discarded_bytes = pgno_align2os_bytes(env, prev_discarded_pgno);
1025
+ const size_t discard_edge_bytes = pgno2bytes(env, discard_edge_pgno);
1026
+ /* из-за выравнивания prev_discarded_bytes и discard_edge_bytes
1027
+ * могут быть равны */
1028
+ if (prev_discarded_bytes > discard_edge_bytes) {
1029
+ NOTICE("shrink-MADV_%s %zu..%zu", "DONTNEED", discard_edge_pgno, prev_discarded_pgno);
1030
+ munlock_after(env, discard_edge_pgno, bytes_align2os_bytes(env, env->dxb_mmap.current));
1031
+ const uint32_t munlocks_before = atomic_load32(&env->lck->mlcnt[1], mo_Relaxed);
1032
+ #if defined(MADV_DONTNEED)
1033
+ int advise = MADV_DONTNEED;
1034
+ #if defined(MADV_FREE) && 0 /* MADV_FREE works for only anonymous vma at the moment */
1035
+ if ((env->flags & MDBX_WRITEMAP) && global.linux_kernel_version > 0x04050000)
1036
+ advise = MADV_FREE;
1037
+ #endif /* MADV_FREE */
1038
+ int err = madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes), prev_discarded_bytes - discard_edge_bytes,
1039
+ advise)
1040
+ ? ignore_enosys_and_eagain(errno)
1041
+ : MDBX_SUCCESS;
1042
+ #else
1043
+ int err = ignore_enosys(posix_madvise(ptr_disp(env->dxb_mmap.base, discard_edge_bytes),
1044
+ prev_discarded_bytes - discard_edge_bytes, POSIX_MADV_DONTNEED));
1045
+ #endif
1046
+ if (unlikely(MDBX_IS_ERROR(err))) {
1047
+ const uint32_t mlocks_after = atomic_load32(&env->lck->mlcnt[0], mo_Relaxed);
1048
+ if (err == MDBX_EINVAL) {
1049
+ const int severity = (mlocks_after - munlocks_before) ? MDBX_LOG_NOTICE : MDBX_LOG_WARN;
1050
+ if (LOG_ENABLED(severity))
1051
+ debug_log(severity, __func__, __LINE__,
1052
+ "%s-madvise: ignore EINVAL (%d) since some pages maybe "
1053
+ "locked (%u/%u mlcnt-processes)",
1054
+ "shrink", err, mlocks_after, munlocks_before);
1055
+ } else {
1056
+ ERROR("%s-madvise(%s, %zu, +%zu), %u/%u mlcnt-processes, err %d", "shrink", "DONTNEED",
1057
+ discard_edge_bytes, prev_discarded_bytes - discard_edge_bytes, mlocks_after, munlocks_before, err);
1058
+ return err;
1059
+ }
1060
+ } else
1061
+ env->lck->discarded_tail.weak = discard_edge_pgno;
1062
+ }
1063
+ }
1064
+ #endif /* MADV_DONTNEED || POSIX_MADV_DONTNEED */
1065
+
1066
+ /* LY: check conditions to shrink datafile */
1067
+ const pgno_t backlog_gap = 3 + pending->trees.gc.height * 3;
1068
+ pgno_t shrink_step = 0;
1069
+ if (pending->geometry.shrink_pv && pending->geometry.now - pending->geometry.first_unallocated >
1070
+ (shrink_step = pv2pages(pending->geometry.shrink_pv)) + backlog_gap) {
1071
+ if (pending->geometry.now > largest_pgno && pending->geometry.now - largest_pgno > shrink_step + backlog_gap) {
1072
+ const pgno_t aligner =
1073
+ pending->geometry.grow_pv ? /* grow_step */ pv2pages(pending->geometry.grow_pv) : shrink_step;
1074
+ const pgno_t with_backlog_gap = largest_pgno + backlog_gap;
1075
+ const pgno_t aligned =
1076
+ pgno_align2os_pgno(env, (size_t)with_backlog_gap + aligner - with_backlog_gap % aligner);
1077
+ const pgno_t bottom = (aligned > pending->geometry.lower) ? aligned : pending->geometry.lower;
1078
+ if (pending->geometry.now > bottom) {
1079
+ if (TROIKA_HAVE_STEADY(troika))
1080
+ /* force steady, but only if steady-checkpoint is present */
1081
+ flags &= MDBX_WRITEMAP | txn_shrink_allowed;
1082
+ shrink = pending->geometry.now - bottom;
1083
+ pending->geometry.now = bottom;
1084
+ if (unlikely(head.txnid == pending->unsafe_txnid)) {
1085
+ const txnid_t txnid = safe64_txnid_next(pending->unsafe_txnid);
1086
+ NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, pending->unsafe_txnid, txnid);
1087
+ ENSURE(env, !env->basal_txn || !env->txn);
1088
+ if (unlikely(txnid > MAX_TXNID)) {
1089
+ rc = MDBX_TXN_FULL;
1090
+ ERROR("txnid overflow, raise %d", rc);
1091
+ goto fail;
1092
+ }
1093
+ meta_set_txnid(env, pending, txnid);
1094
+ eASSERT(env, coherency_check_meta(env, pending, true));
1095
+ }
1096
+ }
1097
+ }
1098
+ }
1099
+ }
1100
+ }
1101
+
1102
+ /* LY: step#1 - sync previously written/updated data-pages */
1103
+ rc = MDBX_RESULT_FALSE /* carry steady */;
1104
+ if (atomic_load64(&env->lck->unsynced_pages, mo_Relaxed)) {
1105
+ eASSERT(env, ((flags ^ env->flags) & MDBX_WRITEMAP) == 0);
1106
+ enum osal_syncmode_bits mode_bits = MDBX_SYNC_NONE;
1107
+ unsigned sync_op = 0;
1108
+ if ((flags & MDBX_SAFE_NOSYNC) == 0) {
1109
+ sync_op = 1;
1110
+ mode_bits = MDBX_SYNC_DATA;
1111
+ if (pending->geometry.first_unallocated > meta_prefer_steady(env, troika).ptr_c->geometry.now)
1112
+ mode_bits |= MDBX_SYNC_SIZE;
1113
+ if (flags & MDBX_NOMETASYNC)
1114
+ mode_bits |= MDBX_SYNC_IODQ;
1115
+ } else if (unlikely(env->incore))
1116
+ goto skip_incore_sync;
1117
+ if (flags & MDBX_WRITEMAP) {
1118
+ #if MDBX_ENABLE_PGOP_STAT
1119
+ env->lck->pgops.msync.weak += sync_op;
1120
+ #else
1121
+ (void)sync_op;
1122
+ #endif /* MDBX_ENABLE_PGOP_STAT */
1123
+ rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, pending->geometry.first_unallocated), mode_bits);
1124
+ } else {
1125
+ #if MDBX_ENABLE_PGOP_STAT
1126
+ env->lck->pgops.fsync.weak += sync_op;
1127
+ #else
1128
+ (void)sync_op;
1129
+ #endif /* MDBX_ENABLE_PGOP_STAT */
1130
+ rc = osal_fsync(env->lazy_fd, mode_bits);
1131
+ }
1132
+ if (unlikely(rc != MDBX_SUCCESS))
1133
+ goto fail;
1134
+ rc = (flags & MDBX_SAFE_NOSYNC) ? MDBX_RESULT_TRUE /* carry non-steady */
1135
+ : MDBX_RESULT_FALSE /* carry steady */;
1136
+ }
1137
+ eASSERT(env, coherency_check_meta(env, pending, true));
1138
+
1139
+ /* Steady or Weak */
1140
+ if (rc == MDBX_RESULT_FALSE /* carry steady */) {
1141
+ meta_sign_as_steady(pending);
1142
+ atomic_store64(&env->lck->eoos_timestamp, 0, mo_Relaxed);
1143
+ atomic_store64(&env->lck->unsynced_pages, 0, mo_Relaxed);
1144
+ } else {
1145
+ assert(rc == MDBX_RESULT_TRUE /* carry non-steady */);
1146
+ skip_incore_sync:
1147
+ eASSERT(env, env->lck->unsynced_pages.weak > 0);
1148
+ /* Может быть нулевым если unsynced_pages > 0 в результате спиллинга.
1149
+ * eASSERT(env, env->lck->eoos_timestamp.weak != 0); */
1150
+ unaligned_poke_u64(4, pending->sign, DATASIGN_WEAK);
1151
+ }
1152
+
1153
+ const bool legal4overwrite = head.txnid == pending->unsafe_txnid &&
1154
+ !memcmp(&head.ptr_c->trees, &pending->trees, sizeof(pending->trees)) &&
1155
+ !memcmp(&head.ptr_c->canary, &pending->canary, sizeof(pending->canary)) &&
1156
+ !memcmp(&head.ptr_c->geometry, &pending->geometry, sizeof(pending->geometry));
1157
+ meta_t *target = nullptr;
1158
+ if (head.txnid == pending->unsafe_txnid) {
1159
+ ENSURE(env, legal4overwrite);
1160
+ if (!head.is_steady && meta_is_steady(pending))
1161
+ target = (meta_t *)head.ptr_c;
1162
+ else {
1163
+ NOTICE("skip update meta%" PRIaPGNO " for txn#%" PRIaTXN ", since it is already steady",
1164
+ data_page(head.ptr_c)->pgno, head.txnid);
1165
+ return MDBX_SUCCESS;
1166
+ }
1167
+ } else {
1168
+ const unsigned troika_tail = troika->tail_and_flags & 3;
1169
+ ENSURE(env, troika_tail < NUM_METAS && troika_tail != troika->recent && troika_tail != troika->prefer_steady);
1170
+ target = (meta_t *)meta_tail(env, troika).ptr_c;
1171
+ }
1172
+
1173
+ /* LY: step#2 - update meta-page. */
1174
+ DEBUG("writing meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO
1175
+ "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s",
1176
+ data_page(target)->pgno, pending->trees.main.root, pending->trees.gc.root, pending->geometry.lower,
1177
+ pending->geometry.first_unallocated, pending->geometry.now, pending->geometry.upper,
1178
+ pv2pages(pending->geometry.grow_pv), pv2pages(pending->geometry.shrink_pv), pending->unsafe_txnid,
1179
+ durable_caption(pending));
1180
+
1181
+ DEBUG("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO,
1182
+ (meta0 == head.ptr_c) ? "head"
1183
+ : (meta0 == target) ? "tail"
1184
+ : "stay",
1185
+ durable_caption(meta0), constmeta_txnid(meta0), meta0->trees.main.root, meta0->trees.gc.root);
1186
+ DEBUG("meta1: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO,
1187
+ (meta1 == head.ptr_c) ? "head"
1188
+ : (meta1 == target) ? "tail"
1189
+ : "stay",
1190
+ durable_caption(meta1), constmeta_txnid(meta1), meta1->trees.main.root, meta1->trees.gc.root);
1191
+ DEBUG("meta2: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO "/%" PRIaPGNO,
1192
+ (meta2 == head.ptr_c) ? "head"
1193
+ : (meta2 == target) ? "tail"
1194
+ : "stay",
1195
+ durable_caption(meta2), constmeta_txnid(meta2), meta2->trees.main.root, meta2->trees.gc.root);
1196
+
1197
+ eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta0) || (meta_is_steady(pending) && !meta_is_steady(meta0)));
1198
+ eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta1) || (meta_is_steady(pending) && !meta_is_steady(meta1)));
1199
+ eASSERT(env, pending->unsafe_txnid != constmeta_txnid(meta2) || (meta_is_steady(pending) && !meta_is_steady(meta2)));
1200
+
1201
+ eASSERT(env, ((env->flags ^ flags) & MDBX_WRITEMAP) == 0);
1202
+ ENSURE(env, target == head.ptr_c || constmeta_txnid(target) < pending->unsafe_txnid);
1203
+ if (flags & MDBX_WRITEMAP) {
1204
+ jitter4testing(true);
1205
+ if (likely(target != head.ptr_c)) {
1206
+ /* LY: 'invalidate' the meta. */
1207
+ meta_update_begin(env, target, pending->unsafe_txnid);
1208
+ unaligned_poke_u64(4, target->sign, DATASIGN_WEAK);
1209
+ #ifndef NDEBUG
1210
+ /* debug: provoke failure to catch a violators, but don't touch pagesize
1211
+ * to allow readers catch actual pagesize. */
1212
+ void *provoke_begin = &target->trees.gc.root;
1213
+ void *provoke_end = &target->sign;
1214
+ memset(provoke_begin, 0xCC, ptr_dist(provoke_end, provoke_begin));
1215
+ jitter4testing(false);
1216
+ #endif
1217
+
1218
+ /* LY: update info */
1219
+ target->geometry = pending->geometry;
1220
+ target->trees.gc = pending->trees.gc;
1221
+ target->trees.main = pending->trees.main;
1222
+ eASSERT(env, target->trees.gc.flags == MDBX_INTEGERKEY);
1223
+ eASSERT(env, check_table_flags(target->trees.main.flags));
1224
+ target->canary = pending->canary;
1225
+ memcpy(target->pages_retired, pending->pages_retired, 8);
1226
+ jitter4testing(true);
1227
+
1228
+ /* LY: 'commit' the meta */
1229
+ meta_update_end(env, target, unaligned_peek_u64(4, pending->txnid_b));
1230
+ jitter4testing(true);
1231
+ eASSERT(env, coherency_check_meta(env, target, true));
1232
+ } else {
1233
+ /* dangerous case (target == head), only sign could
1234
+ * be updated, check assertions once again */
1235
+ eASSERT(env, legal4overwrite && !head.is_steady && meta_is_steady(pending));
1236
+ }
1237
+ memcpy(target->sign, pending->sign, 8);
1238
+ osal_flush_incoherent_cpu_writeback();
1239
+ jitter4testing(true);
1240
+ if (!env->incore) {
1241
+ if (!MDBX_AVOID_MSYNC) {
1242
+ /* sync meta-pages */
1243
+ #if MDBX_ENABLE_PGOP_STAT
1244
+ env->lck->pgops.msync.weak += 1;
1245
+ #endif /* MDBX_ENABLE_PGOP_STAT */
1246
+ rc = osal_msync(&env->dxb_mmap, 0, pgno_align2os_bytes(env, NUM_METAS),
1247
+ (flags & MDBX_NOMETASYNC) ? MDBX_SYNC_NONE : MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
1248
+ } else {
1249
+ #if MDBX_ENABLE_PGOP_STAT
1250
+ env->lck->pgops.wops.weak += 1;
1251
+ #endif /* MDBX_ENABLE_PGOP_STAT */
1252
+ const page_t *page = data_page(target);
1253
+ rc = osal_pwrite(env->fd4meta, page, env->ps, ptr_dist(page, env->dxb_mmap.base));
1254
+ if (likely(rc == MDBX_SUCCESS)) {
1255
+ osal_flush_incoherent_mmap(target, sizeof(meta_t), globals.sys_pagesize);
1256
+ if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd) {
1257
+ #if MDBX_ENABLE_PGOP_STAT
1258
+ env->lck->pgops.fsync.weak += 1;
1259
+ #endif /* MDBX_ENABLE_PGOP_STAT */
1260
+ rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
1261
+ }
1262
+ }
1263
+ }
1264
+ if (unlikely(rc != MDBX_SUCCESS))
1265
+ goto fail;
1266
+ }
1267
+ } else {
1268
+ #if MDBX_ENABLE_PGOP_STAT
1269
+ env->lck->pgops.wops.weak += 1;
1270
+ #endif /* MDBX_ENABLE_PGOP_STAT */
1271
+ const meta_t undo_meta = *target;
1272
+ eASSERT(env, pending->trees.gc.flags == MDBX_INTEGERKEY);
1273
+ eASSERT(env, check_table_flags(pending->trees.main.flags));
1274
+ rc = osal_pwrite(env->fd4meta, pending, sizeof(meta_t), ptr_dist(target, env->dxb_mmap.base));
1275
+ if (unlikely(rc != MDBX_SUCCESS)) {
1276
+ undo:
1277
+ DEBUG("%s", "write failed, disk error?");
1278
+ /* On a failure, the pagecache still contains the new data.
1279
+ * Try write some old data back, to prevent it from being used. */
1280
+ osal_pwrite(env->fd4meta, &undo_meta, sizeof(meta_t), ptr_dist(target, env->dxb_mmap.base));
1281
+ goto fail;
1282
+ }
1283
+ osal_flush_incoherent_mmap(target, sizeof(meta_t), globals.sys_pagesize);
1284
+ /* sync meta-pages */
1285
+ if ((flags & MDBX_NOMETASYNC) == 0 && env->fd4meta == env->lazy_fd && !env->incore) {
1286
+ #if MDBX_ENABLE_PGOP_STAT
1287
+ env->lck->pgops.fsync.weak += 1;
1288
+ #endif /* MDBX_ENABLE_PGOP_STAT */
1289
+ rc = osal_fsync(env->lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
1290
+ if (rc != MDBX_SUCCESS)
1291
+ goto undo;
1292
+ }
1293
+ }
1294
+
1295
+ uint64_t timestamp = 0;
1296
+ while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
1297
+ rc = coherency_check_written(env, pending->unsafe_txnid, target,
1298
+ bytes2pgno(env, ptr_dist(target, env->dxb_mmap.base)), &timestamp);
1299
+ if (likely(rc == MDBX_SUCCESS))
1300
+ break;
1301
+ if (unlikely(rc != MDBX_RESULT_TRUE))
1302
+ goto fail;
1303
+ }
1304
+
1305
+ const uint32_t sync_txnid_dist = ((flags & MDBX_NOMETASYNC) == 0) ? 0
1306
+ : ((flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? MDBX_NOMETASYNC_LAZY_FD
1307
+ : MDBX_NOMETASYNC_LAZY_WRITEMAP;
1308
+ env->lck->meta_sync_txnid.weak = pending->txnid_a[__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__].weak - sync_txnid_dist;
1309
+
1310
+ *troika = meta_tap(env);
1311
+ for (MDBX_txn *txn = env->basal_txn; txn; txn = txn->nested)
1312
+ if (troika != &txn->tw.troika)
1313
+ txn->tw.troika = *troika;
1314
+
1315
+ /* LY: shrink datafile if needed */
1316
+ if (unlikely(shrink)) {
1317
+ VERBOSE("shrink to %" PRIaPGNO " pages (-%" PRIaPGNO ")", pending->geometry.now, shrink);
1318
+ rc = dxb_resize(env, pending->geometry.first_unallocated, pending->geometry.now, pending->geometry.upper,
1319
+ impilict_shrink);
1320
+ if (rc != MDBX_SUCCESS && rc != MDBX_EPERM)
1321
+ goto fail;
1322
+ eASSERT(env, coherency_check_meta(env, target, true));
1323
+ }
1324
+
1325
+ lck_t *const lck = env->lck_mmap.lck;
1326
+ if (likely(lck))
1327
+ /* toggle oldest refresh */
1328
+ atomic_store32(&lck->rdt_refresh_flag, false, mo_Relaxed);
1329
+
1330
+ return MDBX_SUCCESS;
1331
+
1332
+ fail:
1333
+ env->flags |= ENV_FATAL_ERROR;
1334
+ return rc;
1335
+ }