mdbxmou 0.1.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. package/.github/workflows/ci.yml +32 -0
  2. package/.github/workflows/publish.yml +27 -0
  3. package/.gitmodules +3 -0
  4. package/CMakeLists.txt +53 -0
  5. package/LICENSE +201 -0
  6. package/README.md +639 -0
  7. package/build.js +11 -0
  8. package/deps/libmdbx/.clang-format +3 -0
  9. package/deps/libmdbx/.cmake-format.yaml +3 -0
  10. package/deps/libmdbx/.le.ini +40 -0
  11. package/deps/libmdbx/CMakeLists.txt +1269 -0
  12. package/deps/libmdbx/COPYRIGHT +159 -0
  13. package/deps/libmdbx/ChangeLog.md +2786 -0
  14. package/deps/libmdbx/GNUmakefile +950 -0
  15. package/deps/libmdbx/LICENSE +177 -0
  16. package/deps/libmdbx/Makefile +16 -0
  17. package/deps/libmdbx/NOTICE +39 -0
  18. package/deps/libmdbx/README.md +863 -0
  19. package/deps/libmdbx/TODO.md +43 -0
  20. package/deps/libmdbx/cmake/compiler.cmake +1221 -0
  21. package/deps/libmdbx/cmake/profile.cmake +58 -0
  22. package/deps/libmdbx/cmake/utils.cmake +524 -0
  23. package/deps/libmdbx/conanfile.py +323 -0
  24. package/deps/libmdbx/docs/Doxyfile.in +2734 -0
  25. package/deps/libmdbx/docs/_preface.md +47 -0
  26. package/deps/libmdbx/docs/_restrictions.md +248 -0
  27. package/deps/libmdbx/docs/_starting.md +245 -0
  28. package/deps/libmdbx/docs/_toc.md +34 -0
  29. package/deps/libmdbx/docs/header.html +96 -0
  30. package/deps/libmdbx/example/CMakeLists.txt +6 -0
  31. package/deps/libmdbx/example/README.md +1 -0
  32. package/deps/libmdbx/example/example-mdbx.c +154 -0
  33. package/deps/libmdbx/example/sample-bdb.txt +77 -0
  34. package/deps/libmdbx/mdbx.h +6655 -0
  35. package/deps/libmdbx/mdbx.h++ +6428 -0
  36. package/deps/libmdbx/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch +173 -0
  37. package/deps/libmdbx/src/alloy.c +54 -0
  38. package/deps/libmdbx/src/api-cold.c +543 -0
  39. package/deps/libmdbx/src/api-copy.c +912 -0
  40. package/deps/libmdbx/src/api-cursor.c +754 -0
  41. package/deps/libmdbx/src/api-dbi.c +315 -0
  42. package/deps/libmdbx/src/api-env.c +1434 -0
  43. package/deps/libmdbx/src/api-extra.c +165 -0
  44. package/deps/libmdbx/src/api-key-transform.c +197 -0
  45. package/deps/libmdbx/src/api-misc.c +286 -0
  46. package/deps/libmdbx/src/api-opts.c +575 -0
  47. package/deps/libmdbx/src/api-range-estimate.c +365 -0
  48. package/deps/libmdbx/src/api-txn-data.c +454 -0
  49. package/deps/libmdbx/src/api-txn.c +921 -0
  50. package/deps/libmdbx/src/atomics-ops.h +364 -0
  51. package/deps/libmdbx/src/atomics-types.h +97 -0
  52. package/deps/libmdbx/src/audit.c +109 -0
  53. package/deps/libmdbx/src/bits.md +34 -0
  54. package/deps/libmdbx/src/chk.c +1796 -0
  55. package/deps/libmdbx/src/cogs.c +309 -0
  56. package/deps/libmdbx/src/cogs.h +506 -0
  57. package/deps/libmdbx/src/coherency.c +170 -0
  58. package/deps/libmdbx/src/config.h.in +88 -0
  59. package/deps/libmdbx/src/cursor.c +2396 -0
  60. package/deps/libmdbx/src/cursor.h +391 -0
  61. package/deps/libmdbx/src/dbi.c +717 -0
  62. package/deps/libmdbx/src/dbi.h +142 -0
  63. package/deps/libmdbx/src/debug_begin.h +36 -0
  64. package/deps/libmdbx/src/debug_end.h +15 -0
  65. package/deps/libmdbx/src/dpl.c +486 -0
  66. package/deps/libmdbx/src/dpl.h +134 -0
  67. package/deps/libmdbx/src/dxb.c +1335 -0
  68. package/deps/libmdbx/src/env.c +607 -0
  69. package/deps/libmdbx/src/essentials.h +125 -0
  70. package/deps/libmdbx/src/gc-get.c +1345 -0
  71. package/deps/libmdbx/src/gc-put.c +970 -0
  72. package/deps/libmdbx/src/gc.h +40 -0
  73. package/deps/libmdbx/src/global.c +474 -0
  74. package/deps/libmdbx/src/internals.h +585 -0
  75. package/deps/libmdbx/src/layout-dxb.h +288 -0
  76. package/deps/libmdbx/src/layout-lck.h +289 -0
  77. package/deps/libmdbx/src/lck-posix.c +859 -0
  78. package/deps/libmdbx/src/lck-windows.c +607 -0
  79. package/deps/libmdbx/src/lck.c +174 -0
  80. package/deps/libmdbx/src/lck.h +110 -0
  81. package/deps/libmdbx/src/logging_and_debug.c +250 -0
  82. package/deps/libmdbx/src/logging_and_debug.h +159 -0
  83. package/deps/libmdbx/src/man1/mdbx_chk.1 +106 -0
  84. package/deps/libmdbx/src/man1/mdbx_copy.1 +95 -0
  85. package/deps/libmdbx/src/man1/mdbx_drop.1 +48 -0
  86. package/deps/libmdbx/src/man1/mdbx_dump.1 +101 -0
  87. package/deps/libmdbx/src/man1/mdbx_load.1 +105 -0
  88. package/deps/libmdbx/src/man1/mdbx_stat.1 +86 -0
  89. package/deps/libmdbx/src/mdbx.c++ +1837 -0
  90. package/deps/libmdbx/src/meta.c +656 -0
  91. package/deps/libmdbx/src/meta.h +168 -0
  92. package/deps/libmdbx/src/mvcc-readers.c +414 -0
  93. package/deps/libmdbx/src/node.c +365 -0
  94. package/deps/libmdbx/src/node.h +102 -0
  95. package/deps/libmdbx/src/ntdll.def +1246 -0
  96. package/deps/libmdbx/src/options.h +534 -0
  97. package/deps/libmdbx/src/osal.c +3485 -0
  98. package/deps/libmdbx/src/osal.h +587 -0
  99. package/deps/libmdbx/src/page-get.c +483 -0
  100. package/deps/libmdbx/src/page-iov.c +185 -0
  101. package/deps/libmdbx/src/page-iov.h +34 -0
  102. package/deps/libmdbx/src/page-ops.c +744 -0
  103. package/deps/libmdbx/src/page-ops.h +142 -0
  104. package/deps/libmdbx/src/pnl.c +236 -0
  105. package/deps/libmdbx/src/pnl.h +146 -0
  106. package/deps/libmdbx/src/preface.h +990 -0
  107. package/deps/libmdbx/src/proto.h +105 -0
  108. package/deps/libmdbx/src/refund.c +212 -0
  109. package/deps/libmdbx/src/sort.h +484 -0
  110. package/deps/libmdbx/src/spill.c +431 -0
  111. package/deps/libmdbx/src/spill.h +74 -0
  112. package/deps/libmdbx/src/table.c +107 -0
  113. package/deps/libmdbx/src/tls.c +551 -0
  114. package/deps/libmdbx/src/tls.h +43 -0
  115. package/deps/libmdbx/src/tools/chk.c +673 -0
  116. package/deps/libmdbx/src/tools/copy.c +166 -0
  117. package/deps/libmdbx/src/tools/drop.c +199 -0
  118. package/deps/libmdbx/src/tools/dump.c +515 -0
  119. package/deps/libmdbx/src/tools/load.c +831 -0
  120. package/deps/libmdbx/src/tools/stat.c +516 -0
  121. package/deps/libmdbx/src/tools/wingetopt.c +87 -0
  122. package/deps/libmdbx/src/tools/wingetopt.h +30 -0
  123. package/deps/libmdbx/src/tree-ops.c +1554 -0
  124. package/deps/libmdbx/src/tree-search.c +140 -0
  125. package/deps/libmdbx/src/txl.c +99 -0
  126. package/deps/libmdbx/src/txl.h +26 -0
  127. package/deps/libmdbx/src/txn.c +1083 -0
  128. package/deps/libmdbx/src/unaligned.h +205 -0
  129. package/deps/libmdbx/src/utils.c +32 -0
  130. package/deps/libmdbx/src/utils.h +76 -0
  131. package/deps/libmdbx/src/version.c.in +44 -0
  132. package/deps/libmdbx/src/walk.c +290 -0
  133. package/deps/libmdbx/src/walk.h +20 -0
  134. package/deps/libmdbx/src/windows-import.c +152 -0
  135. package/deps/libmdbx/src/windows-import.h +128 -0
  136. package/deps/libmdbx/test/CMakeLists.txt +317 -0
  137. package/deps/libmdbx/test/append.c++ +237 -0
  138. package/deps/libmdbx/test/base.h++ +92 -0
  139. package/deps/libmdbx/test/battery-tmux.sh +64 -0
  140. package/deps/libmdbx/test/cases.c++ +118 -0
  141. package/deps/libmdbx/test/chrono.c++ +134 -0
  142. package/deps/libmdbx/test/chrono.h++ +85 -0
  143. package/deps/libmdbx/test/config.c++ +643 -0
  144. package/deps/libmdbx/test/config.h++ +334 -0
  145. package/deps/libmdbx/test/copy.c++ +62 -0
  146. package/deps/libmdbx/test/dead.c++ +39 -0
  147. package/deps/libmdbx/test/dump-load.sh +40 -0
  148. package/deps/libmdbx/test/extra/crunched_delete.c++ +409 -0
  149. package/deps/libmdbx/test/extra/cursor_closing.c++ +410 -0
  150. package/deps/libmdbx/test/extra/dbi.c++ +229 -0
  151. package/deps/libmdbx/test/extra/doubtless_positioning.c++ +253 -0
  152. package/deps/libmdbx/test/extra/dupfix_addodd.c +94 -0
  153. package/deps/libmdbx/test/extra/dupfix_multiple.c++ +311 -0
  154. package/deps/libmdbx/test/extra/early_close_dbi.c++ +137 -0
  155. package/deps/libmdbx/test/extra/hex_base64_base58.c++ +118 -0
  156. package/deps/libmdbx/test/extra/maindb_ordinal.c++ +61 -0
  157. package/deps/libmdbx/test/extra/open.c++ +96 -0
  158. package/deps/libmdbx/test/extra/pcrf/README.md +2 -0
  159. package/deps/libmdbx/test/extra/pcrf/pcrf_test.c +380 -0
  160. package/deps/libmdbx/test/extra/probe.c++ +10 -0
  161. package/deps/libmdbx/test/extra/txn.c++ +407 -0
  162. package/deps/libmdbx/test/extra/upsert_alldups.c +193 -0
  163. package/deps/libmdbx/test/fork.c++ +263 -0
  164. package/deps/libmdbx/test/hill.c++ +447 -0
  165. package/deps/libmdbx/test/jitter.c++ +197 -0
  166. package/deps/libmdbx/test/keygen.c++ +393 -0
  167. package/deps/libmdbx/test/keygen.h++ +130 -0
  168. package/deps/libmdbx/test/log.c++ +358 -0
  169. package/deps/libmdbx/test/log.h++ +91 -0
  170. package/deps/libmdbx/test/main.c++ +706 -0
  171. package/deps/libmdbx/test/nested.c++ +318 -0
  172. package/deps/libmdbx/test/osal-unix.c++ +647 -0
  173. package/deps/libmdbx/test/osal-windows.c++ +440 -0
  174. package/deps/libmdbx/test/osal.h++ +41 -0
  175. package/deps/libmdbx/test/stochastic.sh +690 -0
  176. package/deps/libmdbx/test/stub/LICENSE +24 -0
  177. package/deps/libmdbx/test/stub/README.md +8 -0
  178. package/deps/libmdbx/test/stub/pthread_barrier.c +104 -0
  179. package/deps/libmdbx/test/stub/pthread_barrier.h +77 -0
  180. package/deps/libmdbx/test/test.c++ +1551 -0
  181. package/deps/libmdbx/test/test.h++ +298 -0
  182. package/deps/libmdbx/test/tmux.conf +3 -0
  183. package/deps/libmdbx/test/try.c++ +30 -0
  184. package/deps/libmdbx/test/ttl.c++ +240 -0
  185. package/deps/libmdbx/test/utils.c++ +203 -0
  186. package/deps/libmdbx/test/utils.h++ +326 -0
  187. package/deps/libmdbx/test/valgrind_suppress.txt +536 -0
  188. package/lib/mdbx_evn_async.js +211 -0
  189. package/lib/mdbx_worker.js +195 -0
  190. package/lib/nativemou.js +6 -0
  191. package/package.json +38 -0
  192. package/src/async/envmou_close.cpp +34 -0
  193. package/src/async/envmou_close.hpp +32 -0
  194. package/src/async/envmou_copy_to.cpp +29 -0
  195. package/src/async/envmou_copy_to.hpp +38 -0
  196. package/src/async/envmou_keys.cpp +201 -0
  197. package/src/async/envmou_keys.hpp +50 -0
  198. package/src/async/envmou_open.cpp +38 -0
  199. package/src/async/envmou_open.hpp +33 -0
  200. package/src/async/envmou_query.cpp +167 -0
  201. package/src/async/envmou_query.hpp +53 -0
  202. package/src/dbimou.cpp +522 -0
  203. package/src/dbimou.hpp +82 -0
  204. package/src/env_arg0.hpp +24 -0
  205. package/src/envmou.cpp +445 -0
  206. package/src/envmou.hpp +116 -0
  207. package/src/modulemou.cpp +113 -0
  208. package/src/querymou.cpp +177 -0
  209. package/src/querymou.hpp +93 -0
  210. package/src/txnmou.cpp +254 -0
  211. package/src/txnmou.hpp +122 -0
  212. package/src/typemou.hpp +239 -0
  213. package/src/valuemou.hpp +194 -0
  214. package/test/async.js +67 -0
  215. package/test/e3.js +38 -0
  216. package/test/e4.js +89 -0
  217. package/test/e5.js +162 -0
  218. package/test/test-batch-ops.js +243 -0
  219. package/test/test-cursor-mode.js +84 -0
  220. package/test/test-multi-mode.js +87 -0
@@ -0,0 +1,1083 @@
1
+ /// \copyright SPDX-License-Identifier: Apache-2.0
2
+ /// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2025
3
+
4
+ #include "internals.h"
5
+
6
+ __hot txnid_t txn_snapshot_oldest(const MDBX_txn *const txn) {
7
+ return mvcc_shapshot_oldest(txn->env, txn->tw.troika.txnid[txn->tw.troika.prefer_steady]);
8
+ }
9
+
10
+ void txn_done_cursors(MDBX_txn *txn, const bool merge) {
11
+ TXN_FOREACH_DBI_ALL(txn, i) {
12
+ MDBX_cursor *mc = txn->cursors[i];
13
+ if (mc) {
14
+ txn->cursors[i] = nullptr;
15
+ do
16
+ mc = cursor_eot(mc, txn, merge);
17
+ while (mc);
18
+ }
19
+ }
20
+ }
21
+
22
/* Flush the transaction's dirty pages through the iov-context `ctx`.
 *
 * Sorts the dirty-page list, queues every non-loose page into `ctx`
 * (coalescing into vectored writes via iov_page/iov_write), then compacts
 * the dirty list in place so that only loose pages remain.
 *
 * Returns MDBX_SUCCESS, or the first error from iov_page()/iov_write(). */
int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) {
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  dpl_t *const dl = dpl_sort(txn);
  int rc = MDBX_SUCCESS;
  size_t r, w, total_npages = 0;
  /* r scans the (1-based) dirty list, w is the compaction cursor:
   * loose pages are kept (shifted down to slot ++w), written pages are
   * dropped from the list. */
  for (w = 0, r = 1; r <= dl->length; ++r) {
    page_t *dp = dl->items[r].ptr;
    if (dp->flags & P_LOOSE) {
      dl->items[++w] = dl->items[r];
      continue;
    }
    unsigned npages = dpl_npages(dl, r);
    total_npages += npages;
    rc = iov_page(txn, ctx, dp, npages);
    if (unlikely(rc != MDBX_SUCCESS))
      return rc;
  }

  /* Push out whatever iov_page() left buffered. */
  if (!iov_empty(ctx)) {
    tASSERT(txn, rc == MDBX_SUCCESS);
    rc = iov_write(ctx);
  }

  /* Pages written through the lazy (no-dsync) fd become "unsynced";
   * start the end-of-sync timestamp window if it isn't running yet. */
  if (likely(rc == MDBX_SUCCESS) && ctx->fd == txn->env->lazy_fd) {
    txn->env->lck->unsynced_pages.weak += total_npages;
    if (!txn->env->lck->eoos_timestamp.weak)
      txn->env->lck->eoos_timestamp.weak = osal_monotime();
  }

  txn->tw.dirtylist->pages_including_loose -= total_npages;
  /* Defensive tail-copy: on the normal path the loop above consumed the
   * whole list (r == dl->length + 1), so this does nothing. */
  while (r <= dl->length)
    dl->items[++w] = dl->items[r++];

  dl->sorted = dpl_setlen(dl, w);
  /* Room freed equals the number of entries dropped from the list. */
  txn->tw.dirtyroom += r - 1 - w;
  tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
                   (txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit));
  /* After the flush only loose pages may remain in the dirty list. */
  tASSERT(txn, txn->tw.dirtylist->length == txn->tw.loose_count);
  tASSERT(txn, txn->tw.dirtylist->pages_including_loose == txn->tw.loose_count);
  return rc;
}
63
+
64
/* Merge child txn into parent.
 *
 * Folds the committed child's dirty-page list (src) into the parent's (dst),
 * reconciling the parent's retired / spilled / reclaimed page lists:
 *   1. drop parent's dirty pages refunded past geo.first_unallocated;
 *   2. sift pages reclaimed by the child out of the parent's dirty list;
 *   3. move pages retired by the child from the parent's dirty & spill
 *      lists into the reclaimed list (repnl);
 *   4. filter the parent's spill list against refunded, reclaimed and
 *      freshly-dirtied pages;
 *   5. merge the two sorted dirty lists in place (extending or shrinking);
 *   6. recompute page accounting and hand over the child's spill list.
 *
 * Requires non-WRITEMAP mode (asserted): dirty pages are shadow copies.
 * NOTE(review): spill-list entries appear to be pgno<<1 with the low bit
 * marking logically-deleted entries (see the `sl[s] & 1` tests) — confirm
 * against spill_remove()/spill_purge(). */
void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) {
  tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0);
  dpl_t *const src = dpl_sort(txn);

  /* Remove refunded pages from parent's dirty list */
  dpl_t *const dst = dpl_sort(parent);
  if (MDBX_ENABLE_REFUND) {
    size_t n = dst->length;
    while (n && dst->items[n].pgno >= parent->geo.first_unallocated) {
      const unsigned npages = dpl_npages(dst, n);
      page_shadow_release(txn->env, dst->items[n].ptr, npages);
      --n;
    }
    parent->tw.dirtyroom += dst->sorted - n;
    dst->sorted = dpl_setlen(dst, n);
    tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length ==
                        (parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit));
  }

  /* Remove reclaimed pages from parent's dirty list */
  const pnl_t reclaimed_list = parent->tw.repnl;
  dpl_sift(parent, reclaimed_list, false);

  /* Move retired pages from parent's dirty & spilled list to reclaimed.
   * r scans, w compacts the retired list; d/s/l are reused as indices in
   * the merge phases below. */
  size_t r, w, d, s, l;
  for (r = w = parent_retired_len; ++r <= MDBX_PNL_GETSIZE(parent->tw.retired_pages);) {
    const pgno_t pgno = parent->tw.retired_pages[r];
    const size_t di = dpl_exist(parent, pgno);
    const size_t si = !di ? spill_search(parent, pgno) : 0;
    unsigned npages;
    const char *kind;
    if (di) {
      page_t *dp = dst->items[di].ptr;
      tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_SPILLED)) == 0);
      npages = dpl_npages(dst, di);
      page_wash(parent, di, dp, npages);
      kind = "dirty";
      l = 1;
      if (unlikely(npages > l)) {
        /* An OVERFLOW (multi-)page may have been reused piecemeal: the
         * retired list may then hold only the head of the sequence while
         * the remainder is scattered across the dirty, spilled and
         * reclaimed lists. So move it to reclaimed, checking for breaks
         * in the sequence. Either way every fragment is accounted for
         * and filtered, i.e. if the page was split into parts the key
         * point is to remove the dirty entry; the fragments are each
         * accounted for separately. */

        /* The retired-pages list is unsorted, but to speed up sorting it
         * is appended in accordance with MDBX_PNL_ASCENDING. */
#if MDBX_PNL_ASCENDING
        const size_t len = MDBX_PNL_GETSIZE(parent->tw.retired_pages);
        while (r < len && parent->tw.retired_pages[r + 1] == pgno + l) {
          ++r;
          if (++l == npages)
            break;
        }
#else
        while (w > parent_retired_len && parent->tw.retired_pages[w - 1] == pgno + l) {
          --w;
          if (++l == npages)
            break;
        }
#endif
      }
    } else if (unlikely(si)) {
      l = npages = 1;
      spill_remove(parent, si, 1);
      kind = "spilled";
    } else {
      /* Neither dirty nor spilled in the parent: keep it retired. */
      parent->tw.retired_pages[++w] = pgno;
      continue;
    }

    DEBUG("reclaim retired parent's %u -> %zu %s page %" PRIaPGNO, npages, l, kind, pgno);
    int err = pnl_insert_span(&parent->tw.repnl, pgno, l);
    ENSURE(txn->env, err == MDBX_SUCCESS);
  }
  MDBX_PNL_SETSIZE(parent->tw.retired_pages, w);

  /* Filter-out parent spill list */
  if (parent->tw.spilled.list && MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) {
    const pnl_t sl = spill_purge(parent);
    size_t len = MDBX_PNL_GETSIZE(sl);
    if (len) {
      /* Remove refunded pages from parent's spill list */
      if (MDBX_ENABLE_REFUND && MDBX_PNL_MOST(sl) >= (parent->geo.first_unallocated << 1)) {
#if MDBX_PNL_ASCENDING
        size_t i = MDBX_PNL_GETSIZE(sl);
        assert(MDBX_PNL_MOST(sl) == MDBX_PNL_LAST(sl));
        do {
          if ((sl[i] & 1) == 0)
            DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1);
          i -= 1;
        } while (i && sl[i] >= (parent->geo.first_unallocated << 1));
        MDBX_PNL_SETSIZE(sl, i);
#else
        assert(MDBX_PNL_MOST(sl) == MDBX_PNL_FIRST(sl));
        size_t i = 0;
        do {
          ++i;
          if ((sl[i] & 1) == 0)
            DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1);
        } while (i < len && sl[i + 1] >= (parent->geo.first_unallocated << 1));
        MDBX_PNL_SETSIZE(sl, len -= i);
        memmove(sl + 1, sl + 1 + i, len * sizeof(sl[0]));
#endif
      }
      tASSERT(txn, pnl_check_allocated(sl, (size_t)parent->geo.first_unallocated << 1));

      /* Remove reclaimed pages from parent's spill list */
      s = MDBX_PNL_GETSIZE(sl), r = MDBX_PNL_GETSIZE(reclaimed_list);
      /* Scanning from end to begin */
      while (s && r) {
        if (sl[s] & 1) {
          --s;
          continue;
        }
        const pgno_t spilled_pgno = sl[s] >> 1;
        const pgno_t reclaimed_pgno = reclaimed_list[r];
        if (reclaimed_pgno != spilled_pgno) {
          /* Advance whichever cursor points at the larger pgno. */
          const bool cmp = MDBX_PNL_ORDERED(spilled_pgno, reclaimed_pgno);
          s -= !cmp;
          r -= cmp;
        } else {
          DEBUG("remove reclaimed parent's spilled page %" PRIaPGNO, reclaimed_pgno);
          spill_remove(parent, s, 1);
          --s;
          --r;
        }
      }

      /* Remove anything in our dirty list from parent's spill list */
      /* Scanning spill list in descend order */
      const intptr_t step = MDBX_PNL_ASCENDING ? -1 : 1;
      s = MDBX_PNL_ASCENDING ? MDBX_PNL_GETSIZE(sl) : 1;
      d = src->length;
      while (d && (MDBX_PNL_ASCENDING ? s > 0 : s <= MDBX_PNL_GETSIZE(sl))) {
        if (sl[s] & 1) {
          s += step;
          continue;
        }
        const pgno_t spilled_pgno = sl[s] >> 1;
        const pgno_t dirty_pgno_form = src->items[d].pgno;
        const unsigned npages = dpl_npages(src, d);
        const pgno_t dirty_pgno_to = dirty_pgno_form + npages;
        if (dirty_pgno_form > spilled_pgno) {
          --d;
          continue;
        }
        if (dirty_pgno_to <= spilled_pgno) {
          s += step;
          continue;
        }

        /* Overlap: the spilled page was re-dirtied by the child. */
        DEBUG("remove dirtied parent's spilled %u page %" PRIaPGNO, npages, dirty_pgno_form);
        spill_remove(parent, s, 1);
        s += step;
      }

      /* Squash deleted pagenums if we deleted any */
      spill_purge(parent);
    }
  }

  /* Remove anything in our spill list from parent's dirty list */
  if (txn->tw.spilled.list) {
    tASSERT(txn, pnl_check_allocated(txn->tw.spilled.list, (size_t)parent->geo.first_unallocated << 1));
    dpl_sift(parent, txn->tw.spilled.list, true);
    tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length ==
                        (parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit));
  }

  /* Find length of merging our dirty list with parent's and release
   * filter-out pages.  Walks both sorted lists from the tail; `l` counts
   * entries that survive into the merged list. */
  for (l = 0, d = dst->length, s = src->length; d > 0 && s > 0;) {
    page_t *sp = src->items[s].ptr;
    tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_LOOSE | P_SPILLED)) == 0);
    const unsigned s_npages = dpl_npages(src, s);
    const pgno_t s_pgno = src->items[s].pgno;

    page_t *dp = dst->items[d].ptr;
    tASSERT(parent, (dp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_SPILLED)) == 0);
    const unsigned d_npages = dpl_npages(dst, d);
    const pgno_t d_pgno = dst->items[d].pgno;

    if (d_pgno >= s_pgno + s_npages) {
      --d;
      ++l;
    } else if (d_pgno + d_npages <= s_pgno) {
      /* Child's page wins; stamp it with the parent's front txnid. */
      if (sp->flags != P_LOOSE) {
        sp->txnid = parent->front_txnid;
        sp->flags &= ~P_SPILLED;
      }
      --s;
      ++l;
    } else {
      /* Overlap: the parent's shadow is superseded by the child's. */
      dst->items[d--].ptr = nullptr;
      page_shadow_release(txn->env, dp, d_npages);
    }
  }
  assert(dst->sorted == dst->length);
  tASSERT(parent, dst->detent >= l + d + s);
  dst->sorted = l + d + s; /* the merged length */

  /* Re-stamp the remaining (unmatched) child pages. */
  while (s > 0) {
    page_t *sp = src->items[s].ptr;
    tASSERT(parent, (sp->flags & ~(P_LEAF | P_DUPFIX | P_BRANCH | P_LARGE | P_LOOSE | P_SPILLED)) == 0);
    if (sp->flags != P_LOOSE) {
      sp->txnid = parent->front_txnid;
      sp->flags &= ~P_SPILLED;
    }
    --s;
  }

  /* Merge our dirty list into parent's, i.e. merge(dst, src) -> dst */
  if (dst->sorted >= dst->length) {
    /* from end to begin with dst extending */
    for (l = dst->sorted, s = src->length, d = dst->length; s > 0 && d > 0;) {
      if (unlikely(l <= d)) {
        /* squash to get a gap of free space for merge */
        for (r = w = 1; r <= d; ++r)
          if (dst->items[r].ptr) {
            if (w != r) {
              dst->items[w] = dst->items[r];
              dst->items[r].ptr = nullptr;
            }
            ++w;
          }
        VERBOSE("squash to begin for extending-merge %zu -> %zu", d, w - 1);
        d = w - 1;
        continue;
      }
      assert(l > d);
      if (dst->items[d].ptr) {
        dst->items[l--] = (dst->items[d].pgno > src->items[s].pgno) ? dst->items[d--] : src->items[s--];
      } else
        --d;
    }
    if (s > 0) {
      assert(l == s);
      while (d > 0) {
        assert(dst->items[d].ptr == nullptr);
        --d;
      }
      do {
        assert(l > 0);
        dst->items[l--] = src->items[s--];
      } while (s > 0);
    } else {
      assert(l == d);
      while (l > 0) {
        assert(dst->items[l].ptr != nullptr);
        --l;
      }
    }
  } else {
    /* from begin to end with shrinking (a lot of new large/overflow pages) */
    for (l = s = d = 1; s <= src->length && d <= dst->length;) {
      if (unlikely(l >= d)) {
        /* squash to get a gap of free space for merge */
        for (r = w = dst->length; r >= d; --r)
          if (dst->items[r].ptr) {
            if (w != r) {
              dst->items[w] = dst->items[r];
              dst->items[r].ptr = nullptr;
            }
            --w;
          }
        VERBOSE("squash to end for shrinking-merge %zu -> %zu", d, w + 1);
        d = w + 1;
        continue;
      }
      assert(l < d);
      if (dst->items[d].ptr) {
        dst->items[l++] = (dst->items[d].pgno < src->items[s].pgno) ? dst->items[d++] : src->items[s++];
      } else
        ++d;
    }
    if (s <= src->length) {
      assert(dst->sorted - l == src->length - s);
      while (d <= dst->length) {
        assert(dst->items[d].ptr == nullptr);
        --d;
      }
      do {
        assert(l <= dst->sorted);
        dst->items[l++] = src->items[s++];
      } while (s <= src->length);
    } else {
      assert(dst->sorted - l == dst->length - d);
      while (l <= dst->sorted) {
        assert(l <= d && d <= dst->length && dst->items[d].ptr);
        dst->items[l++] = dst->items[d++];
      }
    }
  }
  parent->tw.dirtyroom -= dst->sorted - dst->length;
  assert(parent->tw.dirtyroom <= parent->env->options.dp_limit);
  dpl_setlen(dst, dst->sorted);
  parent->tw.dirtylru = txn->tw.dirtylru;

  /* As currently understood, it is cheaper to recount the number of pages
   * here than to mix extra branches and arithmetic into the loops above. */
  dst->pages_including_loose = 0;
  for (r = 1; r <= dst->length; ++r)
    dst->pages_including_loose += dpl_npages(dst, r);

  tASSERT(parent, dpl_check(parent));
  dpl_free(txn);

  /* Hand the child's spill list over to the parent. */
  if (txn->tw.spilled.list) {
    if (parent->tw.spilled.list) {
      /* Must not fail since space was preserved above. */
      pnl_merge(parent->tw.spilled.list, txn->tw.spilled.list);
      pnl_free(txn->tw.spilled.list);
    } else {
      parent->tw.spilled.list = txn->tw.spilled.list;
      parent->tw.spilled.least_removed = txn->tw.spilled.least_removed;
    }
    tASSERT(parent, dpl_check(parent));
  }

  parent->flags &= ~MDBX_TXN_HAS_CHILD;
  if (parent->tw.spilled.list) {
    assert(pnl_check_allocated(parent->tw.spilled.list, (size_t)parent->geo.first_unallocated << 1));
    if (MDBX_PNL_GETSIZE(parent->tw.spilled.list))
      parent->flags |= MDBX_TXN_SPILLS;
  }
}
394
+
395
/* Export the GC-profiling counters accumulated in the shared LCK region
 * into the user-visible MDBX_commit_latency structure.
 *
 * When profiling is compiled in (MDBX_ENABLE_PROFGC) the counters are
 * copied — monotime values converted to 16.16 fixed-point — and, if `txn`
 * is the env's basal write-transaction, the accumulators are reset so the
 * next collection window starts from zero.  Without profiling support the
 * destination sub-structure is simply zeroed. */
void txn_take_gcprof(const MDBX_txn *txn, MDBX_commit_latency *latency) {
  MDBX_env *const env = txn->env;
  if (MDBX_ENABLE_PROFGC) {
    pgop_stat_t *const ptr = &env->lck->pgops;
    /* "work" slot: GC effort spent inside the transaction's own commit. */
    latency->gc_prof.work_counter = ptr->gc_prof.work.spe_counter;
    latency->gc_prof.work_rtime_monotonic = osal_monotime_to_16dot16(ptr->gc_prof.work.rtime_monotonic);
    latency->gc_prof.work_xtime_cpu = osal_monotime_to_16dot16(ptr->gc_prof.work.xtime_cpu);
    latency->gc_prof.work_rsteps = ptr->gc_prof.work.rsteps;
    latency->gc_prof.work_xpages = ptr->gc_prof.work.xpages;
    latency->gc_prof.work_majflt = ptr->gc_prof.work.majflt;

    /* "self" slot: the parallel set of counters for the GC's own needs. */
    latency->gc_prof.self_counter = ptr->gc_prof.self.spe_counter;
    latency->gc_prof.self_rtime_monotonic = osal_monotime_to_16dot16(ptr->gc_prof.self.rtime_monotonic);
    latency->gc_prof.self_xtime_cpu = osal_monotime_to_16dot16(ptr->gc_prof.self.xtime_cpu);
    latency->gc_prof.self_rsteps = ptr->gc_prof.self.rsteps;
    latency->gc_prof.self_xpages = ptr->gc_prof.self.xpages;
    latency->gc_prof.self_majflt = ptr->gc_prof.self.majflt;

    /* Global GC event counters. */
    latency->gc_prof.wloops = ptr->gc_prof.wloops;
    latency->gc_prof.coalescences = ptr->gc_prof.coalescences;
    latency->gc_prof.wipes = ptr->gc_prof.wipes;
    latency->gc_prof.flushes = ptr->gc_prof.flushes;
    latency->gc_prof.kicks = ptr->gc_prof.kicks;

    /* pnl_merge() sub-profiles for both slots. */
    latency->gc_prof.pnl_merge_work.time = osal_monotime_to_16dot16(ptr->gc_prof.work.pnl_merge.time);
    latency->gc_prof.pnl_merge_work.calls = ptr->gc_prof.work.pnl_merge.calls;
    latency->gc_prof.pnl_merge_work.volume = ptr->gc_prof.work.pnl_merge.volume;
    latency->gc_prof.pnl_merge_self.time = osal_monotime_to_16dot16(ptr->gc_prof.self.pnl_merge.time);
    latency->gc_prof.pnl_merge_self.calls = ptr->gc_prof.self.pnl_merge.calls;
    latency->gc_prof.pnl_merge_self.volume = ptr->gc_prof.self.pnl_merge.volume;

    /* Only the basal txn resets the shared accumulators. */
    if (txn == env->basal_txn)
      memset(&ptr->gc_prof, 0, sizeof(ptr->gc_prof));
  } else
    memset(&latency->gc_prof, 0, sizeof(latency->gc_prof));
}
431
+
432
+ int txn_abort(MDBX_txn *txn) {
433
+ if (txn->flags & MDBX_TXN_RDONLY)
434
+ /* LY: don't close DBI-handles */
435
+ return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE);
436
+
437
+ if (unlikely(txn->flags & MDBX_TXN_FINISHED))
438
+ return MDBX_BAD_TXN;
439
+
440
+ if (txn->nested)
441
+ txn_abort(txn->nested);
442
+
443
+ tASSERT(txn, (txn->flags & MDBX_TXN_ERROR) || dpl_check(txn));
444
+ return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE);
445
+ }
446
+
447
+ int txn_renew(MDBX_txn *txn, unsigned flags) {
448
+ MDBX_env *const env = txn->env;
449
+ int rc;
450
+
451
+ #if MDBX_ENV_CHECKPID
452
+ if (unlikely(env->pid != osal_getpid())) {
453
+ env->flags |= ENV_FATAL_ERROR;
454
+ return MDBX_PANIC;
455
+ }
456
+ #endif /* MDBX_ENV_CHECKPID */
457
+
458
+ flags |= env->flags & (MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP);
459
+ if (flags & MDBX_TXN_RDONLY) {
460
+ eASSERT(env, (flags & ~(txn_ro_begin_flags | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0);
461
+ txn->flags = flags;
462
+ reader_slot_t *r = txn->to.reader;
463
+ STATIC_ASSERT(sizeof(uintptr_t) <= sizeof(r->tid));
464
+ if (likely(env->flags & ENV_TXKEY)) {
465
+ eASSERT(env, !(env->flags & MDBX_NOSTICKYTHREADS));
466
+ r = thread_rthc_get(env->me_txkey);
467
+ if (likely(r)) {
468
+ if (unlikely(!r->pid.weak) && (globals.runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN)) {
469
+ thread_rthc_set(env->me_txkey, nullptr);
470
+ r = nullptr;
471
+ } else {
472
+ eASSERT(env, r->pid.weak == env->pid);
473
+ eASSERT(env, r->tid.weak == osal_thread_self());
474
+ }
475
+ }
476
+ } else {
477
+ eASSERT(env, !env->lck_mmap.lck || (env->flags & MDBX_NOSTICKYTHREADS));
478
+ }
479
+
480
+ if (likely(r)) {
481
+ if (unlikely(r->pid.weak != env->pid || r->txnid.weak < SAFE64_INVALID_THRESHOLD))
482
+ return MDBX_BAD_RSLOT;
483
+ } else if (env->lck_mmap.lck) {
484
+ bsr_t brs = mvcc_bind_slot(env);
485
+ if (unlikely(brs.err != MDBX_SUCCESS))
486
+ return brs.err;
487
+ r = brs.rslot;
488
+ }
489
+ txn->to.reader = r;
490
+ STATIC_ASSERT(MDBX_TXN_RDONLY_PREPARE > MDBX_TXN_RDONLY);
491
+ if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) {
492
+ eASSERT(env, txn->txnid == 0);
493
+ eASSERT(env, txn->owner == 0);
494
+ eASSERT(env, txn->n_dbi == 0);
495
+ if (likely(r)) {
496
+ eASSERT(env, r->snapshot_pages_used.weak == 0);
497
+ eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD);
498
+ atomic_store32(&r->snapshot_pages_used, 0, mo_Relaxed);
499
+ }
500
+ txn->flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED;
501
+ return MDBX_SUCCESS;
502
+ }
503
+ txn->owner = likely(r) ? (uintptr_t)r->tid.weak : ((env->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self());
504
+ if ((env->flags & MDBX_NOSTICKYTHREADS) == 0 && env->txn && unlikely(env->basal_txn->owner == txn->owner) &&
505
+ (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0)
506
+ return MDBX_TXN_OVERLAPPING;
507
+
508
+ /* Seek & fetch the last meta */
509
+ uint64_t timestamp = 0;
510
+ size_t loop = 0;
511
+ troika_t troika = meta_tap(env);
512
+ while (1) {
513
+ const meta_ptr_t head = likely(env->stuck_meta < 0) ? /* regular */ meta_recent(env, &troika)
514
+ : /* recovery mode */ meta_ptr(env, env->stuck_meta);
515
+ if (likely(r != nullptr)) {
516
+ safe64_reset(&r->txnid, true);
517
+ atomic_store32(&r->snapshot_pages_used, head.ptr_v->geometry.first_unallocated, mo_Relaxed);
518
+ atomic_store64(&r->snapshot_pages_retired, unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired),
519
+ mo_Relaxed);
520
+ safe64_write(&r->txnid, head.txnid);
521
+ eASSERT(env, r->pid.weak == osal_getpid());
522
+ eASSERT(env, r->tid.weak == ((env->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self()));
523
+ eASSERT(env, r->txnid.weak == head.txnid ||
524
+ (r->txnid.weak >= SAFE64_INVALID_THRESHOLD && head.txnid < env->lck->cached_oldest.weak));
525
+ atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease);
526
+ } else {
527
+ /* exclusive mode without lck */
528
+ eASSERT(env, !env->lck_mmap.lck && env->lck == lckless_stub(env));
529
+ }
530
+ jitter4testing(true);
531
+
532
+ if (unlikely(meta_should_retry(env, &troika))) {
533
+ retry:
534
+ if (likely(++loop < 42)) {
535
+ timestamp = 0;
536
+ continue;
537
+ }
538
+ ERROR("bailout waiting for valid snapshot (%s)", "meta-pages are too volatile");
539
+ rc = MDBX_PROBLEM;
540
+ goto read_failed;
541
+ }
542
+
543
+ /* Snap the state from current meta-head */
544
+ rc = coherency_fetch_head(txn, head, &timestamp);
545
+ jitter4testing(false);
546
+ if (unlikely(rc != MDBX_SUCCESS)) {
547
+ if (rc == MDBX_RESULT_TRUE)
548
+ goto retry;
549
+ else
550
+ goto read_failed;
551
+ }
552
+
553
+ const uint64_t snap_oldest = atomic_load64(&env->lck->cached_oldest, mo_AcquireRelease);
554
+ if (unlikely(txn->txnid < snap_oldest)) {
555
+ if (env->stuck_meta < 0)
556
+ goto retry;
557
+ ERROR("target meta-page %i is referenced to an obsolete MVCC-snapshot "
558
+ "%" PRIaTXN " < cached-oldest %" PRIaTXN,
559
+ env->stuck_meta, txn->txnid, snap_oldest);
560
+ rc = MDBX_MVCC_RETARDED;
561
+ goto read_failed;
562
+ }
563
+
564
+ if (likely(r != nullptr) && unlikely(txn->txnid != atomic_load64(&r->txnid, mo_Relaxed)))
565
+ goto retry;
566
+ break;
567
+ }
568
+
569
+ if (unlikely(txn->txnid < MIN_TXNID || txn->txnid > MAX_TXNID)) {
570
+ ERROR("%s", "environment corrupted by died writer, must shutdown!");
571
+ rc = MDBX_CORRUPTED;
572
+ read_failed:
573
+ txn->txnid = INVALID_TXNID;
574
+ if (likely(r != nullptr))
575
+ safe64_reset(&r->txnid, true);
576
+ goto bailout;
577
+ }
578
+
579
+ tASSERT(txn, rc == MDBX_SUCCESS);
580
+ ENSURE(env, txn->txnid >=
581
+ /* paranoia is appropriate here */ env->lck->cached_oldest.weak);
582
+ tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY);
583
+ tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags));
584
+ } else {
585
+ eASSERT(env, (flags & ~(txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_WRITEMAP | MDBX_NOSTICKYTHREADS)) == 0);
586
+ const uintptr_t tid = osal_thread_self();
587
+ if (unlikely(txn->owner == tid ||
588
+ /* not recovery mode */ env->stuck_meta >= 0))
589
+ return MDBX_BUSY;
590
+ lck_t *const lck = env->lck_mmap.lck;
591
+ if (lck && (env->flags & MDBX_NOSTICKYTHREADS) == 0 && (globals.runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) {
592
+ const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
593
+ for (size_t i = 0; i < snap_nreaders; ++i) {
594
+ if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) == env->pid &&
595
+ unlikely(atomic_load64(&lck->rdt[i].tid, mo_Relaxed) == tid)) {
596
+ const txnid_t txnid = safe64_read(&lck->rdt[i].txnid);
597
+ if (txnid >= MIN_TXNID && txnid <= MAX_TXNID)
598
+ return MDBX_TXN_OVERLAPPING;
599
+ }
600
+ }
601
+ }
602
+
603
+ /* Not yet touching txn == env->basal_txn, it may be active */
604
+ jitter4testing(false);
605
+ rc = lck_txn_lock(env, !!(flags & MDBX_TXN_TRY));
606
+ if (unlikely(rc))
607
+ return rc;
608
+ if (unlikely(env->flags & ENV_FATAL_ERROR)) {
609
+ lck_txn_unlock(env);
610
+ return MDBX_PANIC;
611
+ }
612
+ #if defined(_WIN32) || defined(_WIN64)
613
+ if (unlikely(!env->dxb_mmap.base)) {
614
+ lck_txn_unlock(env);
615
+ return MDBX_EPERM;
616
+ }
617
+ #endif /* Windows */
618
+
619
+ txn->tw.troika = meta_tap(env);
620
+ const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
621
+ uint64_t timestamp = 0;
622
+ while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
623
+ rc = coherency_fetch_head(txn, head, &timestamp);
624
+ if (likely(rc == MDBX_SUCCESS))
625
+ break;
626
+ if (unlikely(rc != MDBX_RESULT_TRUE))
627
+ goto bailout;
628
+ }
629
+ eASSERT(env, meta_txnid(head.ptr_v) == txn->txnid);
630
+ txn->txnid = safe64_txnid_next(txn->txnid);
631
+ if (unlikely(txn->txnid > MAX_TXNID)) {
632
+ rc = MDBX_TXN_FULL;
633
+ ERROR("txnid overflow, raise %d", rc);
634
+ goto bailout;
635
+ }
636
+
637
+ tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY);
638
+ tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags));
639
+ txn->flags = flags;
640
+ txn->nested = nullptr;
641
+ txn->tw.loose_pages = nullptr;
642
+ txn->tw.loose_count = 0;
643
+ #if MDBX_ENABLE_REFUND
644
+ txn->tw.loose_refund_wl = 0;
645
+ #endif /* MDBX_ENABLE_REFUND */
646
+ MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0);
647
+ txn->tw.spilled.list = nullptr;
648
+ txn->tw.spilled.least_removed = 0;
649
+ txn->tw.gc.time_acc = 0;
650
+ txn->tw.gc.last_reclaimed = 0;
651
+ if (txn->tw.gc.retxl)
652
+ MDBX_PNL_SETSIZE(txn->tw.gc.retxl, 0);
653
+ env->txn = txn;
654
+ }
655
+
656
+ txn->front_txnid = txn->txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0);
657
+
658
+ /* Setup db info */
659
+ tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY);
660
+ tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags));
661
+ VALGRIND_MAKE_MEM_UNDEFINED(txn->dbi_state, env->max_dbi);
662
+ #if MDBX_ENABLE_DBI_SPARSE
663
+ txn->n_dbi = CORE_DBS;
664
+ VALGRIND_MAKE_MEM_UNDEFINED(txn->dbi_sparse,
665
+ ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT);
666
+ txn->dbi_sparse[0] = (1 << CORE_DBS) - 1;
667
+ #else
668
+ txn->n_dbi = (env->n_dbi < 8) ? env->n_dbi : 8;
669
+ if (txn->n_dbi > CORE_DBS)
670
+ memset(txn->dbi_state + CORE_DBS, 0, txn->n_dbi - CORE_DBS);
671
+ #endif /* MDBX_ENABLE_DBI_SPARSE */
672
+ txn->dbi_state[FREE_DBI] = DBI_LINDO | DBI_VALID;
673
+ txn->dbi_state[MAIN_DBI] = DBI_LINDO | DBI_VALID;
674
+ txn->cursors[FREE_DBI] = nullptr;
675
+ txn->cursors[MAIN_DBI] = nullptr;
676
+ txn->dbi_seqs[FREE_DBI] = 0;
677
+ txn->dbi_seqs[MAIN_DBI] = atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease);
678
+
679
+ if (unlikely(env->dbs_flags[MAIN_DBI] != (DB_VALID | txn->dbs[MAIN_DBI].flags) || !txn->dbi_seqs[MAIN_DBI])) {
680
+ const bool need_txn_lock = env->basal_txn && env->basal_txn->owner != osal_thread_self();
681
+ bool should_unlock = false;
682
+ if (need_txn_lock) {
683
+ rc = lck_txn_lock(env, true);
684
+ if (rc == MDBX_SUCCESS)
685
+ should_unlock = true;
686
+ else if (rc != MDBX_BUSY && rc != MDBX_EDEADLK)
687
+ goto bailout;
688
+ }
689
+ rc = osal_fastmutex_acquire(&env->dbi_lock);
690
+ if (likely(rc == MDBX_SUCCESS)) {
691
+ /* проверяем повторно после захвата блокировки */
692
+ uint32_t seq = atomic_load32(&env->dbi_seqs[MAIN_DBI], mo_AcquireRelease);
693
+ if (env->dbs_flags[MAIN_DBI] != (DB_VALID | txn->dbs[MAIN_DBI].flags)) {
694
+ if (!(env->dbs_flags[MAIN_DBI] & DB_VALID) || !need_txn_lock || should_unlock ||
695
+ /* если нет активной пишущей транзакции, * то следующая будет ждать на dbi_lock */ !env->txn) {
696
+ if (env->dbs_flags[MAIN_DBI] & DB_VALID) {
697
+ NOTICE("renew MainDB for %s-txn %" PRIaTXN " since db-flags changes 0x%x -> 0x%x",
698
+ (txn->flags & MDBX_TXN_RDONLY) ? "ro" : "rw", txn->txnid, env->dbs_flags[MAIN_DBI] & ~DB_VALID,
699
+ txn->dbs[MAIN_DBI].flags);
700
+ seq = dbi_seq_next(env, MAIN_DBI);
701
+ env->dbs_flags[MAIN_DBI] = DB_POISON;
702
+ atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease);
703
+ }
704
+ rc = tbl_setup(env, &env->kvs[MAIN_DBI], &txn->dbs[MAIN_DBI]);
705
+ if (likely(rc == MDBX_SUCCESS)) {
706
+ seq = dbi_seq_next(env, MAIN_DBI);
707
+ env->dbs_flags[MAIN_DBI] = DB_VALID | txn->dbs[MAIN_DBI].flags;
708
+ atomic_store32(&env->dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease);
709
+ }
710
+ } else {
711
+ ERROR("MainDB db-flags changes 0x%x -> 0x%x ahead of read-txn "
712
+ "%" PRIaTXN,
713
+ txn->dbs[MAIN_DBI].flags, env->dbs_flags[MAIN_DBI] & ~DB_VALID, txn->txnid);
714
+ rc = MDBX_INCOMPATIBLE;
715
+ }
716
+ }
717
+ txn->dbi_seqs[MAIN_DBI] = seq;
718
+ ENSURE(env, osal_fastmutex_release(&env->dbi_lock) == MDBX_SUCCESS);
719
+ } else {
720
+ DEBUG("dbi_lock failed, err %d", rc);
721
+ }
722
+ if (should_unlock)
723
+ lck_txn_unlock(env);
724
+ if (unlikely(rc != MDBX_SUCCESS))
725
+ goto bailout;
726
+ }
727
+
728
+ if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) {
729
+ ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags, "GC/FreeDB");
730
+ rc = MDBX_INCOMPATIBLE;
731
+ goto bailout;
732
+ }
733
+
734
+ tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY);
735
+ tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags));
736
+ if (unlikely(env->flags & ENV_FATAL_ERROR)) {
737
+ WARNING("%s", "environment had fatal error, must shutdown!");
738
+ rc = MDBX_PANIC;
739
+ } else {
740
+ const size_t size_bytes = pgno2bytes(env, txn->geo.end_pgno);
741
+ const size_t used_bytes = pgno2bytes(env, txn->geo.first_unallocated);
742
+ const size_t required_bytes = (txn->flags & MDBX_TXN_RDONLY) ? used_bytes : size_bytes;
743
+ eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
744
+ if (unlikely(required_bytes > env->dxb_mmap.current)) {
745
+ /* Размер БД (для пишущих транзакций) или используемых данных (для
746
+ * читающих транзакций) больше предыдущего/текущего размера внутри
747
+ * процесса, увеличиваем. Сюда также попадает случай увеличения верхней
748
+ * границы размера БД и отображения. В читающих транзакциях нельзя
749
+ * изменять размер файла, который может быть больше необходимого этой
750
+ * транзакции. */
751
+ if (txn->geo.upper > MAX_PAGENO + 1 || bytes2pgno(env, pgno2bytes(env, txn->geo.upper)) != txn->geo.upper) {
752
+ rc = MDBX_UNABLE_EXTEND_MAPSIZE;
753
+ goto bailout;
754
+ }
755
+ rc = dxb_resize(env, txn->geo.first_unallocated, txn->geo.end_pgno, txn->geo.upper, implicit_grow);
756
+ if (unlikely(rc != MDBX_SUCCESS))
757
+ goto bailout;
758
+ eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
759
+ } else if (unlikely(size_bytes < env->dxb_mmap.current)) {
760
+ /* Размер БД меньше предыдущего/текущего размера внутри процесса, можно
761
+ * уменьшить, но всё сложнее:
762
+ * - размер файла согласован со всеми читаемыми снимками на момент
763
+ * коммита последней транзакции;
764
+ * - в читающей транзакции размер файла может быть больше и него нельзя
765
+ * изменять, в том числе менять madvise (меньша размера файла нельзя,
766
+ * а за размером нет смысла).
767
+ * - в пишущей транзакции уменьшать размер файла можно только после
768
+ * проверки размера читаемых снимков, но в этом нет смысла, так как
769
+ * это будет сделано при фиксации транзакции.
770
+ *
771
+ * В сухом остатке, можно только установить dxb_mmap.current равным
772
+ * размеру файла, а это проще сделать без вызова dxb_resize() и усложения
773
+ * внутренней логики.
774
+ *
775
+ * В этой тактике есть недостаток: если пишущите транзакции не регулярны,
776
+ * и при завершении такой транзакции файл БД остаётся не-уменьшеным из-за
777
+ * читающих транзакций использующих предыдущие снимки. */
778
+ #if defined(_WIN32) || defined(_WIN64)
779
+ imports.srwl_AcquireShared(&env->remap_guard);
780
+ #else
781
+ rc = osal_fastmutex_acquire(&env->remap_guard);
782
+ #endif
783
+ if (likely(rc == MDBX_SUCCESS)) {
784
+ eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
785
+ rc = osal_filesize(env->dxb_mmap.fd, &env->dxb_mmap.filesize);
786
+ if (likely(rc == MDBX_SUCCESS)) {
787
+ eASSERT(env, env->dxb_mmap.filesize >= required_bytes);
788
+ if (env->dxb_mmap.current > env->dxb_mmap.filesize)
789
+ env->dxb_mmap.current =
790
+ (env->dxb_mmap.limit < env->dxb_mmap.filesize) ? env->dxb_mmap.limit : (size_t)env->dxb_mmap.filesize;
791
+ }
792
+ #if defined(_WIN32) || defined(_WIN64)
793
+ imports.srwl_ReleaseShared(&env->remap_guard);
794
+ #else
795
+ int err = osal_fastmutex_release(&env->remap_guard);
796
+ if (unlikely(err) && likely(rc == MDBX_SUCCESS))
797
+ rc = err;
798
+ #endif
799
+ }
800
+ if (unlikely(rc != MDBX_SUCCESS))
801
+ goto bailout;
802
+ }
803
+ eASSERT(env, pgno2bytes(env, txn->geo.first_unallocated) <= env->dxb_mmap.current);
804
+ eASSERT(env, env->dxb_mmap.limit >= env->dxb_mmap.current);
805
+ if (txn->flags & MDBX_TXN_RDONLY) {
806
+ #if defined(_WIN32) || defined(_WIN64)
807
+ if (((used_bytes > env->geo_in_bytes.lower && env->geo_in_bytes.shrink) ||
808
+ (globals.running_under_Wine &&
809
+ /* under Wine acquisition of remap_guard is always required,
810
+ * since Wine don't support section extending,
811
+ * i.e. in both cases unmap+map are required. */
812
+ used_bytes < env->geo_in_bytes.upper && env->geo_in_bytes.grow)) &&
813
+ /* avoid recursive use SRW */ (txn->flags & MDBX_NOSTICKYTHREADS) == 0) {
814
+ txn->flags |= txn_shrink_allowed;
815
+ imports.srwl_AcquireShared(&env->remap_guard);
816
+ }
817
+ #endif /* Windows */
818
+ } else {
819
+ tASSERT(txn, txn == env->basal_txn);
820
+
821
+ if (env->options.need_dp_limit_adjust)
822
+ env_options_adjust_dp_limit(env);
823
+ if ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) {
824
+ rc = dpl_alloc(txn);
825
+ if (unlikely(rc != MDBX_SUCCESS))
826
+ goto bailout;
827
+ txn->tw.dirtyroom = txn->env->options.dp_limit;
828
+ txn->tw.dirtylru = MDBX_DEBUG ? UINT32_MAX / 3 - 42 : 0;
829
+ } else {
830
+ tASSERT(txn, txn->tw.dirtylist == nullptr);
831
+ txn->tw.dirtylist = nullptr;
832
+ txn->tw.dirtyroom = MAX_PAGENO;
833
+ txn->tw.dirtylru = 0;
834
+ }
835
+ eASSERT(env, txn->tw.writemap_dirty_npages == 0);
836
+ eASSERT(env, txn->tw.writemap_spilled_npages == 0);
837
+
838
+ MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn));
839
+ rc = cursor_init(gc, txn, FREE_DBI);
840
+ if (rc != MDBX_SUCCESS)
841
+ goto bailout;
842
+ }
843
+ dxb_sanitize_tail(env, txn);
844
+ return MDBX_SUCCESS;
845
+ }
846
+ bailout:
847
+ tASSERT(txn, rc != MDBX_SUCCESS);
848
+ txn_end(txn, TXN_END_SLOT | TXN_END_EOTDONE | TXN_END_FAIL_BEGIN);
849
+ return rc;
850
+ }
851
+
852
/* Finishes a transaction according to `mode`: a TXN_END_* opcode (selected by
 * TXN_END_OPMASK) plus modifier bits (TXN_END_SLOT, TXN_END_EOTDONE,
 * TXN_END_UPDATE, TXN_END_FREE, ...). Releases the reader slot for read-only
 * transactions, or unwinds writer state (basal or nested) for write
 * transactions, optionally freeing the MDBX_txn object itself.
 * Returns MDBX_SUCCESS, or an error propagated from dbi_update()/dxb_resize(). */
int txn_end(MDBX_txn *txn, unsigned mode) {
  MDBX_env *env = txn->env;
  static const char *const names[] = TXN_END_NAMES;

  DEBUG("%s txn %" PRIaTXN "%c-0x%X %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, names[mode & TXN_END_OPMASK],
        txn->txnid, (txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', txn->flags, (void *)txn, (void *)env,
        txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root);

  if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */
    txn_done_cursors(txn, false);

  int rc = MDBX_SUCCESS;
  if (txn->flags & MDBX_TXN_RDONLY) {
    if (txn->to.reader) {
      reader_slot_t *slot = txn->to.reader;
      eASSERT(env, slot->pid.weak == env->pid);
      if (likely(!(txn->flags & MDBX_TXN_FINISHED))) {
        if (likely((txn->flags & MDBX_TXN_PARKED) == 0)) {
          /* Active (non-parked) reader: its snapshot must still be inside
           * the tracked oldest-reader horizon. */
          ENSURE(env, txn->txnid >=
                          /* paranoia is appropriate here */ env->lck->cached_oldest.weak);
          eASSERT(env, txn->txnid == slot->txnid.weak && slot->txnid.weak >= env->lck->cached_oldest.weak);
        } else {
          /* Parked reader: if a writer ousted it meanwhile, reflect that
           * in the end-mode so the final flags carry MDBX_TXN_OUSTED. */
          if ((mode & TXN_END_OPMASK) != TXN_END_OUSTED && safe64_read(&slot->tid) == MDBX_TID_TXN_OUSTED)
            mode = (mode & ~TXN_END_OPMASK) | TXN_END_OUSTED;
          /* Re-claim the slot for this thread; spin because a concurrent
           * writer may be manipulating tid/txnid of a parked slot. */
          do {
            safe64_reset(&slot->txnid, false);
            atomic_store64(&slot->tid, txn->owner, mo_AcquireRelease);
            atomic_yield();
          } while (
              unlikely(safe64_read(&slot->txnid) < SAFE64_INVALID_THRESHOLD || safe64_read(&slot->tid) != txn->owner));
        }
        dxb_sanitize_tail(env, nullptr);
        atomic_store32(&slot->snapshot_pages_used, 0, mo_Relaxed);
        safe64_reset(&slot->txnid, true);
        /* Ask other threads/processes to refresh their view of the reader table. */
        atomic_store32(&env->lck->rdt_refresh_flag, true, mo_Relaxed);
      } else {
        eASSERT(env, slot->pid.weak == env->pid);
        eASSERT(env, slot->txnid.weak >= SAFE64_INVALID_THRESHOLD);
      }
      if (mode & TXN_END_SLOT) {
        /* Without thread-key tracking the slot won't be reused implicitly,
         * so release it completely by zeroing its pid. */
        if ((env->flags & ENV_TXKEY) == 0)
          atomic_store32(&slot->pid, 0, mo_Relaxed);
        txn->to.reader = nullptr;
      }
    }
#if defined(_WIN32) || defined(_WIN64)
    if (txn->flags & txn_shrink_allowed)
      imports.srwl_ReleaseShared(&env->remap_guard);
#endif
    txn->n_dbi = 0; /* prevent further DBI activity */
    txn->flags = ((mode & TXN_END_OPMASK) != TXN_END_OUSTED) ? MDBX_TXN_RDONLY | MDBX_TXN_FINISHED
                                                             : MDBX_TXN_RDONLY | MDBX_TXN_FINISHED | MDBX_TXN_OUSTED;
    txn->owner = 0;
  } else if (!(txn->flags & MDBX_TXN_FINISHED)) {
    ENSURE(env, txn->txnid >=
                    /* paranoia is appropriate here */ env->lck->cached_oldest.weak);
    if (txn == env->basal_txn)
      dxb_sanitize_tail(env, nullptr);

    txn->flags = MDBX_TXN_FINISHED;
    env->txn = txn->parent;
    pnl_free(txn->tw.spilled.list);
    txn->tw.spilled.list = nullptr;
    if (txn == env->basal_txn) {
      eASSERT(env, txn->parent == nullptr);
      /* Export or close DBI handles created in this txn */
      rc = dbi_update(txn, mode & TXN_END_UPDATE);
      pnl_shrink(&txn->tw.retired_pages);
      pnl_shrink(&txn->tw.repnl);
      if (!(env->flags & MDBX_WRITEMAP))
        dpl_release_shadows(txn);
      /* The writer mutex was locked in mdbx_txn_begin. */
      lck_txn_unlock(env);
    } else {
      /* Nested write transaction: hand bookkeeping back to the parent. */
      eASSERT(env, txn->parent != nullptr);
      MDBX_txn *const parent = txn->parent;
      eASSERT(env, parent->signature == txn_signature);
      eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0);
      eASSERT(env, pnl_check_allocated(txn->tw.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));
      eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, sizeof(troika_t)) == 0);

      txn->owner = 0;
      if (txn->tw.gc.retxl) {
        /* The child borrowed the parent's reclaimed-txnid list; the parent
         * stashed its saved length in the pointer field — truncate back to
         * that length and return ownership. */
        eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.gc.retxl) >= (uintptr_t)parent->tw.gc.retxl);
        MDBX_PNL_SETSIZE(txn->tw.gc.retxl, (uintptr_t)parent->tw.gc.retxl);
        parent->tw.gc.retxl = txn->tw.gc.retxl;
      }

      if (txn->tw.retired_pages) {
        /* Same borrow-and-return scheme for the retired-pages list. */
        eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.retired_pages) >= (uintptr_t)parent->tw.retired_pages);
        MDBX_PNL_SETSIZE(txn->tw.retired_pages, (uintptr_t)parent->tw.retired_pages);
        parent->tw.retired_pages = txn->tw.retired_pages;
      }

      parent->nested = nullptr;
      parent->flags &= ~MDBX_TXN_HAS_CHILD;
      parent->tw.dirtylru = txn->tw.dirtylru;
      tASSERT(parent, dpl_check(parent));
      tASSERT(parent, audit_ex(parent, 0, false) == 0);
      dpl_release_shadows(txn);
      dpl_free(txn);
      pnl_free(txn->tw.repnl);

      if (parent->geo.upper != txn->geo.upper || parent->geo.now != txn->geo.now) {
        /* undo resize performed by child txn */
        rc = dxb_resize(env, parent->geo.first_unallocated, parent->geo.now, parent->geo.upper, impilict_shrink);
        if (rc == MDBX_EPERM) {
          /* unable undo resize (it is regular for Windows),
           * therefore promote size changes from child to the parent txn */
          WARNING("unable undo resize performed by child txn, promote to "
                  "the parent (%u->%u, %u->%u)",
                  txn->geo.now, parent->geo.now, txn->geo.upper, parent->geo.upper);
          parent->geo.now = txn->geo.now;
          parent->geo.upper = txn->geo.upper;
          parent->flags |= MDBX_TXN_DIRTY;
          rc = MDBX_SUCCESS;
        } else if (unlikely(rc != MDBX_SUCCESS)) {
          ERROR("error %d while undo resize performed by child txn, fail "
                "the parent",
                rc);
          parent->flags |= MDBX_TXN_ERROR;
          if (!env->dxb_mmap.base)
            env->flags |= ENV_FATAL_ERROR;
        }
      }
    }
  }

  eASSERT(env, txn == env->basal_txn || txn->owner == 0);
  if ((mode & TXN_END_FREE) != 0 && txn != env->basal_txn) {
    txn->signature = 0;
    osal_free(txn);
  }

  return rc;
}
988
+
989
/* Validates a transaction that carries "bad" flag bits while possibly parked;
 * `bad_bits` must include MDBX_TXN_PARKED and must intersect txn->flags.
 *
 * The behavior for parked transactions is intentionally different:
 * - some functions (e.g. mdbx_env_info_ex()) tolerate broken transactions
 *   (those with the MDBX_TXN_ERROR flag) but cannot work with parked
 *   transactions — such transactions require unparking first;
 * - however, broken transactions are terminated during unparking;
 * - so a transaction can be parked and then broken via mdbx_txn_break(),
 *   after which any further use of it leads to termination upon unparking.
 *
 * Therefore, for parked transactions an error is returned unless
 * auto-unparking is enabled and there are no other bad bits. */
int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) {
  tASSERT(txn, (bad_bits & MDBX_TXN_PARKED) && (txn->flags & bad_bits));
  /* Reject unless the ONLY bad bit set is PARKED and AUTOUNPARK is enabled. */
  if ((txn->flags & (bad_bits | MDBX_TXN_AUTOUNPARK)) != (MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK))
    return LOG_IFERR(MDBX_BAD_TXN);

  tASSERT(txn, bad_bits == MDBX_TXN_BLOCKED || bad_bits == MDBX_TXN_BLOCKED - MDBX_TXN_ERROR);
  return mdbx_txn_unpark((MDBX_txn *)txn, false);
}
1008
+
1009
/* Parks a read-only transaction: publishes MDBX_TID_TXN_PARKED into its
 * reader slot's tid (allowing a writer to oust the stale reader) and sets
 * MDBX_TXN_PARKED (plus MDBX_TXN_AUTOUNPARK when `autounpark` is true).
 * Returns MDBX_SUCCESS; MDBX_BAD_TXN on wrong transaction state;
 * MDBX_PROBLEM if the slot's pid is foreign; MDBX_BAD_RSLOT if the slot's
 * tid/txnid does not match this transaction. */
int txn_park(MDBX_txn *txn, bool autounpark) {
  reader_slot_t *const rslot = txn->to.reader;
  tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY);
  tASSERT(txn, txn->to.reader->tid.weak < MDBX_TID_TXN_OUSTED);
  if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY))
    return MDBX_BAD_TXN;

  /* Cross-check that the reader slot really belongs to this transaction. */
  const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed);
  const uint64_t tid = atomic_load64(&rslot->tid, mo_Relaxed);
  const uint64_t txnid = atomic_load64(&rslot->txnid, mo_Relaxed);
  if (unlikely(pid != txn->env->pid)) {
    ERROR("unexpected pid %u%s%u", pid, " != must ", txn->env->pid);
    return MDBX_PROBLEM;
  }
  if (unlikely(tid != txn->owner || txnid != txn->txnid)) {
    ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%0zx"
          " and/or txn-id %" PRIaTXN "%s%" PRIaTXN,
          tid, " != must ", txn->owner, txnid, " != must ", txn->txnid);
    return MDBX_BAD_RSLOT;
  }

  atomic_store64(&rslot->tid, MDBX_TID_TXN_PARKED, mo_AcquireRelease);
  atomic_store32(&txn->env->lck->rdt_refresh_flag, true, mo_Relaxed);
  /* `+=` acts as bitwise-or here: the check above guarantees PARKED is clear;
   * presumably AUTOUNPARK is clear as well in this state — NOTE(review):
   * confirm, otherwise this addition could carry into other flag bits. */
  txn->flags += autounpark ? MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED;
  return MDBX_SUCCESS;
}
1035
+
1036
+ int txn_unpark(MDBX_txn *txn) {
1037
+ if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) !=
1038
+ (MDBX_TXN_RDONLY | MDBX_TXN_PARKED)))
1039
+ return MDBX_BAD_TXN;
1040
+
1041
+ for (reader_slot_t *const rslot = txn->to.reader; rslot; atomic_yield()) {
1042
+ const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed);
1043
+ uint64_t tid = safe64_read(&rslot->tid);
1044
+ uint64_t txnid = safe64_read(&rslot->txnid);
1045
+ if (unlikely(pid != txn->env->pid)) {
1046
+ ERROR("unexpected pid %u%s%u", pid, " != expected ", txn->env->pid);
1047
+ return MDBX_PROBLEM;
1048
+ }
1049
+ if (unlikely(tid == MDBX_TID_TXN_OUSTED || txnid >= SAFE64_INVALID_THRESHOLD))
1050
+ break;
1051
+ if (unlikely(tid != MDBX_TID_TXN_PARKED || txnid != txn->txnid)) {
1052
+ ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, tid, " != must ",
1053
+ MDBX_TID_TXN_OUSTED, txnid, " != must ", txn->txnid);
1054
+ break;
1055
+ }
1056
+ if (unlikely((txn->flags & MDBX_TXN_ERROR)))
1057
+ break;
1058
+
1059
+ #if MDBX_64BIT_CAS
1060
+ if (unlikely(!atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, txn->owner)))
1061
+ continue;
1062
+ #else
1063
+ atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), mo_Relaxed);
1064
+ if (unlikely(!atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)txn->owner))) {
1065
+ atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), mo_AcquireRelease);
1066
+ continue;
1067
+ }
1068
+ #endif
1069
+ txnid = safe64_read(&rslot->txnid);
1070
+ tid = safe64_read(&rslot->tid);
1071
+ if (unlikely(txnid != txn->txnid || tid != txn->owner)) {
1072
+ ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%zx"
1073
+ " and/or txn-id %" PRIaTXN "%s%" PRIaTXN,
1074
+ tid, " != must ", txn->owner, txnid, " != must ", txn->txnid);
1075
+ break;
1076
+ }
1077
+ txn->flags &= ~(MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK);
1078
+ return MDBX_SUCCESS;
1079
+ }
1080
+
1081
+ int err = txn_end(txn, TXN_END_OUSTED | TXN_END_RESET | TXN_END_UPDATE);
1082
+ return err ? err : MDBX_OUSTED;
1083
+ }