isomorfeus-ferret 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@
  * <http://www.OpenLDAP.org/license.html>. */
 
  #define xMDBX_ALLOY 1
- #define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
+ #define MDBX_BUILD_SOURCERY 56f8a04f0668bb80d0d3f24fd2c9958d9aeb83004b65badfd5ccfa80647a2218_v0_12_2_18_gb3248442
  #ifdef MDBX_CONFIG_H
  #include MDBX_CONFIG_H
  #endif
@@ -428,14 +428,13 @@ __extern_C key_t ftok(const char *, int);
  /* Byteorder */
 
  #if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
- defined(i486) || defined(__i486) || defined(__i486__) || \
- defined(i586) || defined(__i586) || defined(__i586__) || \
- defined(i686) || defined(__i686) || defined(__i686__) || \
- defined(_M_IX86) || defined(_X86_) || defined(__I86__) || \
- defined(__THW_INTEL__) || defined(__INTEL__) || \
- defined(__x86_64) || defined(__x86_64__) || \
+ defined(i486) || defined(__i486) || defined(__i486__) || defined(i586) || \
+ defined(__i586) || defined(__i586__) || defined(i686) || \
+ defined(__i686) || defined(__i686__) || defined(_M_IX86) || \
+ defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \
+ defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \
  defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
- defined(_M_AMD64) || defined(__IA32__)
+ defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
  #ifndef __ia32__
  /* LY: define neutral __ia32__ for x86 and x86-64 */
  #define __ia32__ 1
@@ -3138,13 +3137,9 @@ struct MDBX_txn {
  /* Additional flag for sync_locked() */
  #define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
 
- #define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
- #define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
-
  #define TXN_FLAGS \
  (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
- MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
- MDBX_TXN_FROZEN_RE)
+ MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
 
  #if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
  ((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
@@ -3226,11 +3221,16 @@ struct MDBX_txn {
  MDBX_page *loose_pages;
  /* Number of loose pages (tw.loose_pages) */
  size_t loose_count;
- size_t spill_least_removed;
- /* The sorted list of dirty pages we temporarily wrote to disk
- * because the dirty list was full. page numbers in here are
- * shifted left by 1, deleted slots have the LSB set. */
- MDBX_PNL spill_pages;
+ union {
+ struct {
+ size_t least_removed;
+ /* The sorted list of dirty pages we temporarily wrote to disk
+ * because the dirty list was full. page numbers in here are
+ * shifted left by 1, deleted slots have the LSB set. */
+ MDBX_PNL list;
+ } spilled;
+ size_t writemap_dirty_npages;
+ };
  } tw;
  };
  };
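
The comment above carries the key invariant for the whole spill machinery: entries of tw.spilled.list hold page numbers shifted left by one bit, so the low bit is free to mark a slot as deleted in place. A minimal standalone sketch of that encoding (plain C with a stand-in pgno type, not the mdbx headers):

    #include <assert.h>
    #include <stdint.h>

    typedef uint32_t pgno_t; /* stand-in for the mdbx typedef */

    static inline uint32_t spill_encode(pgno_t pgno) { return pgno << 1; }
    static inline pgno_t spill_decode(uint32_t entry) { return entry >> 1; }
    static inline int spill_is_deleted(uint32_t entry) { return entry & 1; }

    int main(void) {
      uint32_t entry = spill_encode(42); /* page 42, live */
      assert(spill_decode(entry) == 42 && !spill_is_deleted(entry));
      entry |= 1; /* tombstone the slot in place; the pgno stays recoverable */
      assert(spill_decode(entry) == 42 && spill_is_deleted(entry));
      return 0;
    }
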
@@ -3280,6 +3280,9 @@ struct MDBX_cursor {
  #define C_SUB 0x04 /* Cursor is a sub-cursor */
  #define C_DEL 0x08 /* last op was a cursor_del */
  #define C_UNTRACK 0x10 /* Un-track cursor when closing */
+ #define C_GCU \
+ 0x20 /* Preparation for a GC update is in progress, so pages may be \
+ * taken from the GC even for FREE_DBI */
  uint8_t mc_flags;
 
  /* Cursor checking flags. */
@@ -4643,7 +4646,7 @@ __cold static const char *pagetype_caption(const uint8_t type,
  }
  }
 
- __cold static __must_check_result int MDBX_PRINTF_ARGS(2, 3)
+ __cold static int MDBX_PRINTF_ARGS(2, 3)
  bad_page(const MDBX_page *mp, const char *fmt, ...) {
  if (LOG_ENABLED(MDBX_LOG_ERROR)) {
  static const MDBX_page *prev;
@@ -5257,7 +5260,7 @@ __cold void thread_dtor(void *rthc) {
  if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) {
  TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(),
  __Wpedantic_format_voidptr(reader));
- atomic_cas32(&reader->mr_pid, self_pid, 0);
+ (void)atomic_cas32(&reader->mr_pid, self_pid, 0);
  }
  }
 
@@ -6346,50 +6349,51 @@ __hot static size_t pnl_merge(MDBX_PNL dst, const MDBX_PNL src) {
  return total;
  }
 
- static void spill_remove(MDBX_txn *txn, size_t idx, pgno_t npages) {
- tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spill_pages) &&
- txn->tw.spill_least_removed > 0);
- txn->tw.spill_least_removed =
- (idx < txn->tw.spill_least_removed) ? idx : txn->tw.spill_least_removed;
- txn->tw.spill_pages[idx] |= 1;
- MDBX_PNL_SETSIZE(txn->tw.spill_pages,
- MDBX_PNL_GETSIZE(txn->tw.spill_pages) -
- (idx == MDBX_PNL_GETSIZE(txn->tw.spill_pages)));
+ static void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) {
+ tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) &&
+ txn->tw.spilled.least_removed > 0);
+ txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
+ ? idx
+ : txn->tw.spilled.least_removed;
+ txn->tw.spilled.list[idx] |= 1;
+ MDBX_PNL_SETSIZE(txn->tw.spilled.list,
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
+ (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
 
  while (unlikely(npages > 1)) {
- const pgno_t pgno = (txn->tw.spill_pages[idx] >> 1) + 1;
+ const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1;
  if (MDBX_PNL_ASCENDING) {
- if (++idx > MDBX_PNL_GETSIZE(txn->tw.spill_pages) ||
- (txn->tw.spill_pages[idx] >> 1) != pgno)
+ if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) ||
+ (txn->tw.spilled.list[idx] >> 1) != pgno)
  return;
  } else {
- if (--idx < 1 || (txn->tw.spill_pages[idx] >> 1) != pgno)
+ if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno)
  return;
- txn->tw.spill_least_removed = (idx < txn->tw.spill_least_removed)
- ? idx
- : txn->tw.spill_least_removed;
- }
- txn->tw.spill_pages[idx] |= 1;
- MDBX_PNL_SETSIZE(txn->tw.spill_pages,
- MDBX_PNL_GETSIZE(txn->tw.spill_pages) -
- (idx == MDBX_PNL_GETSIZE(txn->tw.spill_pages)));
+ txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
+ ? idx
+ : txn->tw.spilled.least_removed;
+ }
+ txn->tw.spilled.list[idx] |= 1;
+ MDBX_PNL_SETSIZE(txn->tw.spilled.list,
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
+ (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
  --npages;
  }
  }
 
  static MDBX_PNL spill_purge(MDBX_txn *txn) {
- tASSERT(txn, txn->tw.spill_least_removed > 0);
- const MDBX_PNL sl = txn->tw.spill_pages;
- if (txn->tw.spill_least_removed != INT_MAX) {
+ tASSERT(txn, txn->tw.spilled.least_removed > 0);
+ const MDBX_PNL sl = txn->tw.spilled.list;
+ if (txn->tw.spilled.least_removed != INT_MAX) {
  size_t len = MDBX_PNL_GETSIZE(sl), r, w;
- for (w = r = txn->tw.spill_least_removed; r <= len; ++r) {
+ for (w = r = txn->tw.spilled.least_removed; r <= len; ++r) {
  sl[w] = sl[r];
  w += 1 - (sl[r] & 1);
  }
  for (size_t i = 1; i < w; ++i)
  tASSERT(txn, (sl[i] & 1) == 0);
  MDBX_PNL_SETSIZE(sl, w - 1);
- txn->tw.spill_least_removed = INT_MAX;
+ txn->tw.spilled.least_removed = INT_MAX;
  } else {
  for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i)
  tASSERT(txn, (sl[i] & 1) == 0);
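
spill_remove() above only tombstones slots; spill_purge() then compacts the list in a single sweep where the write index advances only past live entries (w += 1 - (sl[r] & 1)), overwriting tombstones. A standalone sketch of that compaction idiom over a 1-based array, as the PNL code uses:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Squeeze out tombstoned (odd) entries; sl is 1-based, len is the count. */
    static size_t purge(uint32_t *sl, size_t len) {
      size_t w, r;
      for (w = r = 1; r <= len; ++r) {
        sl[w] = sl[r];
        w += 1 - (sl[r] & 1); /* keep even (live) entries, skip odd ones */
      }
      return w - 1; /* new length */
    }

    int main(void) {
      uint32_t sl[] = {0 /* slot 0 unused */, 4u << 1, (7u << 1) | 1, 9u << 1};
      size_t len = purge(sl, 3); /* page 7 was tombstoned */
      assert(len == 2 && sl[1] == (4u << 1) && sl[2] == (9u << 1));
      return 0;
    }
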
@@ -6445,7 +6449,8 @@ static __inline size_t pnl_search(const MDBX_PNL pnl, pgno_t pgno,
  }
 
  static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
- const MDBX_PNL pnl = txn->tw.spill_pages;
+ tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
+ const MDBX_PNL pnl = txn->tw.spilled.list;
  if (likely(!pnl))
  return 0;
  pgno <<= 1;
@@ -6454,8 +6459,8 @@ static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
  }
 
  static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
- pgno_t npages) {
- const MDBX_PNL pnl = txn->tw.spill_pages;
+ size_t npages) {
+ const MDBX_PNL pnl = txn->tw.spilled.list;
  if (likely(!pnl))
  return false;
  const size_t len = MDBX_PNL_GETSIZE(pnl);
@@ -6467,7 +6472,7 @@ static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
  DEBUG_EXTRA_PRINT("%s\n", "]");
  }
  const pgno_t spilled_range_begin = pgno << 1;
- const pgno_t spilled_range_last = ((pgno + npages) << 1) - 1;
+ const pgno_t spilled_range_last = ((pgno + (pgno_t)npages) << 1) - 1;
  #if MDBX_PNL_ASCENDING
  const size_t n =
  pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1);
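
The doubled-domain arithmetic above is what lets intersect_spilled() treat live and tombstoned entries uniformly: for pages [pgno, pgno + npages) the interval [pgno << 1, ((pgno + npages) << 1) - 1] covers both encodings of every page in the range. A small self-checking sketch of that property:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      const uint32_t pgno = 10, npages = 3;
      const uint32_t begin = pgno << 1;
      const uint32_t last = ((pgno + npages) << 1) - 1;
      for (uint32_t p = pgno; p < pgno + npages; ++p) {
        assert(begin <= (p << 1) && (p << 1) <= last);             /* live */
        assert(begin <= ((p << 1) | 1) && ((p << 1) | 1) <= last); /* deleted */
      }
      /* neighbours just outside the range do not intersect */
      assert((((pgno - 1) << 1) | 1) < begin && ((pgno + npages) << 1) > last);
      return 0;
    }
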
@@ -6831,7 +6836,7 @@ dpl_endpgno(const MDBX_dpl *dl, size_t i) {
  }
 
  static __inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno,
- pgno_t npages) {
+ size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
 
@@ -6889,7 +6894,7 @@ MDBX_MAYBE_UNUSED static const MDBX_page *debug_dpl_find(const MDBX_txn *txn,
  return nullptr;
  }
 
- static void dpl_remove_ex(const MDBX_txn *txn, size_t i, pgno_t npages) {
+ static void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
 
@@ -6911,7 +6916,7 @@ static void dpl_remove(const MDBX_txn *txn, size_t i) {
  static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
  pgno_t pgno,
  MDBX_page *page,
- pgno_t npages) {
+ size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  MDBX_dpl *dl = txn->tw.dirtylist;
@@ -6980,7 +6985,7 @@ static __must_check_result __inline int page_retire(MDBX_cursor *mc,
  MDBX_page *mp);
 
  static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
- pgno_t npages);
+ size_t npages);
  typedef struct page_result {
  MDBX_page *page;
  int err;
@@ -6989,7 +6994,7 @@ typedef struct page_result {
  static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard);
 
  static pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
- static pgr_t page_new_large(MDBX_cursor *mc, const pgno_t npages);
+ static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
  static int page_touch(MDBX_cursor *mc);
  static int cursor_touch(MDBX_cursor *mc);
  static int touch_dbi(MDBX_cursor *mc);
@@ -7588,7 +7593,7 @@ static MDBX_page *page_malloc(MDBX_txn *txn, size_t num) {
  }
 
  /* Free a shadow dirty page */
- static void dpage_free(MDBX_env *env, MDBX_page *dp, pgno_t npages) {
+ static void dpage_free(MDBX_env *env, MDBX_page *dp, size_t npages) {
  VALGRIND_MAKE_MEM_UNDEFINED(dp, pgno2bytes(env, npages));
  MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages));
  if (unlikely(env->me_flags & MDBX_PAGEPERTURB))
@@ -7910,7 +7915,7 @@ static bool txn_refund(MDBX_txn *txn) {
  if (before == txn->mt_next_pgno)
  return false;
 
- if (txn->tw.spill_pages)
+ if (txn->tw.spilled.list)
  /* Squash deleted pagenums if we refunded any */
  spill_purge(txn);
 
@@ -7925,9 +7930,9 @@ static __inline bool txn_refund(MDBX_txn *txn) {
  #endif /* MDBX_ENABLE_REFUND */
 
  __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
- pgno_t npages) {
+ size_t npages) {
  MDBX_env *const env = txn->mt_env;
- DEBUG("kill %u page(s) %" PRIaPGNO, npages, pgno);
+ DEBUG("kill %zu page(s) %" PRIaPGNO, npages, pgno);
  eASSERT(env, pgno >= NUM_METAS && npages);
  if (!IS_FROZEN(txn, mp)) {
  const size_t bytes = pgno2bytes(env, npages);
@@ -7954,7 +7959,7 @@ __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
 
  /* Remove page from dirty list */
  static __inline void page_wash(MDBX_txn *txn, const size_t di,
- MDBX_page *const mp, const pgno_t npages) {
+ MDBX_page *const mp, const size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  tASSERT(txn, di && di <= txn->tw.dirtylist->length &&
@@ -8003,7 +8008,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
  * So for flexibility and avoid extra internal dependencies we just
  * fallback to reading if dirty list was not allocated yet. */
  size_t di = 0, si = 0;
- pgno_t npages = 1;
+ size_t npages = 1;
  bool is_frozen = false, is_spilled = false, is_shadowed = false;
  if (unlikely(!mp)) {
  if (ASSERT_ENABLED() && pageflags) {
@@ -8019,7 +8024,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
  is_frozen = true;
  if (ASSERT_ENABLED()) {
  for (MDBX_txn *scan = txn; scan; scan = scan->mt_parent) {
- tASSERT(txn, !search_spilled(scan, pgno));
+ tASSERT(txn, !txn->tw.spilled.list || !search_spilled(scan, pgno));
  tASSERT(txn, !scan->tw.dirtylist || !debug_dpl_find(scan, pgno));
  }
  }
@@ -8064,7 +8069,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
  is_shadowed = IS_SHADOWED(txn, mp);
  if (is_dirty) {
  tASSERT(txn, !is_spilled);
- tASSERT(txn, !search_spilled(txn, pgno));
+ tASSERT(txn, !txn->tw.spilled.list || !search_spilled(txn, pgno));
  tASSERT(txn, debug_dpl_find(txn, pgno) == mp || txn->mt_parent ||
  (txn->mt_flags & MDBX_WRITEMAP));
  } else {
@@ -8098,12 +8103,12 @@ status_done:
  } else {
  npages = mp->mp_pages;
  cASSERT(mc, mc->mc_db->md_overflow_pages >= npages);
- mc->mc_db->md_overflow_pages -= npages;
+ mc->mc_db->md_overflow_pages -= (pgno_t)npages;
  }
 
  if (is_frozen) {
  retire:
- DEBUG("retire %u page %" PRIaPGNO, npages, pgno);
+ DEBUG("retire %zu page %" PRIaPGNO, npages, pgno);
  rc = pnl_append_range(false, &txn->tw.retired_pages, pgno, npages);
  tASSERT(txn, dirtylist_check(txn));
  return rc;
@@ -8154,7 +8159,7 @@ status_done:
  }
  tASSERT(txn, is_spilled || is_shadowed || (mp && IS_SHADOWED(txn, mp)));
  }
- DEBUG("refunded %u %s page %" PRIaPGNO, npages, kind, pgno);
+ DEBUG("refunded %zu %s page %" PRIaPGNO, npages, kind, pgno);
  txn->mt_next_pgno = pgno;
  txn_refund(txn);
  return MDBX_SUCCESS;
@@ -8223,7 +8228,7 @@ status_done:
  page_wash(txn, di, mp, npages);
 
  reclaim:
- DEBUG("reclaim %u %s page %" PRIaPGNO, npages, "dirty", pgno);
+ DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno);
  rc = pnl_insert_range(&txn->tw.relist, pgno, npages);
  tASSERT(txn, pnl_check_allocated(txn->tw.relist,
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
@@ -8330,7 +8335,7 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
  osal_flush_incoherent_mmap(env->me_map + offset, bytes, env->me_os_psize);
  const MDBX_page *const rp = (const MDBX_page *)(env->me_map + offset);
  /* check with timeout as the workaround
- * for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269 */
+ * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
  if (unlikely(memcmp(wp, rp, bytes))) {
  ctx->coherency_timestamp = 0;
  WARNING("catch delayed/non-arrived page %" PRIaPGNO " %s", wp->mp_pgno,
@@ -8351,11 +8356,12 @@
  do {
  eASSERT(env, wp->mp_pgno == bytes2pgno(env, offset));
  eASSERT(env, (wp->mp_flags & P_ILL_BITS) == 0);
- unsigned npages = IS_OVERFLOW(wp) ? wp->mp_pages : 1u;
+ size_t npages = IS_OVERFLOW(wp) ? wp->mp_pages : 1u;
  size_t chunk = pgno2bytes(env, npages);
  eASSERT(env, bytes >= chunk);
+ MDBX_page *next = (MDBX_page *)((char *)wp + chunk);
  dpage_free(env, wp, npages);
- wp = (MDBX_page *)((char *)wp + chunk);
+ wp = next;
  offset += chunk;
  bytes -= chunk;
  } while (bytes);
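
The hunk above appears to be a use-after-free fix: the successor pointer is now derived from wp before dpage_free() can poison, perturb, or recycle the page memory. The generic idiom, sketched without the mdbx types:

    #include <stdlib.h>

    struct node { struct node *next; };

    static void walk_and_free(struct node *head) {
      while (head) {
        struct node *next = head->next; /* read what you need first... */
        free(head);                     /* ...then invalidate the object */
        head = next;
      }
    }

    int main(void) {
      struct node *a = calloc(1, sizeof(*a)), *b = calloc(1, sizeof(*b));
      if (a && b) {
        a->next = b;
        walk_and_free(a);
      }
      return 0;
    }
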
@@ -8384,7 +8390,7 @@ __must_check_result static int iov_write(iov_ctx_t *ctx) {
  }
 
  __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
- MDBX_page *dp, pgno_t npages) {
+ MDBX_page *dp, size_t npages) {
  MDBX_env *const env = txn->mt_env;
  tASSERT(txn, ctx->err == MDBX_SUCCESS);
  tASSERT(txn, dp->mp_pgno >= MIN_PAGENO && dp->mp_pgno < txn->mt_next_pgno);
@@ -8428,16 +8434,16 @@ __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
  #if MDBX_NEED_WRITTEN_RANGE
  ctx->flush_begin =
  (ctx->flush_begin < dp->mp_pgno) ? ctx->flush_begin : dp->mp_pgno;
- ctx->flush_end = (ctx->flush_end > dp->mp_pgno + npages)
+ ctx->flush_end = (ctx->flush_end > dp->mp_pgno + (pgno_t)npages)
  ? ctx->flush_end
- : dp->mp_pgno + npages;
+ : dp->mp_pgno + (pgno_t)npages;
  #endif /* MDBX_NEED_WRITTEN_RANGE */
  env->me_lck->mti_unsynced_pages.weak += npages;
  return MDBX_SUCCESS;
  }
 
  static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
- const pgno_t npages) {
+ const size_t npages) {
  tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP) || MDBX_AVOID_MSYNC);
  #if MDBX_ENABLE_PGOP_STAT
  txn->mt_env->me_lck->mti_pgop_stat.spill.weak += npages;
@@ -8446,7 +8452,7 @@ static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
  int err = iov_page(txn, ctx, dp, npages);
  if (likely(err == MDBX_SUCCESS) &&
  (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)))
- err = pnl_append_range(true, &txn->tw.spill_pages, pgno << 1, npages);
+ err = pnl_append_range(true, &txn->tw.spilled.list, pgno << 1, npages);
  return err;
  }
 
@@ -8496,16 +8502,16 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  const uint32_t reciprocal) {
  MDBX_dpl *const dl = txn->tw.dirtylist;
  const uint32_t age = dpl_age(txn, i);
- const unsigned npages = dpl_npages(dl, i);
+ const size_t npages = dpl_npages(dl, i);
  const pgno_t pgno = dl->items[i].pgno;
  if (age == 0) {
- DEBUG("skip %s %u page %" PRIaPGNO, "keep", npages, pgno);
+ DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno);
  return 256;
  }
 
  MDBX_page *const dp = dl->items[i].ptr;
  if (dp->mp_flags & (P_LOOSE | P_SPILLED)) {
- DEBUG("skip %s %u page %" PRIaPGNO,
+ DEBUG("skip %s %zu page %" PRIaPGNO,
  (dp->mp_flags & P_LOOSE) ? "loose"
  : (dp->mp_flags & P_LOOSE) ? "loose"
  : "parent-spilled",
@@ -8519,7 +8525,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  if (parent && (parent->mt_flags & MDBX_TXN_SPILLS)) {
  do
  if (intersect_spilled(parent, pgno, npages)) {
- DEBUG("skip-2 parent-spilled %u page %" PRIaPGNO, npages, pgno);
+ DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno);
  dp->mp_flags |= P_SPILLED;
  return 256;
  }
@@ -8533,7 +8539,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  return prio = 256 - prio;
 
  /* make a large/overflow pages be likely to spill */
- uint32_t factor = npages | npages >> 1;
+ size_t factor = npages | npages >> 1;
  factor |= factor >> 2;
  factor |= factor >> 4;
  factor |= factor >> 8;
@@ -8541,7 +8547,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  factor = prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157;
  factor = (factor < 256) ? 255 - factor : 0;
  tASSERT(txn, factor < 256 && factor < (256 - prio));
- return prio = factor;
+ return prio = (unsigned)factor;
  }
 
  /* Spill pages from the dirty list back to disk.
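
The factor computation in spill_prio() smears the bits of npages rightward to round it up to (2^k)-1, so log2n_powerof2(factor + 1) yields a ceiling log2 of the page count and large overflow pages get a higher spill priority. A standalone sketch of the trick, with a plain loop standing in for log2n_powerof2():

    #include <assert.h>
    #include <stddef.h>

    /* Round v up to (2^k)-1 by smearing its set bits rightward. */
    static size_t smear(size_t v) {
      v |= v >> 1;
      v |= v >> 2;
      v |= v >> 4;
      v |= v >> 8;
      return v;
    }

    /* stand-in for log2n_powerof2(): exact log2 of a power of two */
    static unsigned log2_pow2(size_t v) {
      unsigned n = 0;
      while (v >>= 1)
        ++n;
      return n;
    }

    int main(void) {
      assert(smear(1) == 1 && log2_pow2(smear(1) + 1) == 1);
      assert(smear(5) == 7 && log2_pow2(smear(5) + 1) == 3);
      assert(smear(16) == 31 && log2_pow2(smear(16) + 1) == 5);
      return 0;
    }
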
@@ -8645,7 +8651,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  if (txn->mt_flags & MDBX_WRITEMAP) {
  NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync",
  dirty_entries, dirty_npages);
- tASSERT(txn, txn->tw.spill_pages == nullptr);
+ tASSERT(txn, txn->tw.spilled.list == nullptr);
  const MDBX_env *env = txn->mt_env;
  rc =
  osal_msync(&txn->mt_env->me_dxb_mmap, 0,
@@ -8669,10 +8675,10 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >=
  need_spill_npages);
  if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
- if (!txn->tw.spill_pages) {
- txn->tw.spill_least_removed = INT_MAX;
- txn->tw.spill_pages = pnl_alloc(need_spill);
- if (unlikely(!txn->tw.spill_pages)) {
+ if (!txn->tw.spilled.list) {
+ txn->tw.spilled.least_removed = INT_MAX;
+ txn->tw.spilled.list = pnl_alloc(need_spill);
+ if (unlikely(!txn->tw.spilled.list)) {
  rc = MDBX_ENOMEM;
  bailout:
  txn->mt_flags |= MDBX_TXN_ERROR;
@@ -8681,7 +8687,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  } else {
  /* purge deleted slots */
  spill_purge(txn);
- rc = pnl_reserve(&txn->tw.spill_pages, need_spill);
+ rc = pnl_reserve(&txn->tw.spilled.list, need_spill);
  (void)rc /* ignore since the resulting list may be shorter
  and pnl_append() will increase pnl on demand */
  ;
@@ -8865,7 +8871,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  goto bailout;
 
  if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
- pnl_sort(txn->tw.spill_pages, (size_t)txn->mt_next_pgno << 1);
+ pnl_sort(txn->tw.spilled.list, (size_t)txn->mt_next_pgno << 1);
  txn->mt_flags |= MDBX_TXN_SPILLS;
  }
  NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
@@ -9279,6 +9285,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) {
  MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
  if (unlikely(lck == NULL /* exclusive without-lck mode */)) {
  eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub);
+ env->me_lck->mti_readers_refresh_flag.weak = nothing_changed;
  return env->me_lck->mti_oldest_reader.weak = steady;
  }
 
@@ -9367,10 +9374,13 @@ __cold static pgno_t find_largest_snapshot(const MDBX_env *env,
 
  /* Add a page to the txn's dirty list */
  __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
- pgno_t npages) {
+ size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
+ mp->mp_txnid = txn->mt_front;
  if (!txn->tw.dirtylist) {
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
+ txn->tw.writemap_dirty_npages += npages;
+ tASSERT(txn, txn->tw.spilled.list == nullptr);
  return MDBX_SUCCESS;
  }
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
@@ -9383,7 +9393,6 @@ __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
  #endif /* xMDBX_DEBUG_SPILLING == 2 */
 
  int rc;
- mp->mp_txnid = txn->mt_front;
  if (unlikely(txn->tw.dirtyroom == 0)) {
  if (txn->tw.loose_count) {
  MDBX_page *loose = txn->tw.loose_pages;
@@ -10093,6 +10102,8 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) {
  }
  #endif /* _MSC_VER */
 
+ #if !MDBX_PNL_ASCENDING
+
  #if !defined(MDBX_ATTRIBUTE_TARGET) && \
  (__has_attribute(__target__) || __GNUC_PREREQ(5, 0))
  #define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target)))
@@ -10406,6 +10417,8 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len,
  /* Choosing of another variants should be added here. */
  #endif /* scan4seq_default */
 
+ #endif /* MDBX_PNL_ASCENDING */
+
  #ifndef scan4seq_default
  #define scan4seq_default scan4seq_fallback
  #endif /* scan4seq_default */
@@ -10469,45 +10482,39 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len,
  *
  * Returns 0 on success, non-zero on failure.*/
 
- #define MDBX_ALLOC_GC 1
- #define MDBX_ALLOC_NEW 2
- #define MDBX_ALLOC_COALESCE 4
- #define MDBX_ALLOC_SLOT 8
- #define MDBX_ALLOC_RESERVE 16
- #define MDBX_ALLOC_BACKLOG 32
- #define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
- #define MDBX_ALLOC_LIFO 128
+ #define MDBX_ALLOC_DEFAULT 0
+ #define MDBX_ALLOC_RESERVE 1
+ #define MDBX_ALLOC_UNIMPORTANT 2
+ #define MDBX_ALLOC_COALESCE 4 /* internal state */
+ #define MDBX_ALLOC_SHOULD_SCAN 8 /* internal state */
+ #define MDBX_ALLOC_LIFO 16 /* internal state */
 
- static __inline bool is_gc_usable(const MDBX_txn *txn) {
+ static __inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc,
+ const uint8_t flags) {
  /* If txn is updating the GC, then the retired-list cannot play catch-up with
  * itself by growing while trying to save it. */
- if (txn->mt_flags & (MDBX_TXN_UPDATE_GC | MDBX_TXN_FROZEN_RE))
+ if (mc->mc_dbi == FREE_DBI && !(flags & MDBX_ALLOC_RESERVE) &&
+ !(mc->mc_flags & C_GCU))
  return false;
 
  /* avoid (recursive) search inside empty tree and while tree is
- updating, https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/31 */
+ updating, https://libmdbx.dqdkfa.ru/dead-github/issues/31 */
  if (txn->mt_dbs[FREE_DBI].md_entries == 0)
  return false;
 
- /* If our dirty list is already full, we can't touch GC */
- if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
- !(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY))
- return false;
-
  return true;
  }
 
- static int gc_cursor_init(MDBX_cursor *mc, MDBX_txn *txn) {
- if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) {
- ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB",
- txn->mt_dbs[FREE_DBI].md_flags);
- return MDBX_CORRUPTED;
- }
- return cursor_init(mc, txn, FREE_DBI);
+ __hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) {
+ const size_t len = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed);
+ for (size_t i = 1; i <= len; ++i)
+ if (txn->tw.lifo_reclaimed[i] == id)
+ return true;
+ return false;
  }
 
  static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
- char flags) {
+ uint8_t flags) {
  #if MDBX_ENABLE_PROFGC
  const uint64_t monotime_before = osal_monotime();
  size_t majflt_before;
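
Under the reworked scheme the allocation mode is carried by a small bitset plus the num argument: the old MDBX_ALLOC_SLOT disappears, a "slot" request is now simply a reserve request for zero pages, and MDBX_ALLOC_UNIMPORTANT takes over the quiet-logging role of MDBX_ALLOC_BACKLOG. A hypothetical decoding helper, only to illustrate the convention implied by the "fetch-slot" branch further below (not part of the source):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define MDBX_ALLOC_DEFAULT 0
    #define MDBX_ALLOC_RESERVE 1
    #define MDBX_ALLOC_UNIMPORTANT 2

    /* hypothetical helper: what the old MDBX_ALLOC_SLOT expressed is now a
     * reserve request for zero pages */
    static bool is_slot_request(uint8_t flags, size_t num) {
      return (flags & MDBX_ALLOC_RESERVE) && num == 0;
    }
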
@@ -10525,21 +10532,13 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
  prof->spe_counter += 1;
  #endif /* MDBX_ENABLE_PROFGC */
 
- eASSERT(env, num == 0 || !(flags & MDBX_ALLOC_SLOT));
- eASSERT(env, num > 0 || !(flags & MDBX_ALLOC_NEW));
- eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
- MDBX_ALLOC_BACKLOG)) == 0 ||
- (flags & MDBX_ALLOC_GC));
- eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
- MDBX_ALLOC_BACKLOG)) == 0 ||
- (flags & MDBX_ALLOC_NEW) == 0);
+ eASSERT(env, num > 0 || (flags & MDBX_ALLOC_RESERVE));
  eASSERT(env, pnl_check_allocated(txn->tw.relist,
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
 
  pgno_t pgno = 0, *range = nullptr;
- size_t re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
+ size_t newnext, re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
  if (num > 1) {
- eASSERT(env, !(flags & MDBX_ALLOC_SLOT));
  #if MDBX_ENABLE_PROFGC
  prof->xpages += 1;
  #endif /* MDBX_ENABLE_PROFGC */
@@ -10555,347 +10554,363 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
  }
  }
  } else {
- eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) ||
- MDBX_PNL_GETSIZE(txn->tw.relist) == 0);
+ eASSERT(env, num == 0 || re_len == 0);
  }
 
  //---------------------------------------------------------------------------
 
- if (likely(flags & MDBX_ALLOC_GC)) {
- if (unlikely(!is_gc_usable(txn)))
- goto no_gc;
+ if (unlikely(!is_gc_usable(txn, mc, flags)))
+ goto no_gc;
 
- eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO)) == 0);
- flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
+ eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO |
+ MDBX_ALLOC_SHOULD_SCAN)) == 0);
+ flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
 
- const unsigned coalesce_threshold = env->me_maxgc_ov1page >> 2;
+ if (/* Do not coalesce records while preparing a reserve for the GC update.
+ * Otherwise an attempt to grow the reserve may require an even larger
+ * reserve, because the list of recycled pages keeps growing. */
+ (flags & MDBX_ALLOC_RESERVE) == 0) {
  if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
- MDBX_PNL_GETSIZE(txn->tw.relist) < coalesce_threshold && num)
+ re_len < env->me_maxgc_ov1page / 2)
  flags += MDBX_ALLOC_COALESCE;
+ }
 
- MDBX_cursor recur;
- ret.err = gc_cursor_init(&recur, txn);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
+ MDBX_cursor *const gc =
+ (MDBX_cursor *)((char *)env->me_txn0 + sizeof(MDBX_txn));
+ gc->mc_txn = txn;
+ gc->mc_flags = 0;
 
- retry_gc_refresh_oldest:;
- txnid_t oldest = txn_oldest_reader(txn);
- if (unlikely(!oldest))
- goto no_gc;
-
- retry_gc_have_oldest:
- if (unlikely(oldest >= txn->mt_txnid)) {
- ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN
- " for current-txnid %" PRIaTXN,
- oldest, txn->mt_txnid);
- ret.err = MDBX_PROBLEM;
- goto fail;
- }
- const txnid_t detent = oldest + 1;
+ retry_gc_refresh_oldest:;
+ txnid_t oldest = txn_oldest_reader(txn);
+ retry_gc_have_oldest:
+ if (unlikely(oldest >= txn->mt_txnid)) {
+ ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN
+ " for current-txnid %" PRIaTXN,
+ oldest, txn->mt_txnid);
+ ret.err = MDBX_PROBLEM;
+ goto fail;
+ }
+ const txnid_t detent = oldest + 1;
 
- txnid_t last = 0;
- bool should_scan = false;
- MDBX_cursor_op op = MDBX_FIRST;
- if (flags & MDBX_ALLOC_LIFO) {
- if (!txn->tw.lifo_reclaimed) {
- txn->tw.lifo_reclaimed = txl_alloc();
- if (unlikely(!txn->tw.lifo_reclaimed)) {
- ret.err = MDBX_ENOMEM;
- goto fail;
- }
+ txnid_t id = 0;
+ MDBX_cursor_op op = MDBX_FIRST;
+ if (flags & MDBX_ALLOC_LIFO) {
+ if (!txn->tw.lifo_reclaimed) {
+ txn->tw.lifo_reclaimed = txl_alloc();
+ if (unlikely(!txn->tw.lifo_reclaimed)) {
+ ret.err = MDBX_ENOMEM;
+ goto fail;
  }
- /* Begin lookup backward from oldest reader */
- last = detent - 1;
- op = MDBX_SET_RANGE;
- } else if (txn->tw.last_reclaimed) {
- /* Continue lookup forward from last-reclaimed */
- last = txn->tw.last_reclaimed + 1;
- if (last >= detent)
- goto no_gc;
- op = MDBX_SET_RANGE;
  }
+ /* Begin lookup backward from oldest reader */
+ id = detent - 1;
+ op = MDBX_SET_RANGE;
+ } else if (txn->tw.last_reclaimed) {
+ /* Continue lookup forward from last-reclaimed */
+ id = txn->tw.last_reclaimed + 1;
+ if (id >= detent)
+ goto depleted_gc;
+ op = MDBX_SET_RANGE;
+ }
 
- next_gc:;
- MDBX_val key;
- key.iov_base = &last;
- key.iov_len = sizeof(last);
+ next_gc:;
+ MDBX_val key;
+ key.iov_base = &id;
+ key.iov_len = sizeof(id);
 
  #if MDBX_ENABLE_PROFGC
- prof->rsteps += 1;
+ prof->rsteps += 1;
  #endif /* MDBX_ENABLE_PROFGC */
 
- /* Seek first/next GC record */
- ret.err = mdbx_cursor_get(&recur, &key, NULL, op);
- if (unlikely(ret.err != MDBX_SUCCESS)) {
- if (unlikely(ret.err != MDBX_NOTFOUND))
- goto fail;
- if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
- op = MDBX_PREV;
- goto next_gc;
- }
- goto depleted_gc;
- }
- if (unlikely(key.iov_len != sizeof(txnid_t))) {
- ret.err = MDBX_CORRUPTED;
+ /* Seek first/next GC record */
+ ret.err = mdbx_cursor_get(gc, &key, NULL, op);
+ if (unlikely(ret.err != MDBX_SUCCESS)) {
+ if (unlikely(ret.err != MDBX_NOTFOUND))
  goto fail;
- }
- last = unaligned_peek_u64(4, key.iov_base);
- if (flags & MDBX_ALLOC_LIFO) {
+ if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
  op = MDBX_PREV;
- if (last >= detent)
- goto next_gc;
- /* skip IDs of records that already reclaimed */
- for (size_t i = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); i > 0; --i)
- if (txn->tw.lifo_reclaimed[i] == last)
- goto next_gc;
- } else {
- op = MDBX_NEXT;
- if (unlikely(last >= detent))
- goto depleted_gc;
+ goto next_gc;
  }
+ goto depleted_gc;
+ }
+ if (unlikely(key.iov_len != sizeof(txnid_t))) {
+ ret.err = MDBX_CORRUPTED;
+ goto fail;
+ }
+ id = unaligned_peek_u64(4, key.iov_base);
+ if (flags & MDBX_ALLOC_LIFO) {
+ op = MDBX_PREV;
+ if (id >= detent || is_already_reclaimed(txn, id))
+ goto next_gc;
+ } else {
+ op = MDBX_NEXT;
+ if (unlikely(id >= detent))
+ goto depleted_gc;
+ }
 
- /* Reading next GC record */
- MDBX_val data;
- MDBX_page *const mp = recur.mc_pg[recur.mc_top];
- if (unlikely((ret.err = node_read(&recur,
- page_node(mp, recur.mc_ki[recur.mc_top]),
- &data, mp)) != MDBX_SUCCESS))
- goto fail;
+ /* Reading next GC record */
+ MDBX_val data;
+ MDBX_page *const mp = gc->mc_pg[gc->mc_top];
+ if (unlikely((ret.err = node_read(gc, page_node(mp, gc->mc_ki[gc->mc_top]),
+ &data, mp)) != MDBX_SUCCESS))
+ goto fail;
 
- eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
- pgno_t *gc_pnl = (pgno_t *)data.iov_base;
- if (unlikely(data.iov_len % sizeof(pgno_t) ||
- data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
- !pnl_check(gc_pnl, txn->mt_next_pgno))) {
- ret.err = MDBX_CORRUPTED;
- goto fail;
+ pgno_t *gc_pnl = (pgno_t *)data.iov_base;
+ if (unlikely(data.iov_len % sizeof(pgno_t) ||
+ data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
+ !pnl_check(gc_pnl, txn->mt_next_pgno))) {
+ ret.err = MDBX_CORRUPTED;
+ goto fail;
+ }
+
+ const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
+ TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len,
+ gc_len + re_len);
+
+ eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
+ if (unlikely(gc_len + re_len >= env->me_maxgc_ov1page)) {
+ /* Don't try to coalesce too much. */
+ if (flags & MDBX_ALLOC_SHOULD_SCAN) {
+ eASSERT(env, flags & MDBX_ALLOC_COALESCE);
+ eASSERT(env, num > 0);
+ #if MDBX_ENABLE_PROFGC
+ env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
+ #endif /* MDBX_ENABLE_PROFGC */
+ TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
+ if (re_len >= num) {
+ eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
+ MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
+ range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+ pgno = *range;
+ if (num == 1)
+ goto done;
+ range = scan4seq(range, re_len, num - 1);
+ eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
+ if (likely(range)) {
+ pgno = *range;
+ goto done;
+ }
+ }
+ flags -= MDBX_ALLOC_COALESCE | MDBX_ALLOC_SHOULD_SCAN;
  }
- const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
- if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(
- txn->tw.relist) >= env->me_options.rp_augment_limit) &&
- ((/* not a slot-request from gc-update */
- (flags & MDBX_ALLOC_SLOT) == 0 &&
+ if (unlikely(/* list is too long already */ re_len >=
+ env->me_options.rp_augment_limit) &&
+ ((/* not a slot-request from gc-update */ num &&
  /* have enough unallocated space */ txn->mt_geo.upper >=
  txn->mt_next_pgno + num) ||
- gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= MDBX_PGL_LIMIT)) {
+ gc_len + re_len >= MDBX_PGL_LIMIT)) {
  /* Stop reclaiming to avoid large/overflow the page list.
  * This is a rare case while search for a continuously multi-page region
  * in a large database.
- * https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123 */
+ * https://libmdbx.dqdkfa.ru/dead-github/issues/123
+ */
  NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu "
  "(chunk) -> %zu",
- MDBX_PNL_GETSIZE(txn->tw.relist), gc_len,
- gc_len + MDBX_PNL_GETSIZE(txn->tw.relist));
+ re_len, gc_len, gc_len + re_len);
  goto depleted_gc;
  }
+ }
 
- /* Remember ID of readed GC record */
- txn->tw.last_reclaimed = last;
- if (flags & MDBX_ALLOC_LIFO) {
- ret.err = txl_append(&txn->tw.lifo_reclaimed, last);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
- }
-
- /* Append PNL from GC record to tw.relist */
- ret.err = pnl_need(&txn->tw.relist, gc_len);
+ /* Remember ID of readed GC record */
+ txn->tw.last_reclaimed = id;
+ if (flags & MDBX_ALLOC_LIFO) {
+ ret.err = txl_append(&txn->tw.lifo_reclaimed, id);
  if (unlikely(ret.err != MDBX_SUCCESS))
  goto fail;
- txn->tw.relist = txn->tw.relist;
+ }
 
- if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
- DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO
- " len %zu, PNL",
- last, txn->mt_dbs[FREE_DBI].md_root, gc_len);
- for (size_t i = gc_len; i; i--)
- DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
- DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
- }
+ /* Append PNL from GC record to tw.relist */
+ ret.err = pnl_need(&txn->tw.relist, gc_len);
+ if (unlikely(ret.err != MDBX_SUCCESS))
+ goto fail;
 
- /* Merge in descending sorted order */
- re_len = pnl_merge(txn->tw.relist, gc_pnl);
- should_scan = true;
- if (AUDIT_ENABLED()) {
- if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
- ret.err = MDBX_CORRUPTED;
- goto fail;
- }
- } else {
- eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno));
+ if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
+ DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO
+ " len %zu, PNL",
+ id, txn->mt_dbs[FREE_DBI].md_root, gc_len);
+ for (size_t i = gc_len; i; i--)
+ DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
+ DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
+ }
+
+ /* Merge in descending sorted order */
+ re_len = pnl_merge(txn->tw.relist, gc_pnl);
+ flags |= MDBX_ALLOC_SHOULD_SCAN;
+ if (AUDIT_ENABLED()) {
+ if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
+ ret.err = MDBX_CORRUPTED;
+ goto fail;
  }
- eASSERT(env, dirtylist_check(txn));
+ } else {
+ eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno));
+ }
+ eASSERT(env, dirtylist_check(txn));
 
- eASSERT(env,
- re_len == 0 || MDBX_PNL_MOST(txn->tw.relist) < txn->mt_next_pgno);
- if (MDBX_ENABLE_REFUND && re_len &&
- unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) {
- /* Refund suitable pages into "unallocated" space */
- if (txn_refund(txn))
- re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
- }
- eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
- eASSERT(env, pnl_check_allocated(txn->tw.relist,
- txn->mt_next_pgno - MDBX_ENABLE_REFUND));
+ eASSERT(env,
+ re_len == 0 || MDBX_PNL_MOST(txn->tw.relist) < txn->mt_next_pgno);
+ if (MDBX_ENABLE_REFUND && re_len &&
+ unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) {
+ /* Refund suitable pages into "unallocated" space */
+ txn_refund(txn);
+ re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
+ }
+ eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
+ eASSERT(env, pnl_check_allocated(txn->tw.relist,
+ txn->mt_next_pgno - MDBX_ENABLE_REFUND));
 
- /* Done for a kick-reclaim mode, actually no page needed */
- if (unlikely(flags & MDBX_ALLOC_SLOT)) {
- eASSERT(env, ret.err == MDBX_SUCCESS);
- goto early_exit;
- }
+ /* Done for a kick-reclaim mode, actually no page needed */
+ if (unlikely(num == 0)) {
+ eASSERT(env, ret.err == MDBX_SUCCESS);
+ TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id,
+ re_len);
+ goto early_exit;
+ }
 
- /* TODO: delete reclaimed records */
+ /* TODO: delete reclaimed records */
 
- /* Don't try to coalesce too much. */
- eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
- if (flags & MDBX_ALLOC_COALESCE) {
- if (re_len /* current size */ < coalesce_threshold) {
- #if MDBX_ENABLE_PROFGC
- env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
- #endif /* MDBX_ENABLE_PROFGC */
- goto next_gc;
- }
- TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
- flags &= ~MDBX_ALLOC_COALESCE;
- }
+ eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
+ if (flags & MDBX_ALLOC_COALESCE) {
+ TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id,
+ re_len);
+ goto next_gc;
+ }
 
- scan:
- eASSERT(env, should_scan);
- if (re_len >= num) {
- eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
- MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
- range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+ scan:
+ eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN);
+ eASSERT(env, num > 0);
+ if (re_len >= num) {
+ eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
+ MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
+ range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+ pgno = *range;
+ if (num == 1)
+ goto done;
+ range = scan4seq(range, re_len, num - 1);
+ eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
+ if (likely(range)) {
  pgno = *range;
- if (num == 1)
- goto done;
- range = scan4seq(range, re_len, num - 1);
- eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
- if (likely(range)) {
- pgno = *range;
- goto done;
- }
+ goto done;
  }
- should_scan = false;
- if (ret.err == MDBX_SUCCESS)
- goto next_gc;
+ }
+ flags -= MDBX_ALLOC_SHOULD_SCAN;
+ if (ret.err == MDBX_SUCCESS) {
+ TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id,
+ re_len);
+ goto next_gc;
+ }
 
- depleted_gc:
- ret.err = MDBX_NOTFOUND;
- if (should_scan)
- goto scan;
+ depleted_gc:
+ ret.err = MDBX_NOTFOUND;
+ if (flags & MDBX_ALLOC_SHOULD_SCAN)
+ goto scan;
 
- //-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
+
+ /* There is no suitable pages in the GC and to be able to allocate
+ * we should CHOICE one of:
+ * - make a new steady checkpoint if reclaiming was stopped by
+ * the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode;
+ * - kick lagging reader(s) if reclaiming was stopped by ones of it.
+ * - extend the database file. */
 
- /* There is no suitable pages in the GC and to be able to allocate
- * we should CHOICE one of:
- * - make a new steady checkpoint if reclaiming was stopped by
- * the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode;
- * - kick lagging reader(s) if reclaiming was stopped by ones of it.
- * - extend the database file. */
-
- /* Will use new pages from the map if nothing is suitable in the GC. */
- pgno = txn->mt_next_pgno;
- const size_t newnext = num + pgno;
-
- const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
- const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
- /* does reclaiming stopped at the last steady point? */
- if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady &&
- detent == prefer_steady.txnid + 1) {
- DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN
- "-%s, detent %" PRIaTXN,
- recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid,
- durable_caption(prefer_steady.ptr_c), detent);
- const pgno_t autosync_threshold =
- atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed);
- const uint64_t autosync_period =
- atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed);
- uint64_t eoos_timestamp;
- /* wipe the last steady-point if one of:
- * - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
- * - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
- * otherwise, make a new steady-point if one of:
- * - auto-sync threshold is specified and reached;
- * - upper limit of database size is reached;
- * - database is full (with the current file size)
- * AND auto-sync threshold it NOT specified */
- if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
- ((autosync_threshold | autosync_period) == 0 ||
- newnext >= prefer_steady.ptr_c->mm_geo.now)) {
- /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
- * without any auto-sync threshold(s). */
+ /* Will use new pages from the map if nothing is suitable in the GC. */
+ newnext = (pgno = txn->mt_next_pgno) + num;
+
+ /* Does reclaiming stopped at the last steady point? */
+ const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
+ const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
+ if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady &&
+ detent == prefer_steady.txnid + 1) {
+ DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN
+ "-%s, detent %" PRIaTXN,
+ recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid,
+ durable_caption(prefer_steady.ptr_c), detent);
+ const pgno_t autosync_threshold =
+ atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed);
+ const uint64_t autosync_period =
+ atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed);
+ uint64_t eoos_timestamp;
+ /* wipe the last steady-point if one of:
+ * - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
+ * - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
+ * otherwise, make a new steady-point if one of:
+ * - auto-sync threshold is specified and reached;
+ * - upper limit of database size is reached;
+ * - database is full (with the current file size)
+ * AND auto-sync threshold it NOT specified */
+ if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
+ ((autosync_threshold | autosync_period) == 0 ||
+ newnext >= prefer_steady.ptr_c->mm_geo.now)) {
+ /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
+ * without any auto-sync threshold(s). */
  #if MDBX_ENABLE_PROFGC
- env->me_lck->mti_pgop_stat.gc_prof.wipes += 1;
+ env->me_lck->mti_pgop_stat.gc_prof.wipes += 1;
  #endif /* MDBX_ENABLE_PROFGC */
- ret.err = wipe_steady(txn, detent);
- DEBUG("gc-wipe-steady, rc %d", ret.err);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
- eASSERT(env, prefer_steady.ptr_c !=
- meta_prefer_steady(env, &txn->tw.troika).ptr_c);
- goto retry_gc_refresh_oldest;
- }
- if ((flags & (MDBX_ALLOC_BACKLOG | MDBX_ALLOC_NEW)) == 0 ||
- (autosync_threshold &&
- atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
- autosync_threshold) ||
- (autosync_period &&
- (eoos_timestamp =
- atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
- osal_monotime() - eoos_timestamp >= autosync_period) ||
- newnext >= txn->mt_geo.upper ||
- (newnext >= txn->mt_end_pgno &&
- (autosync_threshold | autosync_period) == 0)) {
- /* make steady checkpoint. */
+ ret.err = wipe_steady(txn, detent);
+ DEBUG("gc-wipe-steady, rc %d", ret.err);
+ if (unlikely(ret.err != MDBX_SUCCESS))
+ goto fail;
+ eASSERT(env, prefer_steady.ptr_c !=
+ meta_prefer_steady(env, &txn->tw.troika).ptr_c);
+ goto retry_gc_refresh_oldest;
+ }
+ if ((autosync_threshold &&
+ atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
+ autosync_threshold) ||
+ (autosync_period &&
+ (eoos_timestamp =
+ atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
+ osal_monotime() - eoos_timestamp >= autosync_period) ||
+ newnext >= txn->mt_geo.upper ||
+ ((num == 0 || newnext >= txn->mt_end_pgno) &&
+ (autosync_threshold | autosync_period) == 0)) {
+ /* make steady checkpoint. */
  #if MDBX_ENABLE_PROFGC
- env->me_lck->mti_pgop_stat.gc_prof.flushes += 1;
+ env->me_lck->mti_pgop_stat.gc_prof.flushes += 1;
  #endif /* MDBX_ENABLE_PROFGC */
- MDBX_meta meta = *recent.ptr_c;
- ret.err = sync_locked(env, env->me_flags & MDBX_WRITEMAP, &meta,
- &txn->tw.troika);
- DEBUG("gc-make-steady, rc %d", ret.err);
- eASSERT(env, ret.err != MDBX_RESULT_TRUE);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
- eASSERT(env, prefer_steady.ptr_c !=
- meta_prefer_steady(env, &txn->tw.troika).ptr_c);
- goto retry_gc_refresh_oldest;
- }
+ MDBX_meta meta = *recent.ptr_c;
+ ret.err = sync_locked(env, env->me_flags & MDBX_WRITEMAP, &meta,
+ &txn->tw.troika);
+ DEBUG("gc-make-steady, rc %d", ret.err);
+ eASSERT(env, ret.err != MDBX_RESULT_TRUE);
+ if (unlikely(ret.err != MDBX_SUCCESS))
+ goto fail;
+ eASSERT(env, prefer_steady.ptr_c !=
+ meta_prefer_steady(env, &txn->tw.troika).ptr_c);
+ goto retry_gc_refresh_oldest;
  }
+ }
 
10865
- if (env->me_lck_mmap.lck &&
10866
- unlikely(true ==
10867
- atomic_load32(&env->me_lck_mmap.lck->mti_readers_refresh_flag,
10868
- mo_AcquireRelease))) {
10869
- oldest = txn_oldest_reader(txn);
10870
- if (oldest >= detent)
10871
- goto retry_gc_have_oldest;
10872
- }
10884
+ if (unlikely(true == atomic_load32(&env->me_lck->mti_readers_refresh_flag,
10885
+ mo_AcquireRelease))) {
10886
+ oldest = txn_oldest_reader(txn);
10887
+ if (oldest >= detent)
10888
+ goto retry_gc_have_oldest;
10889
+ }
10873
10890
 
10874
- /* avoid kick lagging reader(s) if is enough unallocated space
10875
- * at the end of database file. */
10876
- if ((flags & MDBX_ALLOC_NEW) && newnext <= txn->mt_end_pgno) {
10877
- eASSERT(env, range == nullptr);
10878
- goto done;
10879
- }
10891
+ /* Avoid kick lagging reader(s) if is enough unallocated space
10892
+ * at the end of database file. */
10893
+ if (!(flags & MDBX_ALLOC_RESERVE) && newnext <= txn->mt_end_pgno) {
10894
+ eASSERT(env, range == nullptr);
10895
+ goto done;
10896
+ }
10880
10897
 
10881
- if (oldest < txn->mt_txnid - xMDBX_TXNID_STEP) {
10882
- oldest = kick_longlived_readers(env, oldest);
10883
- if (oldest >= detent)
10884
- goto retry_gc_have_oldest;
10885
- }
10898
+ if (oldest < txn->mt_txnid - xMDBX_TXNID_STEP) {
10899
+ oldest = kick_longlived_readers(env, oldest);
10900
+ if (oldest >= detent)
10901
+ goto retry_gc_have_oldest;
10886
10902
  }
10887
10903
 
10888
10904
  //---------------------------------------------------------------------------
10889
10905
 
10890
10906
  no_gc:
10891
- if ((flags & MDBX_ALLOC_NEW) == 0) {
10907
+ if (flags & MDBX_ALLOC_RESERVE) {
10892
10908
  ret.err = MDBX_NOTFOUND;
10893
10909
  goto fail;
10894
10910
  }
10895
10911
 
10896
10912
  /* Will use new pages from the map if nothing is suitable in the GC. */
10897
- pgno = txn->mt_next_pgno;
10898
- const size_t newnext = num + pgno;
10913
+ newnext = (pgno = txn->mt_next_pgno) + num;
10899
10914
  if (newnext <= txn->mt_end_pgno)
10900
10915
  goto done;
10901
10916
 
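The condition above that forces a steady checkpoint is easier to audit when flattened into a pure predicate. A minimal sketch, assuming the caller samples the atomics and the monotonic clock beforehand (the helper name and parameter list are hypothetical; the field names mirror the diff):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Sketch of the gc-make-steady trigger from the hunk above. */
    static bool should_make_steady(uint64_t autosync_threshold, uint64_t unsynced_pages,
                                   uint64_t autosync_period, uint64_t eoos_timestamp,
                                   uint64_t monotime_now, size_t num,
                                   uint64_t newnext, uint64_t geo_upper,
                                   uint64_t end_pgno) {
      if (autosync_threshold && unsynced_pages >= autosync_threshold)
        return true; /* too many unsynced pages accumulated */
      if (autosync_period && eoos_timestamp &&
          monotime_now - eoos_timestamp >= autosync_period)
        return true; /* the out-of-sync grace period has expired */
      if (newnext >= geo_upper)
        return true; /* allocation would hit the geometry's upper bound */
      if ((num == 0 || newnext >= end_pgno) &&
          (autosync_threshold | autosync_period) == 0)
        return true; /* no autosync configured: slot-only request, or
                      * growth past the current file end */
      return false;
    }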
@@ -10932,12 +10947,12 @@ no_gc:
10932
10947
 
10933
10948
  done:
10934
10949
  ret.err = MDBX_SUCCESS;
10935
- if (likely((flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) == 0)) {
10950
+ if (likely((flags & MDBX_ALLOC_RESERVE) == 0)) {
10936
10951
  ENSURE(env, pgno >= NUM_METAS);
10937
10952
  if (range) {
10938
- eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
10939
10953
  eASSERT(env, pgno == *range);
10940
10954
  eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS);
10955
+ eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
10941
10956
  /* Cutoff allocated pages from tw.relist */
10942
10957
  #if MDBX_PNL_ASCENDING
10943
10958
  for (const pgno_t *const end = re_list + re_len - num; range <= end;
@@ -10951,7 +10966,6 @@ done:
10951
10966
  eASSERT(env, pnl_check_allocated(txn->tw.relist,
10952
10967
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
10953
10968
  } else {
10954
- eASSERT(env, flags & MDBX_ALLOC_NEW);
10955
10969
  pgno = txn->mt_next_pgno;
10956
10970
  txn->mt_next_pgno += (pgno_t)num;
10957
10971
  eASSERT(env, txn->mt_next_pgno <= txn->mt_end_pgno);
@@ -10995,8 +11009,9 @@ done:
10995
11009
  int level;
10996
11010
  const char *what;
10997
11011
  if (flags & MDBX_ALLOC_RESERVE) {
10998
- level = (flags & MDBX_ALLOC_BACKLOG) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
10999
- what = (flags & MDBX_ALLOC_SLOT) ? "gc-slot/backlog" : "backlog-pages";
11012
+ level =
11013
+ (flags & MDBX_ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
11014
+ what = num ? "reserve-pages" : "fetch-slot";
11000
11015
  } else {
11001
11016
  txn->mt_flags |= MDBX_TXN_ERROR;
11002
11017
  level = MDBX_LOG_ERROR;
@@ -11011,7 +11026,7 @@ done:
11011
11026
  } else {
11012
11027
  early_exit:
11013
11028
  DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num,
11014
- (flags & MDBX_ALLOC_SLOT) ? "SLOT" : "RESERVE", ret.err);
11029
+ num ? "RESERVE" : "SLOT", ret.err);
11015
11030
  ret.page = NULL;
11016
11031
  }
11017
11032
 
@@ -11057,84 +11072,103 @@ __hot static pgr_t page_alloc(const MDBX_cursor *mc) {
11057
11072
  return ret;
11058
11073
  }
11059
11074
 
11060
- if (likely(!(txn->mt_flags & MDBX_TXN_FROZEN_RE))) {
11061
- MDBX_PNL pnl = txn->tw.relist;
11062
- const size_t len = MDBX_PNL_GETSIZE(pnl);
11063
- if (likely(len > 0)) {
11064
- MDBX_env *const env = txn->mt_env;
11075
+ MDBX_PNL pnl = txn->tw.relist;
11076
+ const size_t len = MDBX_PNL_GETSIZE(pnl);
11077
+ if (likely(len > 0)) {
11078
+ MDBX_env *const env = txn->mt_env;
11065
11079
 
11066
- MDBX_PNL_SETSIZE(pnl, len - 1);
11080
+ MDBX_PNL_SETSIZE(pnl, len - 1);
11067
11081
  #if MDBX_PNL_ASCENDING
11068
- const pgno_t pgno = pnl[1];
11069
- for (size_t i = 1; i < len; ++i)
11070
- pnl[i] = pnl[i + 1];
11082
+ const pgno_t pgno = pnl[1];
11083
+ for (size_t i = 1; i < len; ++i)
11084
+ pnl[i] = pnl[i + 1];
11071
11085
  #else
11072
- const pgno_t pgno = pnl[len];
11086
+ const pgno_t pgno = pnl[len];
11073
11087
  #endif
11074
11088
 
11075
11089
  #if MDBX_ENABLE_PROFGC
11076
- const uint64_t monotime_before = osal_monotime();
11077
- size_t majflt_before;
11078
- const uint64_t cputime_before = osal_cputime(&majflt_before);
11079
- profgc_stat_t *const prof =
11080
- (mc->mc_dbi == FREE_DBI) ? &env->me_lck->mti_pgop_stat.gc_prof.self
11081
- : &env->me_lck->mti_pgop_stat.gc_prof.work;
11090
+ const uint64_t monotime_before = osal_monotime();
11091
+ size_t majflt_before;
11092
+ const uint64_t cputime_before = osal_cputime(&majflt_before);
11093
+ profgc_stat_t *const prof = (mc->mc_dbi == FREE_DBI)
11094
+ ? &env->me_lck->mti_pgop_stat.gc_prof.self
11095
+ : &env->me_lck->mti_pgop_stat.gc_prof.work;
11082
11096
  #endif /* MDBX_ENABLE_PROFGC */
11083
- pgr_t ret;
11084
- if (env->me_flags & MDBX_WRITEMAP) {
11085
- ret.page = pgno2page(env, pgno);
11086
- MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
11087
- } else {
11088
- ret.page = page_malloc(txn, 1);
11089
- if (unlikely(!ret.page)) {
11090
- ret.err = MDBX_ENOMEM;
11091
- goto bailout;
11092
- }
11097
+ pgr_t ret;
11098
+ if (env->me_flags & MDBX_WRITEMAP) {
11099
+ ret.page = pgno2page(env, pgno);
11100
+ MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
11101
+ } else {
11102
+ ret.page = page_malloc(txn, 1);
11103
+ if (unlikely(!ret.page)) {
11104
+ ret.err = MDBX_ENOMEM;
11105
+ goto bailout;
11093
11106
  }
11107
+ }
11094
11108
 
11095
- VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
11096
- ret.page->mp_pgno = pgno;
11097
- ret.page->mp_leaf2_ksize = 0;
11098
- ret.page->mp_flags = 0;
11099
- tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
11109
+ VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
11110
+ ret.page->mp_pgno = pgno;
11111
+ ret.page->mp_leaf2_ksize = 0;
11112
+ ret.page->mp_flags = 0;
11113
+ tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
11100
11114
 
11101
- ret.err = page_dirty(txn, ret.page, 1);
11102
- bailout:
11103
- tASSERT(txn, pnl_check_allocated(txn->tw.relist,
11104
- txn->mt_next_pgno - MDBX_ENABLE_REFUND));
11115
+ ret.err = page_dirty(txn, ret.page, 1);
11116
+ bailout:
11117
+ tASSERT(txn, pnl_check_allocated(txn->tw.relist,
11118
+ txn->mt_next_pgno - MDBX_ENABLE_REFUND));
11105
11119
  #if MDBX_ENABLE_PROFGC
11106
- size_t majflt_after;
11107
- prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
11108
- prof->majflt += majflt_after - majflt_before;
11109
- prof->xtime_monotonic += osal_monotime() - monotime_before;
11120
+ size_t majflt_after;
11121
+ prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
11122
+ prof->majflt += majflt_after - majflt_before;
11123
+ prof->xtime_monotonic += osal_monotime() - monotime_before;
11110
11124
  #endif /* MDBX_ENABLE_PROFGC */
11111
- return ret;
11112
- }
11125
+ return ret;
11113
11126
  }
11114
11127
 
11115
- return page_alloc_slowpath(mc, 1, MDBX_ALLOC_ALL);
11128
+ return page_alloc_slowpath(mc, 1, MDBX_ALLOC_DEFAULT);
11116
11129
  }
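For orientation, the fast path above pops a single page number from the relist, a PNL whose slot 0 holds the element count (the MDBX_PNL_GETSIZE/MDBX_PNL_SETSIZE idiom visible in the code). A standalone sketch of the two layouts; pnl_pop is a hypothetical name:

    #include <stddef.h>
    #include <stdint.h>

    typedef uint32_t pgno_t;

    /* pnl[0] = count, elements occupy pnl[1..count]; caller guarantees count > 0. */
    static pgno_t pnl_pop(pgno_t *pnl, int ascending) {
      const size_t len = pnl[0];
      pnl[0] = (pgno_t)(len - 1);
      if (ascending) {
        /* take the head element and shift the tail down one slot */
        const pgno_t pgno = pnl[1];
        for (size_t i = 1; i < len; ++i)
          pnl[i] = pnl[i + 1];
        return pgno;
      }
      /* descending layout keeps the preferred element at the tail: O(1) pop */
      return pnl[len];
    }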
11117
11130
 
11118
- /* Copy the used portions of a non-large/overflow page. */
11119
- __hot static void page_copy(MDBX_page *dst, const MDBX_page *src,
11120
- size_t psize) {
11131
+ /* Copy the used portions of a page. */
11132
+ __hot static void page_copy(MDBX_page *const dst, const MDBX_page *const src,
11133
+ const size_t size) {
11121
11134
  STATIC_ASSERT(UINT16_MAX > MAX_PAGESIZE - PAGEHDRSZ);
11122
11135
  STATIC_ASSERT(MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4);
11136
+ char *copy_dst = (void *)dst;
11137
+ const char *copy_src = (const void *)src;
11138
+ size_t copy_len = size;
11139
+ if (src->mp_flags & P_LEAF2) {
11140
+ copy_len = PAGEHDRSZ + src->mp_leaf2_ksize * page_numkeys(src);
11141
+ if (unlikely(copy_len > size))
11142
+ goto bailout;
11143
+ }
11123
11144
  if ((src->mp_flags & (P_LEAF2 | P_OVERFLOW)) == 0) {
11124
- size_t upper = src->mp_upper, lower = src->mp_lower, unused = upper - lower;
11125
-
11145
+ size_t upper = src->mp_upper, lower = src->mp_lower;
11146
+ intptr_t unused = upper - lower;
11126
11147
  /* If page isn't full, just copy the used portion. Adjust
11127
11148
  * alignment so memcpy may copy words instead of bytes. */
11128
- if (unused >= MDBX_CACHELINE_SIZE * 2) {
11149
+ if (unused > MDBX_CACHELINE_SIZE * 3) {
11129
11150
  lower = ceil_powerof2(lower + PAGEHDRSZ, sizeof(void *));
11130
11151
  upper = floor_powerof2(upper + PAGEHDRSZ, sizeof(void *));
11131
- memcpy(dst, src, lower);
11132
- dst = (void *)((char *)dst + upper);
11133
- src = (void *)((char *)src + upper);
11134
- psize -= upper;
11152
+ if (unlikely(upper > copy_len))
11153
+ goto bailout;
11154
+ memcpy(copy_dst, copy_src, lower);
11155
+ copy_dst += upper;
11156
+ copy_src += upper;
11157
+ copy_len -= upper;
11135
11158
  }
11136
11159
  }
11137
- memcpy(dst, src, psize);
11160
+ memcpy(copy_dst, copy_src, copy_len);
11161
+ return;
11162
+
11163
+ bailout:
11164
+ if (src->mp_flags & P_LEAF2)
11165
+ bad_page(src, "%s addr %p, n-keys %zu, ksize %u",
11166
+ "invalid/corrupted source page", __Wpedantic_format_voidptr(src),
11167
+ page_numkeys(src), src->mp_leaf2_ksize);
11168
+ else
11169
+ bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page",
11170
+ __Wpedantic_format_voidptr(src), src->mp_upper);
11171
+ memset(dst, -1, size);
11138
11172
  }
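The reworked page_copy skips the unused gap between the node-pointer array (which grows up to mp_lower) and the data area (which grows down from mp_upper) whenever the gap exceeds three cache lines, rounding both split points to word boundaries so memcpy can run word-wise. The core arithmetic as a sketch, with the power-of-two helpers inlined and the corruption bailout elided (the header size here is an assumption for illustration only):

    #include <stddef.h>
    #include <string.h>

    #define SKETCH_PAGEHDRSZ 20u /* assumed header size, for the sketch only */

    static size_t ceil_pow2(size_t n, size_t a) { return (n + a - 1) & ~(a - 1); }
    static size_t floor_pow2(size_t n, size_t a) { return n & ~(a - 1); }

    /* Copy only the used portions: the header plus node pointers up to
     * `lower`, then the data area from `upper` to the end of the page. */
    static void copy_used_portions(char *dst, const char *src, size_t psize,
                                   size_t mp_lower, size_t mp_upper) {
      const size_t lower = ceil_pow2(mp_lower + SKETCH_PAGEHDRSZ, sizeof(void *));
      const size_t upper = floor_pow2(mp_upper + SKETCH_PAGEHDRSZ, sizeof(void *));
      memcpy(dst, src, lower);                         /* header + pointer array */
      memcpy(dst + upper, src + upper, psize - upper); /* in-use data area */
    }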
11139
11173
 
11140
11174
  /* Pull a page off the txn's spill list, if present.
@@ -11541,7 +11575,9 @@ __cold int mdbx_env_sync_poll(MDBX_env *env) {
11541
11575
 
11542
11576
  /* Back up parent txn's cursors, then grab the originals for tracking */
11543
11577
  static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
11544
- for (int i = parent->mt_numdbs; --i >= 0;) {
11578
+ tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr);
11579
+ nested->mt_cursors[FREE_DBI] = nullptr;
11580
+ for (int i = parent->mt_numdbs; --i > FREE_DBI;) {
11545
11581
  nested->mt_cursors[i] = NULL;
11546
11582
  MDBX_cursor *mc = parent->mt_cursors[i];
11547
11583
  if (mc != NULL) {
@@ -11586,7 +11622,8 @@ static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
11586
11622
  *
11587
11623
  * Returns 0 on success, non-zero on failure. */
11588
11624
  static void cursors_eot(MDBX_txn *txn, const bool merge) {
11589
- for (intptr_t i = txn->mt_numdbs; --i >= 0;) {
11625
+ tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr);
11626
+ for (intptr_t i = txn->mt_numdbs; --i > FREE_DBI;) {
11590
11627
  MDBX_cursor *next, *mc = txn->mt_cursors[i];
11591
11628
  if (!mc)
11592
11629
  continue;
@@ -11856,7 +11893,7 @@ __cold int mdbx_thread_unregister(const MDBX_env *env) {
11856
11893
  return MDBX_SUCCESS;
11857
11894
  }
11858
11895
 
11859
- /* check against https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269 */
11896
+ /* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
11860
11897
  static bool coherency_check(const MDBX_env *env, const txnid_t txnid,
11861
11898
  const volatile MDBX_db *dbs,
11862
11899
  const volatile MDBX_meta *meta, bool report) {
@@ -11957,7 +11994,7 @@ __cold static int coherency_timeout(uint64_t *timestamp, pgno_t pgno) {
11957
11994
  }
11958
11995
 
11959
11996
  /* check with timeout as the workaround
11960
- * for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269 */
11997
+ * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
11961
11998
  __hot static int coherency_check_readed(const MDBX_env *env,
11962
11999
  const txnid_t txnid,
11963
12000
  const volatile MDBX_db *dbs,
@@ -12193,8 +12230,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
12193
12230
  txn->tw.troika = meta_tap(env);
12194
12231
  const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
12195
12232
  uint64_t timestamp = 0;
12196
- while (
12197
- "workaround for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
12233
+ while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
12198
12234
  rc = coherency_check_readed(env, head.txnid, head.ptr_v->mm_dbs,
12199
12235
  head.ptr_v, &timestamp);
12200
12236
  if (likely(rc == MDBX_SUCCESS))
@@ -12219,8 +12255,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
12219
12255
  txn->tw.loose_refund_wl = 0;
12220
12256
  #endif /* MDBX_ENABLE_REFUND */
12221
12257
  MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0);
12222
- txn->tw.spill_pages = NULL;
12223
- txn->tw.spill_least_removed = 0;
12258
+ txn->tw.spilled.list = NULL;
12259
+ txn->tw.spilled.least_removed = 0;
12224
12260
  txn->tw.last_reclaimed = 0;
12225
12261
  if (txn->tw.lifo_reclaimed)
12226
12262
  MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0);
@@ -12297,6 +12333,19 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
12297
12333
  osal_srwlock_AcquireShared(&env->me_remap_guard);
12298
12334
  }
12299
12335
  #endif /* Windows */
12336
+ } else {
12337
+ if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) {
12338
+ ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB",
12339
+ txn->mt_dbs[FREE_DBI].md_flags);
12340
+ rc = MDBX_INCOMPATIBLE;
12341
+ goto bailout;
12342
+ }
12343
+
12344
+ tASSERT(txn, txn == env->me_txn0);
12345
+ MDBX_cursor *const gc = (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn));
12346
+ rc = cursor_init(gc, txn, FREE_DBI);
12347
+ if (rc != MDBX_SUCCESS)
12348
+ goto bailout;
12300
12349
  }
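This hunk pairs with the later change of tsize to sizeof(MDBX_txn) + sizeof(MDBX_cursor): the write transaction and its dedicated GC cursor now live in one allocation, and the cursor is recovered by plain pointer arithmetic, which in turn lets cursor_shadow()/cursors_eot() skip slot FREE_DBI. A minimal sketch of the layout trick with stub types (the struct bodies are placeholders):

    #include <stdlib.h>

    typedef struct sketch_txn { int placeholder; } sketch_txn_t;
    typedef struct sketch_cursor { int placeholder; } sketch_cursor_t;

    /* One block holds the txn immediately followed by its GC cursor. */
    static sketch_txn_t *txn_alloc_with_gc_cursor(void) {
      return (sketch_txn_t *)calloc(1, sizeof(sketch_txn_t) + sizeof(sketch_cursor_t));
    }

    /* Mirrors (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn)) from the diff. */
    static sketch_cursor_t *txn_gc_cursor(sketch_txn_t *txn) {
      return (sketch_cursor_t *)((char *)txn + sizeof(sketch_txn_t));
    }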
12301
12350
  #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
12302
12351
  txn_valgrind(env, txn);
@@ -12514,7 +12563,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
12514
12563
  txn->tw.dirtylru = parent->tw.dirtylru;
12515
12564
 
12516
12565
  dpl_sort(parent);
12517
- if (parent->tw.spill_pages)
12566
+ if (parent->tw.spilled.list)
12518
12567
  spill_purge(parent);
12519
12568
 
12520
12569
  tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >=
@@ -12591,7 +12640,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
12591
12640
  eASSERT(env, (txn->mt_flags &
12592
12641
  ~(MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC |
12593
12642
  MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0);
12594
- assert(!txn->tw.spill_pages && !txn->tw.spill_least_removed);
12643
+ assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed);
12595
12644
  }
12596
12645
  txn->mt_signature = MDBX_MT_SIGNATURE;
12597
12646
  txn->mt_userctx = context;
@@ -12696,10 +12745,9 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
12696
12745
  env, txn->mt_child ? (size_t)txn->tw.retired_pages
12697
12746
  : MDBX_PNL_GETSIZE(txn->tw.retired_pages));
12698
12747
  info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
12699
- info->txn_space_dirty =
12700
- txn->tw.dirtylist
12701
- ? pgno2bytes(env, txn->tw.dirtylist->pages_including_loose)
12702
- : 0;
12748
+ info->txn_space_dirty = pgno2bytes(
12749
+ env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
12750
+ : txn->tw.writemap_dirty_npages);
12703
12751
  info->txn_reader_lag = INT64_MAX;
12704
12752
  MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
12705
12753
  if (scan_rlt && lck) {
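The rewritten computation reflects that a WRITEMAP transaction keeps no dirty-page list, only a running counter of dirty pages, so both sources now funnel through a single pgno2bytes() call. The same selection as a sketch (plain integers stand in for the real types; pgno2bytes_sketch is hypothetical):

    #include <stddef.h>
    #include <stdint.h>

    static uint64_t pgno2bytes_sketch(size_t pagesize, size_t npages) {
      return (uint64_t)pagesize * npages; /* stand-in for pgno2bytes() */
    }

    /* dirtylist present: report pages_including_loose;
     * WRITEMAP (no dirtylist): report the writemap_dirty_npages counter. */
    static uint64_t txn_space_dirty_sketch(size_t pagesize,
                                           const size_t *pages_including_loose,
                                           size_t writemap_dirty_npages) {
      return pgno2bytes_sketch(pagesize, pages_including_loose
                                             ? *pages_including_loose
                                             : writemap_dirty_npages);
    }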
@@ -13015,8 +13063,8 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) {
13015
13063
  txn->mt_flags = MDBX_TXN_FINISHED;
13016
13064
  txn->mt_owner = 0;
13017
13065
  env->me_txn = txn->mt_parent;
13018
- pnl_free(txn->tw.spill_pages);
13019
- txn->tw.spill_pages = nullptr;
13066
+ pnl_free(txn->tw.spilled.list);
13067
+ txn->tw.spilled.list = nullptr;
13020
13068
  if (txn == env->me_txn0) {
13021
13069
  eASSERT(env, txn->mt_parent == NULL);
13022
13070
  /* Export or close DBI handles created in this txn */
@@ -13283,7 +13331,7 @@ typedef struct gc_update_context {
13283
13331
  #if MDBX_ENABLE_BIGFOOT
13284
13332
  txnid_t bigfoot;
13285
13333
  #endif /* MDBX_ENABLE_BIGFOOT */
13286
- MDBX_cursor_couple cursor;
13334
+ MDBX_cursor cursor;
13287
13335
  } gcu_context_t;
13288
13336
 
13289
13337
  static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
@@ -13292,7 +13340,7 @@ static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
13292
13340
  #if MDBX_ENABLE_BIGFOOT
13293
13341
  ctx->bigfoot = txn->mt_txnid;
13294
13342
  #endif /* MDBX_ENABLE_BIGFOOT */
13295
- return cursor_init(&ctx->cursor.outer, txn, FREE_DBI);
13343
+ return cursor_init(&ctx->cursor, txn, FREE_DBI);
13296
13344
  }
13297
13345
 
13298
13346
  static __always_inline size_t gcu_backlog_size(MDBX_txn *txn) {
@@ -13311,10 +13359,10 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
13311
13359
  #endif /* MDBX_ENABLE_BIGFOOT */
13312
13360
  key.iov_len = sizeof(txnid_t);
13313
13361
  const struct cursor_set_result csr =
13314
- cursor_set(&ctx->cursor.outer, &key, &val, MDBX_SET);
13362
+ cursor_set(&ctx->cursor, &key, &val, MDBX_SET);
13315
13363
  if (csr.err == MDBX_SUCCESS && csr.exact) {
13316
13364
  ctx->retired_stored = 0;
13317
- err = mdbx_cursor_del(&ctx->cursor.outer, 0);
13365
+ err = mdbx_cursor_del(&ctx->cursor, 0);
13318
13366
  TRACE("== clear-4linear, backlog %zu, err %d", gcu_backlog_size(txn),
13319
13367
  err);
13320
13368
  }
@@ -13327,6 +13375,13 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
13327
13375
  return err;
13328
13376
  }
13329
13377
 
13378
+ static int gcu_touch(gcu_context_t *ctx) {
13379
+ ctx->cursor.mc_flags |= C_GCU;
13380
+ int err = cursor_touch(&ctx->cursor);
13381
+ ctx->cursor.mc_flags -= C_GCU;
13382
+ return err;
13383
+ }
13384
+
13330
13385
  /* Prepare a backlog of pages to modify GC itself, while reclaiming is
13331
13386
  * prohibited. It should be enough to prevent search in page_alloc_slowpath()
13332
13387
  * during a deleting, when GC tree is unbalanced. */
@@ -13356,14 +13411,12 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
13356
13411
  key.iov_base = val.iov_base = nullptr;
13357
13412
  key.iov_len = sizeof(txnid_t);
13358
13413
  val.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
13359
- err = cursor_spill(&ctx->cursor.outer, &key, &val);
13414
+ err = cursor_spill(&ctx->cursor, &key, &val);
13360
13415
  if (unlikely(err != MDBX_SUCCESS))
13361
13416
  return err;
13362
13417
  }
13363
13418
 
13364
- tASSERT(txn, txn->mt_flags & MDBX_TXN_UPDATE_GC);
13365
- txn->mt_flags -= MDBX_TXN_UPDATE_GC;
13366
- err = cursor_touch(&ctx->cursor.outer);
13419
+ err = gcu_touch(ctx);
13367
13420
  TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
13368
13421
 
13369
13422
  if (unlikely(pages4retiredlist > 1) &&
@@ -13373,22 +13426,20 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
13373
13426
  err = gcu_clean_stored_retired(txn, ctx);
13374
13427
  if (unlikely(err != MDBX_SUCCESS))
13375
13428
  return err;
13376
- err = page_alloc_slowpath(&ctx->cursor.outer, (pgno_t)pages4retiredlist,
13377
- MDBX_ALLOC_GC | MDBX_ALLOC_RESERVE)
13378
- .err;
13429
+ err =
13430
+ page_alloc_slowpath(&ctx->cursor, pages4retiredlist, MDBX_ALLOC_RESERVE)
13431
+ .err;
13379
13432
  TRACE("== after-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err);
13380
- cASSERT(&ctx->cursor.outer,
13433
+ cASSERT(&ctx->cursor,
13381
13434
  gcu_backlog_size(txn) >= pages4retiredlist || err != MDBX_SUCCESS);
13382
13435
  }
13383
13436
 
13384
13437
  while (gcu_backlog_size(txn) < backlog4cow + pages4retiredlist &&
13385
13438
  err == MDBX_SUCCESS)
13386
- err = page_alloc_slowpath(&ctx->cursor.outer, 0,
13387
- MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
13388
- MDBX_ALLOC_RESERVE | MDBX_ALLOC_BACKLOG)
13439
+ err = page_alloc_slowpath(&ctx->cursor, 0,
13440
+ MDBX_ALLOC_RESERVE | MDBX_ALLOC_UNIMPORTANT)
13389
13441
  .err;
13390
13442
 
13391
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13392
13443
  TRACE("<< backlog %zu, err %d", gcu_backlog_size(txn), err);
13393
13444
  return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
13394
13445
  }
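Across this and the neighboring hunks the allocator's flag vocabulary shrinks; the mapping below is reconstructed from the call-site changes alone, so the exact bit values remain unknown here:

    /* Allocation flags, before -> after (as visible in this diff):
     *   MDBX_ALLOC_ALL                      -> MDBX_ALLOC_DEFAULT
     *   MDBX_ALLOC_GC | MDBX_ALLOC_RESERVE  -> MDBX_ALLOC_RESERVE
     *   MDBX_ALLOC_SLOT                     -> MDBX_ALLOC_RESERVE with num == 0
     *   MDBX_ALLOC_BACKLOG                  -> MDBX_ALLOC_UNIMPORTANT
     *   MDBX_ALLOC_NEW                      -> implied by the absence of
     *                                          MDBX_ALLOC_RESERVE
     * Hence the logging switched from flag tests to num-based wording
     * ("reserve-pages" vs "fetch-slot"): under the new scheme num == 0
     * by itself identifies a slot-only reservation. */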
@@ -13417,9 +13468,8 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) {
13417
13468
  MDBX_env *const env = txn->mt_env;
13418
13469
  const char *const dbg_prefix_mode = ctx->lifo ? " lifo" : " fifo";
13419
13470
  (void)dbg_prefix_mode;
13420
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13421
- ctx->cursor.outer.mc_next = txn->mt_cursors[FREE_DBI];
13422
- txn->mt_cursors[FREE_DBI] = &ctx->cursor.outer;
13471
+ ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI];
13472
+ txn->mt_cursors[FREE_DBI] = &ctx->cursor;
13423
13473
 
13424
13474
  /* txn->tw.relist[] can grow and shrink during this call.
13425
13475
  * txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow.
@@ -13481,7 +13531,7 @@ retry:
13481
13531
  ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
13482
13532
  key.iov_base = &ctx->cleaned_id;
13483
13533
  key.iov_len = sizeof(ctx->cleaned_id);
13484
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, NULL, MDBX_SET);
13534
+ rc = mdbx_cursor_get(&ctx->cursor, &key, NULL, MDBX_SET);
13485
13535
  if (rc == MDBX_NOTFOUND)
13486
13536
  continue;
13487
13537
  if (unlikely(rc != MDBX_SUCCESS))
@@ -13494,18 +13544,17 @@ retry:
13494
13544
  tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
13495
13545
  TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix_mode,
13496
13546
  ctx->cleaned_slot, ctx->cleaned_id);
13497
- tASSERT(txn, *txn->mt_cursors == &ctx->cursor.outer);
13498
- rc = mdbx_cursor_del(&ctx->cursor.outer, 0);
13547
+ tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
13548
+ rc = mdbx_cursor_del(&ctx->cursor, 0);
13499
13549
  if (unlikely(rc != MDBX_SUCCESS))
13500
13550
  goto bailout;
13501
13551
  } while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
13502
13552
  txl_sort(txn->tw.lifo_reclaimed);
13503
13553
  }
13504
13554
  } else {
13505
- /* If using records from GC which we have not yet deleted,
13506
- * now delete them and any we reserved for tw.relist. */
13555
+ /* Delete the remaining records taken out of the GC. */
13507
13556
  while (ctx->cleaned_id <= txn->tw.last_reclaimed) {
13508
- rc = cursor_first(&ctx->cursor.outer, &key, NULL);
13557
+ rc = cursor_first(&ctx->cursor, &key, NULL);
13509
13558
  if (rc == MDBX_NOTFOUND)
13510
13559
  break;
13511
13560
  if (unlikely(rc != MDBX_SUCCESS))
@@ -13530,8 +13579,8 @@ retry:
13530
13579
  tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
13531
13580
  TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix_mode,
13532
13581
  ctx->cleaned_id);
13533
- tASSERT(txn, *txn->mt_cursors == &ctx->cursor.outer);
13534
- rc = mdbx_cursor_del(&ctx->cursor.outer, 0);
13582
+ tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
13583
+ rc = mdbx_cursor_del(&ctx->cursor, 0);
13535
13584
  if (unlikely(rc != MDBX_SUCCESS))
13536
13585
  goto bailout;
13537
13586
  }
@@ -13566,10 +13615,7 @@ retry:
13566
13615
  if (txn->tw.loose_count > 0) {
13567
13616
  TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix_mode,
13568
13617
  txn->tw.loose_count);
13569
- rc = page_alloc_slowpath(&ctx->cursor.outer, 0,
13570
- MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
13571
- MDBX_ALLOC_RESERVE)
13572
- .err;
13618
+ rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
13573
13619
  if (rc == MDBX_SUCCESS) {
13574
13620
  TRACE("%s: retry since gc-slot for %zu loose-pages available",
13575
13621
  dbg_prefix_mode, txn->tw.loose_count);
@@ -13651,10 +13697,9 @@ retry:
13651
13697
  if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
13652
13698
  if (unlikely(!ctx->retired_stored)) {
13653
13699
  /* Make sure last page of GC is touched and on retired-list */
13654
- txn->mt_flags -= MDBX_TXN_UPDATE_GC;
13655
- rc = page_search(&ctx->cursor.outer, NULL,
13656
- MDBX_PS_LAST | MDBX_PS_MODIFY);
13657
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13700
+ rc = cursor_last(&ctx->cursor, nullptr, nullptr);
13701
+ if (likely(rc != MDBX_SUCCESS))
13702
+ rc = gcu_touch(ctx);
13658
13703
  if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
13659
13704
  goto bailout;
13660
13705
  }
@@ -13664,6 +13709,8 @@ retry:
13664
13709
  do {
13665
13710
  if (ctx->bigfoot > txn->mt_txnid) {
13666
13711
  rc = gcu_clean_stored_retired(txn, ctx);
13712
+ if (unlikely(rc != MDBX_SUCCESS))
13713
+ goto bailout;
13667
13714
  tASSERT(txn, ctx->bigfoot <= txn->mt_txnid);
13668
13715
  }
13669
13716
 
@@ -13685,7 +13732,7 @@ retry:
13685
13732
  ? env->me_maxgc_ov1page
13686
13733
  : left;
13687
13734
  data.iov_len = (chunk + 1) * sizeof(pgno_t);
13688
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data, MDBX_RESERVE);
13735
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
13689
13736
  if (unlikely(rc != MDBX_SUCCESS))
13690
13737
  goto bailout;
13691
13738
 
@@ -13723,7 +13770,7 @@ retry:
13723
13770
  do {
13724
13771
  gcu_prepare_backlog(txn, ctx, true);
13725
13772
  data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
13726
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data, MDBX_RESERVE);
13773
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
13727
13774
  if (unlikely(rc != MDBX_SUCCESS))
13728
13775
  goto bailout;
13729
13776
  /* Retry if tw.retired_pages[] grew during the Put() */
@@ -13790,17 +13837,13 @@ retry:
13790
13837
  left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
13791
13838
  env->me_maxgc_ov1page &&
13792
13839
  !ctx->dense) {
13793
- /* LY: need just a txn-id for save page list. */
13840
+ /* Need a free txn-id to save the page list. */
13794
13841
  bool need_cleanup = false;
13795
- txnid_t snap_oldest;
13842
+ txnid_t snap_oldest = 0;
13796
13843
  retry_rid:
13797
- txn->mt_flags -= MDBX_TXN_UPDATE_GC;
13798
13844
  do {
13799
- snap_oldest = txn_oldest_reader(txn);
13800
- rc = page_alloc_slowpath(&ctx->cursor.outer, 0,
13801
- MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
13802
- MDBX_ALLOC_RESERVE)
13803
- .err;
13845
+ rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
13846
+ snap_oldest = env->me_lck->mti_oldest_reader.weak;
13804
13847
  if (likely(rc == MDBX_SUCCESS)) {
13805
13848
  TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,
13806
13849
  MDBX_PNL_LAST(txn->tw.lifo_reclaimed));
@@ -13812,7 +13855,6 @@ retry:
13812
13855
  left >
13813
13856
  (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
13814
13857
  env->me_maxgc_ov1page);
13815
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13816
13858
 
13817
13859
  if (likely(rc == MDBX_SUCCESS)) {
13818
13860
  TRACE("%s: got enough from GC.", dbg_prefix_mode);
@@ -13830,7 +13872,7 @@ retry:
13830
13872
  } else {
13831
13873
  tASSERT(txn, txn->tw.last_reclaimed == 0);
13832
13874
  if (unlikely(txn_oldest_reader(txn) != snap_oldest))
13833
- /* should retry page_alloc_slowpath(MDBX_ALLOC_GC)
13875
+ /* should retry page_alloc_slowpath()
13834
13876
  * if the oldest reader changes since the last attempt */
13835
13877
  goto retry_rid;
13836
13878
  /* no reclaimable GC entries,
@@ -13840,7 +13882,8 @@ retry:
13840
13882
  ctx->rid);
13841
13883
  }
13842
13884
 
13843
- /* LY: GC is empty, will look any free txn-id in high2low order. */
13885
+ /* В GC нет годных к переработке записей,
13886
+ * будем использовать свободные id в обратном порядке. */
13844
13887
  while (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < prefer_max_scatter &&
13845
13888
  left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) -
13846
13889
  ctx->reused_slot) *
@@ -13858,26 +13901,20 @@ retry:
13858
13901
  }
13859
13902
 
13860
13903
  tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID);
13861
- --ctx->rid;
13904
+ ctx->rid -= 1;
13862
13905
  key.iov_base = &ctx->rid;
13863
13906
  key.iov_len = sizeof(ctx->rid);
13864
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_SET_KEY);
13907
+ rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
13865
13908
  if (unlikely(rc == MDBX_SUCCESS)) {
13866
- DEBUG("%s: GC's id %" PRIaTXN " is used, continue bottom-up search",
13909
+ DEBUG("%s: GC's id %" PRIaTXN " is present, going to first",
13867
13910
  dbg_prefix_mode, ctx->rid);
13868
- ++ctx->rid;
13869
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_FIRST);
13870
- if (rc == MDBX_NOTFOUND) {
13871
- DEBUG("%s: GC is empty (going dense-mode)", dbg_prefix_mode);
13872
- ctx->dense = true;
13873
- break;
13874
- }
13911
+ rc = cursor_first(&ctx->cursor, &key, nullptr);
13875
13912
  if (unlikely(rc != MDBX_SUCCESS ||
13876
13913
  key.iov_len != sizeof(txnid_t))) {
13877
13914
  rc = MDBX_CORRUPTED;
13878
13915
  goto bailout;
13879
13916
  }
13880
- txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13917
+ const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13881
13918
  if (gc_first <= MIN_TXNID) {
13882
13919
  DEBUG("%s: no free GC's id(s) less than %" PRIaTXN
13883
13920
  " (going dense-mode)",
@@ -13925,13 +13962,13 @@ retry:
13925
13962
  tASSERT(txn, txn->tw.lifo_reclaimed == NULL);
13926
13963
  if (unlikely(ctx->rid == 0)) {
13927
13964
  ctx->rid = txn_oldest_reader(txn);
13928
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, NULL, MDBX_FIRST);
13929
- if (rc == MDBX_SUCCESS) {
13965
+ rc = cursor_first(&ctx->cursor, &key, nullptr);
13966
+ if (likely(rc == MDBX_SUCCESS)) {
13930
13967
  if (unlikely(key.iov_len != sizeof(txnid_t))) {
13931
13968
  rc = MDBX_CORRUPTED;
13932
13969
  goto bailout;
13933
13970
  }
13934
- txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13971
+ const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13935
13972
  if (ctx->rid >= gc_first)
13936
13973
  ctx->rid = gc_first - 1;
13937
13974
  if (unlikely(ctx->rid == 0)) {
@@ -14022,7 +14059,7 @@ retry:
14022
14059
  TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix_mode, chunk,
14023
14060
  ctx->settled + 1, ctx->settled + chunk + 1, reservation_gc_id);
14024
14061
  gcu_prepare_backlog(txn, ctx, true);
14025
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data,
14062
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
14026
14063
  MDBX_RESERVE | MDBX_NOOVERWRITE);
14027
14064
  tASSERT(txn, pnl_check_allocated(txn->tw.relist,
14028
14065
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
@@ -14070,7 +14107,7 @@ retry:
14070
14107
  size_t left = amount;
14071
14108
  if (txn->tw.lifo_reclaimed == nullptr) {
14072
14109
  tASSERT(txn, ctx->lifo == 0);
14073
- rc = cursor_first(&ctx->cursor.outer, &key, &data);
14110
+ rc = cursor_first(&ctx->cursor, &key, &data);
14074
14111
  if (unlikely(rc != MDBX_SUCCESS))
14075
14112
  goto bailout;
14076
14113
  } else {
@@ -14104,7 +14141,7 @@ retry:
14104
14141
  dbg_prefix_mode, fill_gc_id, ctx->filled_slot);
14105
14142
  key.iov_base = &fill_gc_id;
14106
14143
  key.iov_len = sizeof(fill_gc_id);
14107
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_SET_KEY);
14144
+ rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
14108
14145
  if (unlikely(rc != MDBX_SUCCESS))
14109
14146
  goto bailout;
14110
14147
  }
@@ -14118,7 +14155,6 @@ retry:
14118
14155
  key.iov_len = sizeof(fill_gc_id);
14119
14156
 
14120
14157
  tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
14121
- txn->mt_flags += MDBX_TXN_FROZEN_RE;
14122
14158
  size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
14123
14159
  if (unlikely(chunk > left)) {
14124
14160
  TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk,
@@ -14126,14 +14162,11 @@ retry:
14126
14162
  if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) ||
14127
14163
  chunk - left > env->me_maxgc_ov1page) {
14128
14164
  data.iov_len = (left + 1) * sizeof(pgno_t);
14129
- if (ctx->loop < 7)
14130
- txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
14131
14165
  }
14132
14166
  chunk = left;
14133
14167
  }
14134
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data,
14168
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
14135
14169
  MDBX_CURRENT | MDBX_RESERVE);
14136
- txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
14137
14170
  if (unlikely(rc != MDBX_SUCCESS))
14138
14171
  goto bailout;
14139
14172
  gcu_clean_reserved(env, data);
@@ -14182,7 +14215,7 @@ retry:
14182
14215
 
14183
14216
  if (txn->tw.lifo_reclaimed == nullptr) {
14184
14217
  tASSERT(txn, ctx->lifo == 0);
14185
- rc = cursor_next(&ctx->cursor.outer, &key, &data, MDBX_NEXT);
14218
+ rc = cursor_next(&ctx->cursor, &key, &data, MDBX_NEXT);
14186
14219
  if (unlikely(rc != MDBX_SUCCESS))
14187
14220
  goto bailout;
14188
14221
  } else {
@@ -14213,7 +14246,7 @@ retry:
14213
14246
  ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
14214
14247
 
14215
14248
  bailout:
14216
- txn->mt_cursors[FREE_DBI] = ctx->cursor.outer.mc_next;
14249
+ txn->mt_cursors[FREE_DBI] = ctx->cursor.mc_next;
14217
14250
 
14218
14251
  MDBX_PNL_SETSIZE(txn->tw.relist, 0);
14219
14252
  #if MDBX_ENABLE_PROFGC
@@ -14363,7 +14396,8 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14363
14396
  MDBX_PNL_SETSIZE(parent->tw.retired_pages, w);
14364
14397
 
14365
14398
  /* Filter-out parent spill list */
14366
- if (parent->tw.spill_pages && MDBX_PNL_GETSIZE(parent->tw.spill_pages) > 0) {
14399
+ if (parent->tw.spilled.list &&
14400
+ MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) {
14367
14401
  const MDBX_PNL sl = spill_purge(parent);
14368
14402
  size_t len = MDBX_PNL_GETSIZE(sl);
14369
14403
  if (len) {
@@ -14378,7 +14412,7 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14378
14412
  DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1);
14379
14413
  i -= 1;
14380
14414
  } while (i && sl[i] >= (parent->mt_next_pgno << 1));
14381
- MDBX_PNL_GETSIZE(sl) = i;
14415
+ MDBX_PNL_SETSIZE(sl, i);
14382
14416
  #else
14383
14417
  assert(MDBX_PNL_MOST(sl) == MDBX_PNL_FIRST(sl));
14384
14418
  size_t i = 0;
@@ -14451,10 +14485,10 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14451
14485
  }
14452
14486
 
14453
14487
  /* Remove anything in our spill list from parent's dirty list */
14454
- if (txn->tw.spill_pages) {
14455
- tASSERT(txn, pnl_check_allocated(txn->tw.spill_pages,
14488
+ if (txn->tw.spilled.list) {
14489
+ tASSERT(txn, pnl_check_allocated(txn->tw.spilled.list,
14456
14490
  (size_t)parent->mt_next_pgno << 1));
14457
- dpl_sift(parent, txn->tw.spill_pages, true);
14491
+ dpl_sift(parent, txn->tw.spilled.list, true);
14458
14492
  tASSERT(parent,
14459
14493
  parent->tw.dirtyroom + parent->tw.dirtylist->length ==
14460
14494
  (parent->mt_parent ? parent->mt_parent->tw.dirtyroom
@@ -14606,23 +14640,23 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14606
14640
  tASSERT(parent, dirtylist_check(parent));
14607
14641
  dpl_free(txn);
14608
14642
 
14609
- if (txn->tw.spill_pages) {
14610
- if (parent->tw.spill_pages) {
14643
+ if (txn->tw.spilled.list) {
14644
+ if (parent->tw.spilled.list) {
14611
14645
  /* Must not fail since space was preserved above. */
14612
- pnl_merge(parent->tw.spill_pages, txn->tw.spill_pages);
14613
- pnl_free(txn->tw.spill_pages);
14646
+ pnl_merge(parent->tw.spilled.list, txn->tw.spilled.list);
14647
+ pnl_free(txn->tw.spilled.list);
14614
14648
  } else {
14615
- parent->tw.spill_pages = txn->tw.spill_pages;
14616
- parent->tw.spill_least_removed = txn->tw.spill_least_removed;
14649
+ parent->tw.spilled.list = txn->tw.spilled.list;
14650
+ parent->tw.spilled.least_removed = txn->tw.spilled.least_removed;
14617
14651
  }
14618
14652
  tASSERT(parent, dirtylist_check(parent));
14619
14653
  }
14620
14654
 
14621
14655
  parent->mt_flags &= ~MDBX_TXN_HAS_CHILD;
14622
- if (parent->tw.spill_pages) {
14623
- assert(pnl_check_allocated(parent->tw.spill_pages,
14656
+ if (parent->tw.spilled.list) {
14657
+ assert(pnl_check_allocated(parent->tw.spilled.list,
14624
14658
  (size_t)parent->mt_next_pgno << 1));
14625
- if (MDBX_PNL_GETSIZE(parent->tw.spill_pages))
14659
+ if (MDBX_PNL_GETSIZE(parent->tw.spilled.list))
14626
14660
  parent->mt_flags |= MDBX_TXN_SPILLS;
14627
14661
  }
14628
14662
  }
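The spill lists merged here use the shifted encoding visible in the refund loop earlier in txn_merge: entries hold the page number shifted left by one (hence sl[i] >> 1 when printing and parent->mt_next_pgno << 1 in comparisons); the freed low bit is assumed to carry a per-slot flag. A small sketch of that encoding with hypothetical helper names:

    #include <stdbool.h>
    #include <stdint.h>

    typedef uint32_t pgno_t;

    static inline uint32_t spill_encode(pgno_t pgno) { return pgno << 1; }
    static inline pgno_t spill_pgno(uint32_t entry) { return entry >> 1; }
    /* assumption: the low bit marks per-slot state, e.g. a removed entry */
    static inline bool spill_lowbit(uint32_t entry) { return (entry & 1) != 0; }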
@@ -14693,8 +14727,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
14693
14727
  sizeof(parent->mt_geo)) == 0);
14694
14728
  tASSERT(txn, memcmp(&parent->mt_canary, &txn->mt_canary,
14695
14729
  sizeof(parent->mt_canary)) == 0);
14696
- tASSERT(txn, !txn->tw.spill_pages ||
14697
- MDBX_PNL_GETSIZE(txn->tw.spill_pages) == 0);
14730
+ tASSERT(txn, !txn->tw.spilled.list ||
14731
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0);
14698
14732
  tASSERT(txn, txn->tw.loose_count == 0);
14699
14733
 
14700
14734
  /* fast completion of pure nested transaction */
@@ -14714,10 +14748,10 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
14714
14748
  goto fail;
14715
14749
  }
14716
14750
 
14717
- if (txn->tw.spill_pages) {
14718
- if (parent->tw.spill_pages) {
14719
- rc = pnl_need(&parent->tw.spill_pages,
14720
- MDBX_PNL_GETSIZE(txn->tw.spill_pages));
14751
+ if (txn->tw.spilled.list) {
14752
+ if (parent->tw.spilled.list) {
14753
+ rc = pnl_need(&parent->tw.spilled.list,
14754
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list));
14721
14755
  if (unlikely(rc != MDBX_SUCCESS))
14722
14756
  goto fail;
14723
14757
  }
@@ -15837,7 +15871,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
15837
15871
  }
15838
15872
 
15839
15873
  uint64_t timestamp = 0;
15840
- while ("workaround for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
15874
+ while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
15841
15875
  rc =
15842
15876
  coherency_check_written(env, pending->unsafe_txnid, target, &timestamp);
15843
15877
  if (likely(rc == MDBX_SUCCESS))
@@ -16359,7 +16393,7 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
16359
16393
 
16360
16394
  uint64_t timestamp = 0;
16361
16395
  while ("workaround for "
16362
- "https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
16396
+ "https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
16363
16397
  meta = *head.ptr_c;
16364
16398
  rc = coherency_check_readed(env, head.txnid, meta.mm_dbs, &meta,
16365
16399
  &timestamp);
@@ -17503,13 +17537,13 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx,
17503
17537
  }
17504
17538
  #else
17505
17539
  struct stat st;
17506
- if (stat(pathname, &st)) {
17540
+ if (stat(pathname, &st) != 0) {
17507
17541
  rc = errno;
17508
17542
  if (rc != MDBX_ENOFILE)
17509
17543
  return rc;
17510
17544
  if (mode == 0 || (*flags & MDBX_RDONLY) != 0)
17511
17545
  /* can't open existing */
17512
- return rc;
17546
+ return rc /* MDBX_ENOFILE */;
17513
17547
 
17514
17548
  /* auto-create directory if requested */
17515
17549
  const mdbx_mode_t dir_mode =
@@ -17702,7 +17736,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
17702
17736
  } else {
17703
17737
  #if MDBX_MMAP_INCOHERENT_FILE_WRITE
17704
17738
  /* Temporary `workaround` for OpenBSD kernel's flaw.
17705
- * See https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/67 */
17739
+ * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */
17706
17740
  if ((flags & MDBX_WRITEMAP) == 0) {
17707
17741
  if (flags & MDBX_ACCEDE)
17708
17742
  flags |= MDBX_WRITEMAP;
@@ -18014,7 +18048,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
18014
18048
  }
18015
18049
 
18016
18050
  if ((flags & MDBX_RDONLY) == 0) {
18017
- const size_t tsize = sizeof(MDBX_txn),
18051
+ const size_t tsize = sizeof(MDBX_txn) + sizeof(MDBX_cursor),
18018
18052
  size = tsize + env->me_maxdbs *
18019
18053
  (sizeof(MDBX_db) + sizeof(MDBX_cursor *) +
18020
18054
  sizeof(MDBX_atomic_uint32_t) + 1);
@@ -18139,9 +18173,10 @@ __cold static int env_close(MDBX_env *env) {
18139
18173
  }
18140
18174
 
18141
18175
  if (env->me_dbxs) {
18142
- for (size_t i = env->me_numdbs; --i >= CORE_DBS;)
18176
+ for (size_t i = CORE_DBS; i < env->me_numdbs; ++i)
18143
18177
  osal_free(env->me_dbxs[i].md_name.iov_base);
18144
18178
  osal_free(env->me_dbxs);
18179
+ env->me_numdbs = CORE_DBS;
18145
18180
  env->me_dbxs = nullptr;
18146
18181
  }
18147
18182
  if (env->me_pbuf) {
@@ -18164,7 +18199,7 @@ __cold static int env_close(MDBX_env *env) {
18164
18199
  dpl_free(env->me_txn0);
18165
18200
  txl_free(env->me_txn0->tw.lifo_reclaimed);
18166
18201
  pnl_free(env->me_txn0->tw.retired_pages);
18167
- pnl_free(env->me_txn0->tw.spill_pages);
18202
+ pnl_free(env->me_txn0->tw.spilled.list);
18168
18203
  pnl_free(env->me_txn0->tw.relist);
18169
18204
  osal_free(env->me_txn0);
18170
18205
  env->me_txn0 = nullptr;
@@ -18907,7 +18942,8 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
18907
18942
  if (!MDBX_DISABLE_VALIDATION) {
18908
18943
  const MDBX_env *env = mc->mc_txn->mt_env;
18909
18944
  const size_t dsize = data->iov_len;
18910
- if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax))
18945
+ if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax) &&
18946
+ mc->mc_dbi != FREE_DBI)
18911
18947
  poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
18912
18948
  const unsigned npages = number_of_ovpages(env, dsize);
18913
18949
  if (unlikely(lp.page->mp_pages != npages)) {
@@ -18915,7 +18951,7 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
18915
18951
  return bad_page(lp.page,
18916
18952
  "too less n-pages %u for bigdata-node (%zu bytes)",
18917
18953
  lp.page->mp_pages, dsize);
18918
- else
18954
+ else if (mc->mc_dbi != FREE_DBI)
18919
18955
  poor_page(lp.page, "extra n-pages %u for bigdata-node (%zu bytes)",
18920
18956
  lp.page->mp_pages, dsize);
18921
18957
  }
@@ -20011,7 +20047,6 @@ static int touch_dbi(MDBX_cursor *mc) {
20011
20047
  *mc->mc_dbistate |= DBI_DIRTY;
20012
20048
  mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
20013
20049
  if (mc->mc_dbi >= CORE_DBS) {
20014
- cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_UPDATE_GC) == 0);
20015
20050
  /* Touch DB record of named DB */
20016
20051
  MDBX_cursor_couple cx;
20017
20052
  int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI);
@@ -20424,9 +20459,9 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
20424
20459
 
20425
20460
  /* Large/Overflow page overwrites need special handling */
20426
20461
  if (unlikely(node_flags(node) & F_BIGDATA)) {
20427
- int dpages = (node_size(key, data) > env->me_leaf_nodemax)
20428
- ? number_of_ovpages(env, data->iov_len)
20429
- : 0;
20462
+ const size_t dpages = (node_size(key, data) > env->me_leaf_nodemax)
20463
+ ? number_of_ovpages(env, data->iov_len)
20464
+ : 0;
20430
20465
 
20431
20466
  const pgno_t pgno = node_largedata_pgno(node);
20432
20467
  pgr_t lp = page_get_large(mc, pgno, mc->mc_pg[mc->mc_top]->mp_txnid);
@@ -20435,13 +20470,13 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
20435
20470
  cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW);
20436
20471
 
20437
20472
  /* Is the ov page from this txn (or a parent) and big enough? */
20438
- int ovpages = lp.page->mp_pages;
20439
- if (!IS_FROZEN(mc->mc_txn, lp.page) &&
20440
- (unlikely(mc->mc_txn->mt_flags & MDBX_TXN_FROZEN_RE)
20441
- ? (ovpages >= dpages)
20442
- : (ovpages ==
20443
- /* LY: add configurable threshold to keep reserve space */
20444
- dpages))) {
20473
+ const size_t ovpages = lp.page->mp_pages;
20474
+ const size_t extra_threshold =
20475
+ (mc->mc_dbi == FREE_DBI)
20476
+ ? 1
20477
+ : /* LY: add configurable threshold to keep reserve space */ 0;
20478
+ if (!IS_FROZEN(mc->mc_txn, lp.page) && ovpages >= dpages &&
20479
+ ovpages <= dpages + extra_threshold) {
20445
20480
  /* yes, overwrite it. */
20446
20481
  if (!IS_MODIFIABLE(mc->mc_txn, lp.page)) {
20447
20482
  if (IS_SPILLED(mc->mc_txn, lp.page)) {
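The rewritten test above replaces the old frozen-relist special case with a bounded size match: an existing overflow run is reused only when it is large enough and wastes at most extra_threshold pages, with one page of slack granted to the GC/FreeDB and none elsewhere. The predicate in isolation (hypothetical helper; the FREE_DBI test is reduced to a boolean):

    #include <stdbool.h>
    #include <stddef.h>

    /* May an existing overflow run of `ovpages` pages be overwritten
     * in place for a value that needs `dpages` pages? */
    static bool may_overwrite_inplace(size_t ovpages, size_t dpages,
                                      bool is_gc_dbi) {
      const size_t extra_threshold = is_gc_dbi ? 1 : 0;
      return ovpages >= dpages && ovpages <= dpages + extra_threshold;
    }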
@@ -20972,7 +21007,6 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
20972
21007
 
20973
21008
  DEBUG("db %u allocated new page %" PRIaPGNO, mc->mc_dbi, ret.page->mp_pgno);
20974
21009
  ret.page->mp_flags = (uint16_t)flags;
20975
- ret.page->mp_txnid = mc->mc_txn->mt_front;
20976
21010
  cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
20977
21011
  cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
20978
21012
  #if MDBX_ENABLE_PGOP_STAT
@@ -20994,25 +21028,24 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
20994
21028
  return ret;
20995
21029
  }
20996
21030
 
20997
- static pgr_t page_new_large(MDBX_cursor *mc, const unsigned npages) {
21031
+ static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) {
20998
21032
  pgr_t ret = likely(npages == 1)
20999
21033
  ? page_alloc(mc)
21000
- : page_alloc_slowpath(mc, npages, MDBX_ALLOC_ALL);
21034
+ : page_alloc_slowpath(mc, npages, MDBX_ALLOC_DEFAULT);
21001
21035
  if (unlikely(ret.err != MDBX_SUCCESS))
21002
21036
  return ret;
21003
21037
 
21004
- DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %u", mc->mc_dbi,
21038
+ DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %zu", mc->mc_dbi,
21005
21039
  ret.page->mp_pgno, npages);
21006
21040
  ret.page->mp_flags = P_OVERFLOW;
21007
- ret.page->mp_txnid = mc->mc_txn->mt_front;
21008
21041
  cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
21009
21042
  cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
21010
21043
  #if MDBX_ENABLE_PGOP_STAT
21011
21044
  mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += npages;
21012
21045
  #endif /* MDBX_ENABLE_PGOP_STAT */
21013
21046
 
21014
- mc->mc_db->md_overflow_pages += npages;
21015
- ret.page->mp_pages = npages;
21047
+ mc->mc_db->md_overflow_pages += (pgno_t)npages;
21048
+ ret.page->mp_pages = (pgno_t)npages;
21016
21049
  cASSERT(mc, !(mc->mc_flags & C_SUB));
21017
21050
  return ret;
21018
21051
  }
@@ -21109,7 +21142,6 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21109
21142
  key ? key->iov_len : 0, DKEY_DEBUG(key));
21110
21143
  cASSERT(mc, key != NULL && data != NULL);
21111
21144
  cASSERT(mc, PAGETYPE_COMPAT(mp) == P_LEAF);
21112
- cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
21113
21145
  MDBX_page *largepage = NULL;
21114
21146
 
21115
21147
  size_t node_bytes;
@@ -21118,6 +21150,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21118
21150
  STATIC_ASSERT(sizeof(pgno_t) % 2 == 0);
21119
21151
  node_bytes =
21120
21152
  node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
21153
+ cASSERT(mc, page_room(mp) >= node_bytes);
21121
21154
  } else if (unlikely(node_size(key, data) >
21122
21155
  mc->mc_txn->mt_env->me_leaf_nodemax)) {
21123
21156
  /* Put data on large/overflow page. */
@@ -21131,6 +21164,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21131
21164
  flags);
21132
21165
  return MDBX_PROBLEM;
21133
21166
  }
21167
+ cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
21134
21168
  const pgno_t ovpages = number_of_ovpages(mc->mc_txn->mt_env, data->iov_len);
21135
21169
  const pgr_t npr = page_new_large(mc, ovpages);
21136
21170
  if (unlikely(npr.err != MDBX_SUCCESS))
@@ -21142,10 +21176,12 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21142
21176
  flags |= F_BIGDATA;
21143
21177
  node_bytes =
21144
21178
  node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
21179
+ cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
21145
21180
  } else {
21181
+ cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
21146
21182
  node_bytes = node_size(key, data) + sizeof(indx_t);
21183
+ cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
21147
21184
  }
21148
- cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
21149
21185
 
21150
21186
  /* Move higher pointers up one slot. */
21151
21187
  const size_t nkeys = page_numkeys(mp);
@@ -22886,7 +22922,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
22886
22922
  "big-node data size (%zu) <> min/max value-length (%zu/%zu)\n",
22887
22923
  dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
22888
22924
  if (unlikely(node_size_len(node_ks(node), dsize) <=
22889
- mc->mc_txn->mt_env->me_leaf_nodemax))
22925
+ mc->mc_txn->mt_env->me_leaf_nodemax) &&
22926
+ mc->mc_dbi != FREE_DBI)
22890
22927
  poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
22891
22928
 
22892
22929
  if ((mc->mc_checking & CC_RETIRING) == 0) {
@@ -22901,7 +22938,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
22901
22938
  rc = bad_page(lp.page,
22902
22939
  "too less n-pages %u for bigdata-node (%zu bytes)",
22903
22940
  lp.page->mp_pages, dsize);
22904
- else
22941
+ else if (mc->mc_dbi != FREE_DBI)
22905
22942
  poor_page(lp.page,
22906
22943
  "extra n-pages %u for bigdata-node (%zu bytes)",
22907
22944
  lp.page->mp_pages, dsize);
@@ -23327,7 +23364,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
23327
23364
  int rc = MDBX_SUCCESS, foliage = 0;
23328
23365
  size_t i, ptop;
23329
23366
  MDBX_env *const env = mc->mc_txn->mt_env;
23330
- MDBX_val sepkey, rkey, xdata;
23367
+ MDBX_val rkey, xdata;
23331
23368
  MDBX_page *tmp_ki_copy = NULL;
23332
23369
  DKBUF;
23333
23370
 
@@ -23419,6 +23456,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
23419
23456
  eASSERT(env, split_indx >= minkeys && split_indx <= nkeys - minkeys + 1);
23420
23457
 
23421
23458
  cASSERT(mc, !IS_BRANCH(mp) || newindx > 0);
23459
+ MDBX_val sepkey = {nullptr, 0};
23422
23460
  /* It is reasonable and possible to split the page at the begin */
23423
23461
  if (unlikely(newindx < minkeys)) {
23424
23462
  split_indx = minkeys;
@@ -23751,7 +23789,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
23751
23789
  break;
23752
23790
  }
23753
23791
  }
23754
- } else if (!IS_LEAF2(mp)) {
23792
+ } else if (tmp_ki_copy /* !IS_LEAF2(mp) */) {
23755
23793
  /* Move nodes */
23756
23794
  mc->mc_pg[mc->mc_top] = sister;
23757
23795
  i = split_indx;
@@ -25053,7 +25091,7 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn,
25053
25091
  const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
25054
25092
 
25055
25093
  /* is the environment open?
25056
- * (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/171) */
25094
+ * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */
25057
25095
  if (unlikely(!env->me_map)) {
25058
25096
  /* environment not yet opened */
25059
25097
  #if 1
@@ -27864,7 +27902,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn,
27864
27902
  if (getrlimit(RLIMIT_RSS, &rss) == 0 && rss.rlim_cur < estimated_rss) {
27865
27903
  rss.rlim_cur = estimated_rss;
27866
27904
  if (rss.rlim_max < estimated_rss)
27867
- rss.rlim_max = used_range;
27905
+ rss.rlim_max = estimated_rss;
27868
27906
  if (setrlimit(RLIMIT_RSS, &rss)) {
27869
27907
  rc = errno;
27870
27908
  WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS",
@@ -29696,7 +29734,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
29696
29734
  flags |= O_CLOEXEC;
29697
29735
  #endif /* O_CLOEXEC */
29698
29736
 
29699
- /* Safeguard for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/144 */
29737
+ /* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
29700
29738
  #if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
29701
29739
  int stub_fd0 = -1, stub_fd1 = -1, stub_fd2 = -1;
29702
29740
  static const char dev_null[] = "/dev/null";
@@ -29734,7 +29772,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
29734
29772
  errno = EACCES /* restore errno if file exists */;
29735
29773
  }
29736
29774
 
29737
- /* Safeguard for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/144 */
29775
+ /* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
29738
29776
  #if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
29739
29777
  if (*fd == STDIN_FILENO) {
29740
29778
  WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "IN",
@@ -30091,10 +30129,15 @@ MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
30091
30129
  return (int)GetLastError();
30092
30130
  #else
30093
30131
  #if defined(__linux__) || defined(__gnu_linux__)
30094
- assert(linux_kernel_version > 0x02061300);
30095
30132
  /* Since Linux 2.6.19, MS_ASYNC is in fact a no-op. The kernel properly
30096
- * tracks dirty pages and flushes them to storage as necessary. */
30097
- return MDBX_SUCCESS;
30133
+ * tracks dirty pages and flushes them as necessary. */
30134
+ //
30135
+ // However, this behavior may be changed in custom kernels,
30136
+ // so just leave such optimization to libc's discretion.
30137
+ //
30138
+ // assert(linux_kernel_version > 0x02061300);
30139
+ // if (mode_bits == MDBX_SYNC_NONE)
30140
+ // return MDBX_SUCCESS;
30098
30141
  #endif /* Linux */
30099
30142
  if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
30100
30143
  return errno;
@@ -30577,7 +30620,7 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
30577
30620
  VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
30578
30621
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30579
30622
  * when this memory will re-used by malloc or another mmapping.
30580
- * See https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
30623
+ * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30581
30624
  */
30582
30625
  MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address,
30583
30626
  (map->filesize && map->filesize < map->limit)
@@ -30656,7 +30699,7 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
30656
30699
 
30657
30700
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30658
30701
  * when this memory will re-used by malloc or another mmapping.
30659
- * See https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
30702
+ * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30660
30703
  */
30661
30704
  MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
30662
30705
  status = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
@@ -30937,7 +30980,7 @@ retry_mapview:;
30937
30980
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30938
30981
  * when this memory will re-used by malloc or another mmapping.
30939
30982
  * See
30940
- * https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
30983
+ * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30941
30984
  */
30942
30985
  MDBX_ASAN_UNPOISON_MEMORY_REGION(
30943
30986
  map->address,
@@ -30959,7 +31002,7 @@ retry_mapview:;
30959
31002
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30960
31003
  * when this memory will re-used by malloc or another mmapping.
30961
31004
  * See
30962
- * https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
31005
+ * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30963
31006
  */
30964
31007
  MDBX_ASAN_UNPOISON_MEMORY_REGION(
30965
31008
  map->address, (map->current < map->limit) ? map->current : map->limit);
@@ -31782,9 +31825,9 @@ __dll_export
31782
31825
  0,
31783
31826
  12,
31784
31827
  2,
31785
- 0,
31786
- {"2022-11-11T17:35:32+03:00", "cd8aa216aff5c70b45bd3afd46d417a95126dcc3", "9b062cf0c7d41297f756c7f7b897ed981022bdbf",
31787
- "v0.12.2-0-g9b062cf0"},
31828
+ 18,
31829
+ {"2022-11-28T15:45:29+03:00", "9558651eb24ab172a73a7bc6149cadad4c4df990", "b3248442962cfdda728656d6d9085147a7d42b63",
31830
+ "v0.12.2-18-gb3248442"},
31788
31831
  sourcery};
31789
31832
 
31790
31833
  __dll_export