isomorfeus-ferret 0.15.0 → 0.16.0

@@ -12,7 +12,7 @@
  * <http://www.OpenLDAP.org/license.html>. */
 
  #define xMDBX_ALLOY 1
- #define MDBX_BUILD_SOURCERY e17be563de6f6f85e208ded5aacc1387bc0addf6ce5540c99d0d15db2c3e8edd_v0_12_2_0_g9b062cf0
+ #define MDBX_BUILD_SOURCERY 56f8a04f0668bb80d0d3f24fd2c9958d9aeb83004b65badfd5ccfa80647a2218_v0_12_2_18_gb3248442
  #ifdef MDBX_CONFIG_H
  #include MDBX_CONFIG_H
  #endif
@@ -428,14 +428,13 @@ __extern_C key_t ftok(const char *, int);
  /* Byteorder */
 
  #if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
- defined(i486) || defined(__i486) || defined(__i486__) || \
- defined(i586) || defined(__i586) || defined(__i586__) || \
- defined(i686) || defined(__i686) || defined(__i686__) || \
- defined(_M_IX86) || defined(_X86_) || defined(__I86__) || \
- defined(__THW_INTEL__) || defined(__INTEL__) || \
- defined(__x86_64) || defined(__x86_64__) || \
+ defined(i486) || defined(__i486) || defined(__i486__) || defined(i586) || \
+ defined(__i586) || defined(__i586__) || defined(i686) || \
+ defined(__i686) || defined(__i686__) || defined(_M_IX86) || \
+ defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \
+ defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \
  defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
- defined(_M_AMD64) || defined(__IA32__)
+ defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
  #ifndef __ia32__
  /* LY: define neutral __ia32__ for x86 and x86-64 */
  #define __ia32__ 1
@@ -3138,13 +3137,9 @@ struct MDBX_txn {
  /* Additional flag for sync_locked() */
  #define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
 
- #define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
- #define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
-
  #define TXN_FLAGS \
  (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
- MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
- MDBX_TXN_FROZEN_RE)
+ MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
 
  #if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
  ((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
@@ -3226,11 +3221,16 @@ struct MDBX_txn {
  MDBX_page *loose_pages;
  /* Number of loose pages (tw.loose_pages) */
  size_t loose_count;
- size_t spill_least_removed;
- /* The sorted list of dirty pages we temporarily wrote to disk
- * because the dirty list was full. page numbers in here are
- * shifted left by 1, deleted slots have the LSB set. */
- MDBX_PNL spill_pages;
+ union {
+ struct {
+ size_t least_removed;
+ /* The sorted list of dirty pages we temporarily wrote to disk
+ * because the dirty list was full. page numbers in here are
+ * shifted left by 1, deleted slots have the LSB set. */
+ MDBX_PNL list;
+ } spilled;
+ size_t writemap_dirty_npages;
+ };
  } tw;
  };
  };
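Editor's note (not part of the upstream diff): the comment retained inside the new `spilled` sub-struct describes how the spill list encodes entries: each page number is stored shifted left by one bit, and the low bit tags a lazily deleted slot. A minimal standalone sketch of that encoding; the helper names below are illustrative, not libmdbx API:

    #include <stdint.h>

    typedef uint32_t pgno_t; /* page number, as on common configurations */
    static inline pgno_t spilled_encode(pgno_t pgno) { return pgno << 1; } /* page 42 -> slot 84 */
    static inline pgno_t spilled_decode(pgno_t slot) { return slot >> 1; } /* slot 84 or 85 -> page 42 */
    static inline void spilled_mark_deleted(pgno_t *slot) { *slot |= 1; }  /* 84 -> 85, tagged deleted */
    static inline int spilled_is_deleted(pgno_t slot) { return slot & 1; }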
@@ -3280,6 +3280,9 @@ struct MDBX_cursor {
  #define C_SUB 0x04 /* Cursor is a sub-cursor */
  #define C_DEL 0x08 /* last op was a cursor_del */
  #define C_UNTRACK 0x10 /* Un-track cursor when closing */
+ #define C_GCU \
+ 0x20 /* Preparation for a GC update is in progress, so pages may \
+ * be taken from the GC even for FREE_DBI */
  uint8_t mc_flags;
 
  /* Cursor checking flags. */
@@ -4643,7 +4646,7 @@ __cold static const char *pagetype_caption(const uint8_t type,
  }
  }
 
- __cold static __must_check_result int MDBX_PRINTF_ARGS(2, 3)
+ __cold static int MDBX_PRINTF_ARGS(2, 3)
  bad_page(const MDBX_page *mp, const char *fmt, ...) {
  if (LOG_ENABLED(MDBX_LOG_ERROR)) {
  static const MDBX_page *prev;
@@ -5257,7 +5260,7 @@ __cold void thread_dtor(void *rthc) {
  if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) {
  TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(),
  __Wpedantic_format_voidptr(reader));
- atomic_cas32(&reader->mr_pid, self_pid, 0);
+ (void)atomic_cas32(&reader->mr_pid, self_pid, 0);
  }
  }
 
@@ -6346,50 +6349,51 @@ __hot static size_t pnl_merge(MDBX_PNL dst, const MDBX_PNL src) {
  return total;
  }
 
- static void spill_remove(MDBX_txn *txn, size_t idx, pgno_t npages) {
- tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spill_pages) &&
- txn->tw.spill_least_removed > 0);
- txn->tw.spill_least_removed =
- (idx < txn->tw.spill_least_removed) ? idx : txn->tw.spill_least_removed;
- txn->tw.spill_pages[idx] |= 1;
- MDBX_PNL_SETSIZE(txn->tw.spill_pages,
- MDBX_PNL_GETSIZE(txn->tw.spill_pages) -
- (idx == MDBX_PNL_GETSIZE(txn->tw.spill_pages)));
+ static void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) {
+ tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) &&
+ txn->tw.spilled.least_removed > 0);
+ txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
+ ? idx
+ : txn->tw.spilled.least_removed;
+ txn->tw.spilled.list[idx] |= 1;
+ MDBX_PNL_SETSIZE(txn->tw.spilled.list,
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
+ (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
 
  while (unlikely(npages > 1)) {
- const pgno_t pgno = (txn->tw.spill_pages[idx] >> 1) + 1;
+ const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1;
  if (MDBX_PNL_ASCENDING) {
- if (++idx > MDBX_PNL_GETSIZE(txn->tw.spill_pages) ||
- (txn->tw.spill_pages[idx] >> 1) != pgno)
+ if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) ||
+ (txn->tw.spilled.list[idx] >> 1) != pgno)
  return;
  } else {
- if (--idx < 1 || (txn->tw.spill_pages[idx] >> 1) != pgno)
+ if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno)
  return;
- txn->tw.spill_least_removed = (idx < txn->tw.spill_least_removed)
- ? idx
- : txn->tw.spill_least_removed;
- }
- txn->tw.spill_pages[idx] |= 1;
- MDBX_PNL_SETSIZE(txn->tw.spill_pages,
- MDBX_PNL_GETSIZE(txn->tw.spill_pages) -
- (idx == MDBX_PNL_GETSIZE(txn->tw.spill_pages)));
+ txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
+ ? idx
+ : txn->tw.spilled.least_removed;
+ }
+ txn->tw.spilled.list[idx] |= 1;
+ MDBX_PNL_SETSIZE(txn->tw.spilled.list,
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
+ (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
  --npages;
  }
  }
 
  static MDBX_PNL spill_purge(MDBX_txn *txn) {
- tASSERT(txn, txn->tw.spill_least_removed > 0);
- const MDBX_PNL sl = txn->tw.spill_pages;
- if (txn->tw.spill_least_removed != INT_MAX) {
+ tASSERT(txn, txn->tw.spilled.least_removed > 0);
+ const MDBX_PNL sl = txn->tw.spilled.list;
+ if (txn->tw.spilled.least_removed != INT_MAX) {
  size_t len = MDBX_PNL_GETSIZE(sl), r, w;
- for (w = r = txn->tw.spill_least_removed; r <= len; ++r) {
+ for (w = r = txn->tw.spilled.least_removed; r <= len; ++r) {
  sl[w] = sl[r];
  w += 1 - (sl[r] & 1);
  }
  for (size_t i = 1; i < w; ++i)
  tASSERT(txn, (sl[i] & 1) == 0);
  MDBX_PNL_SETSIZE(sl, w - 1);
- txn->tw.spill_least_removed = INT_MAX;
+ txn->tw.spilled.least_removed = INT_MAX;
  } else {
  for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i)
  tASSERT(txn, (sl[i] & 1) == 0);
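Editor's note: spill_remove above deletes entries lazily, merely setting the LSB and lowering least_removed, while spill_purge squeezes the tagged slots out in a single pass. A hedged standalone sketch of that compaction; names and types are simplified stand-ins, not verbatim libmdbx code:

    #include <stddef.h>
    #include <stdint.h>

    /* Compact a 1-based list whose deleted slots carry the LSB tag
     * (slot 0 holds the length, as with MDBX's PNL). Mirrors the
     * read/write loop of spill_purge() and returns the new length. */
    static size_t purge_tagged(uint32_t *pnl, size_t len, size_t first_removed) {
      size_t w = first_removed;
      for (size_t r = first_removed; r <= len; ++r) {
        pnl[w] = pnl[r];
        w += 1 - (pnl[r] & 1); /* advance the write cursor only for kept slots */
      }
      return w - 1;
    }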
@@ -6445,7 +6449,8 @@ static __inline size_t pnl_search(const MDBX_PNL pnl, pgno_t pgno,
  }
 
  static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
- const MDBX_PNL pnl = txn->tw.spill_pages;
+ tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
+ const MDBX_PNL pnl = txn->tw.spilled.list;
  if (likely(!pnl))
  return 0;
  pgno <<= 1;
@@ -6454,8 +6459,8 @@ static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
  }
 
  static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
- pgno_t npages) {
- const MDBX_PNL pnl = txn->tw.spill_pages;
+ size_t npages) {
+ const MDBX_PNL pnl = txn->tw.spilled.list;
  if (likely(!pnl))
  return false;
  const size_t len = MDBX_PNL_GETSIZE(pnl);
@@ -6467,7 +6472,7 @@ static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
  DEBUG_EXTRA_PRINT("%s\n", "]");
  }
  const pgno_t spilled_range_begin = pgno << 1;
- const pgno_t spilled_range_last = ((pgno + npages) << 1) - 1;
+ const pgno_t spilled_range_last = ((pgno + (pgno_t)npages) << 1) - 1;
  #if MDBX_PNL_ASCENDING
  const size_t n =
  pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1);
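Editor's note, a worked example of the shifted range check above: for pgno = 10 and npages = 3, spilled_range_begin = 10 << 1 = 20 and spilled_range_last = (13 << 1) - 1 = 25, so the closed interval [20, 25] covers the encodings 20/22/24 of pages 10..12 together with their deleted variants 21/23/25. The added (pgno_t)npages cast merely keeps the arithmetic in pgno_t now that npages has widened to size_t.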
@@ -6831,7 +6836,7 @@ dpl_endpgno(const MDBX_dpl *dl, size_t i) {
  }
 
  static __inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno,
- pgno_t npages) {
+ size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
 
@@ -6889,7 +6894,7 @@ MDBX_MAYBE_UNUSED static const MDBX_page *debug_dpl_find(const MDBX_txn *txn,
  return nullptr;
  }
 
- static void dpl_remove_ex(const MDBX_txn *txn, size_t i, pgno_t npages) {
+ static void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
 
@@ -6911,7 +6916,7 @@ static void dpl_remove(const MDBX_txn *txn, size_t i) {
  static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
  pgno_t pgno,
  MDBX_page *page,
- pgno_t npages) {
+ size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  MDBX_dpl *dl = txn->tw.dirtylist;
@@ -6980,7 +6985,7 @@ static __must_check_result __inline int page_retire(MDBX_cursor *mc,
  MDBX_page *mp);
 
  static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
- pgno_t npages);
+ size_t npages);
  typedef struct page_result {
  MDBX_page *page;
  int err;
@@ -6989,7 +6994,7 @@ typedef struct page_result {
  static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard);
 
  static pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
- static pgr_t page_new_large(MDBX_cursor *mc, const pgno_t npages);
+ static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
  static int page_touch(MDBX_cursor *mc);
  static int cursor_touch(MDBX_cursor *mc);
  static int touch_dbi(MDBX_cursor *mc);
@@ -7588,7 +7593,7 @@ static MDBX_page *page_malloc(MDBX_txn *txn, size_t num) {
  }
 
  /* Free a shadow dirty page */
- static void dpage_free(MDBX_env *env, MDBX_page *dp, pgno_t npages) {
+ static void dpage_free(MDBX_env *env, MDBX_page *dp, size_t npages) {
  VALGRIND_MAKE_MEM_UNDEFINED(dp, pgno2bytes(env, npages));
  MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages));
  if (unlikely(env->me_flags & MDBX_PAGEPERTURB))
@@ -7910,7 +7915,7 @@ static bool txn_refund(MDBX_txn *txn) {
  if (before == txn->mt_next_pgno)
  return false;
 
- if (txn->tw.spill_pages)
+ if (txn->tw.spilled.list)
  /* Squash deleted pagenums if we refunded any */
  spill_purge(txn);
 
@@ -7925,9 +7930,9 @@ static __inline bool txn_refund(MDBX_txn *txn) {
  #endif /* MDBX_ENABLE_REFUND */
 
  __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
- pgno_t npages) {
+ size_t npages) {
  MDBX_env *const env = txn->mt_env;
- DEBUG("kill %u page(s) %" PRIaPGNO, npages, pgno);
+ DEBUG("kill %zu page(s) %" PRIaPGNO, npages, pgno);
  eASSERT(env, pgno >= NUM_METAS && npages);
  if (!IS_FROZEN(txn, mp)) {
  const size_t bytes = pgno2bytes(env, npages);
@@ -7954,7 +7959,7 @@ __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
 
  /* Remove page from dirty list */
  static __inline void page_wash(MDBX_txn *txn, const size_t di,
- MDBX_page *const mp, const pgno_t npages) {
+ MDBX_page *const mp, const size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
  tASSERT(txn, di && di <= txn->tw.dirtylist->length &&
@@ -8003,7 +8008,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
  * So for flexibility and avoid extra internal dependencies we just
  * fallback to reading if dirty list was not allocated yet. */
  size_t di = 0, si = 0;
- pgno_t npages = 1;
+ size_t npages = 1;
  bool is_frozen = false, is_spilled = false, is_shadowed = false;
  if (unlikely(!mp)) {
  if (ASSERT_ENABLED() && pageflags) {
@@ -8019,7 +8024,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
  is_frozen = true;
  if (ASSERT_ENABLED()) {
  for (MDBX_txn *scan = txn; scan; scan = scan->mt_parent) {
- tASSERT(txn, !search_spilled(scan, pgno));
+ tASSERT(txn, !txn->tw.spilled.list || !search_spilled(scan, pgno));
  tASSERT(txn, !scan->tw.dirtylist || !debug_dpl_find(scan, pgno));
  }
  }
@@ -8064,7 +8069,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
  is_shadowed = IS_SHADOWED(txn, mp);
  if (is_dirty) {
  tASSERT(txn, !is_spilled);
- tASSERT(txn, !search_spilled(txn, pgno));
+ tASSERT(txn, !txn->tw.spilled.list || !search_spilled(txn, pgno));
  tASSERT(txn, debug_dpl_find(txn, pgno) == mp || txn->mt_parent ||
  (txn->mt_flags & MDBX_WRITEMAP));
  } else {
@@ -8098,12 +8103,12 @@ status_done:
  } else {
  npages = mp->mp_pages;
  cASSERT(mc, mc->mc_db->md_overflow_pages >= npages);
- mc->mc_db->md_overflow_pages -= npages;
+ mc->mc_db->md_overflow_pages -= (pgno_t)npages;
  }
 
  if (is_frozen) {
  retire:
- DEBUG("retire %u page %" PRIaPGNO, npages, pgno);
+ DEBUG("retire %zu page %" PRIaPGNO, npages, pgno);
  rc = pnl_append_range(false, &txn->tw.retired_pages, pgno, npages);
  tASSERT(txn, dirtylist_check(txn));
  return rc;
@@ -8154,7 +8159,7 @@ status_done:
  }
  tASSERT(txn, is_spilled || is_shadowed || (mp && IS_SHADOWED(txn, mp)));
  }
- DEBUG("refunded %u %s page %" PRIaPGNO, npages, kind, pgno);
+ DEBUG("refunded %zu %s page %" PRIaPGNO, npages, kind, pgno);
  txn->mt_next_pgno = pgno;
  txn_refund(txn);
  return MDBX_SUCCESS;
@@ -8223,7 +8228,7 @@ status_done:
  page_wash(txn, di, mp, npages);
 
  reclaim:
- DEBUG("reclaim %u %s page %" PRIaPGNO, npages, "dirty", pgno);
+ DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno);
  rc = pnl_insert_range(&txn->tw.relist, pgno, npages);
  tASSERT(txn, pnl_check_allocated(txn->tw.relist,
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
@@ -8330,7 +8335,7 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
  osal_flush_incoherent_mmap(env->me_map + offset, bytes, env->me_os_psize);
  const MDBX_page *const rp = (const MDBX_page *)(env->me_map + offset);
  /* check with timeout as the workaround
- * for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269 */
+ * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
  if (unlikely(memcmp(wp, rp, bytes))) {
  ctx->coherency_timestamp = 0;
  WARNING("catch delayed/non-arrived page %" PRIaPGNO " %s", wp->mp_pgno,
@@ -8351,11 +8356,12 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
  do {
  eASSERT(env, wp->mp_pgno == bytes2pgno(env, offset));
  eASSERT(env, (wp->mp_flags & P_ILL_BITS) == 0);
- unsigned npages = IS_OVERFLOW(wp) ? wp->mp_pages : 1u;
+ size_t npages = IS_OVERFLOW(wp) ? wp->mp_pages : 1u;
  size_t chunk = pgno2bytes(env, npages);
  eASSERT(env, bytes >= chunk);
+ MDBX_page *next = (MDBX_page *)((char *)wp + chunk);
  dpage_free(env, wp, npages);
- wp = (MDBX_page *)((char *)wp + chunk);
+ wp = next;
  offset += chunk;
  bytes -= chunk;
  } while (bytes);
@@ -8384,7 +8390,7 @@ __must_check_result static int iov_write(iov_ctx_t *ctx) {
  }
 
  __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
- MDBX_page *dp, pgno_t npages) {
+ MDBX_page *dp, size_t npages) {
  MDBX_env *const env = txn->mt_env;
  tASSERT(txn, ctx->err == MDBX_SUCCESS);
  tASSERT(txn, dp->mp_pgno >= MIN_PAGENO && dp->mp_pgno < txn->mt_next_pgno);
@@ -8428,16 +8434,16 @@ __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
  #if MDBX_NEED_WRITTEN_RANGE
  ctx->flush_begin =
  (ctx->flush_begin < dp->mp_pgno) ? ctx->flush_begin : dp->mp_pgno;
- ctx->flush_end = (ctx->flush_end > dp->mp_pgno + npages)
+ ctx->flush_end = (ctx->flush_end > dp->mp_pgno + (pgno_t)npages)
  ? ctx->flush_end
- : dp->mp_pgno + npages;
+ : dp->mp_pgno + (pgno_t)npages;
  #endif /* MDBX_NEED_WRITTEN_RANGE */
  env->me_lck->mti_unsynced_pages.weak += npages;
  return MDBX_SUCCESS;
  }
 
  static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
- const pgno_t npages) {
+ const size_t npages) {
  tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP) || MDBX_AVOID_MSYNC);
  #if MDBX_ENABLE_PGOP_STAT
  txn->mt_env->me_lck->mti_pgop_stat.spill.weak += npages;
@@ -8446,7 +8452,7 @@ static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
  int err = iov_page(txn, ctx, dp, npages);
  if (likely(err == MDBX_SUCCESS) &&
  (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)))
- err = pnl_append_range(true, &txn->tw.spill_pages, pgno << 1, npages);
+ err = pnl_append_range(true, &txn->tw.spilled.list, pgno << 1, npages);
  return err;
  }
 
@@ -8496,16 +8502,16 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  const uint32_t reciprocal) {
  MDBX_dpl *const dl = txn->tw.dirtylist;
  const uint32_t age = dpl_age(txn, i);
- const unsigned npages = dpl_npages(dl, i);
+ const size_t npages = dpl_npages(dl, i);
  const pgno_t pgno = dl->items[i].pgno;
  if (age == 0) {
- DEBUG("skip %s %u page %" PRIaPGNO, "keep", npages, pgno);
+ DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno);
  return 256;
  }
 
  MDBX_page *const dp = dl->items[i].ptr;
  if (dp->mp_flags & (P_LOOSE | P_SPILLED)) {
- DEBUG("skip %s %u page %" PRIaPGNO,
+ DEBUG("skip %s %zu page %" PRIaPGNO,
  (dp->mp_flags & P_LOOSE) ? "loose"
  : (dp->mp_flags & P_LOOSE) ? "loose"
  : "parent-spilled",
@@ -8519,7 +8525,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  if (parent && (parent->mt_flags & MDBX_TXN_SPILLS)) {
  do
  if (intersect_spilled(parent, pgno, npages)) {
- DEBUG("skip-2 parent-spilled %u page %" PRIaPGNO, npages, pgno);
+ DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno);
  dp->mp_flags |= P_SPILLED;
  return 256;
  }
@@ -8533,7 +8539,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  return prio = 256 - prio;
 
  /* make a large/overflow pages be likely to spill */
- uint32_t factor = npages | npages >> 1;
+ size_t factor = npages | npages >> 1;
  factor |= factor >> 2;
  factor |= factor >> 4;
  factor |= factor >> 8;
@@ -8541,7 +8547,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
  factor = prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157;
  factor = (factor < 256) ? 255 - factor : 0;
  tASSERT(txn, factor < 256 && factor < (256 - prio));
- return prio = factor;
+ return prio = (unsigned)factor;
  }
 
  /* Spill pages from the dirty list back to disk.
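Editor's note, a worked example of the factor computation in the two hunks above: the or-cascade smears the top bit of npages downward so that factor + 1 becomes the next power of two, which turns log2n_powerof2(factor + 1) into a cheap integer log2 of the run length. For npages = 5 (binary 101) the cascade yields 7, and log2n_powerof2(8) = 3, so larger overflow runs earn a proportionally higher spill priority; the final clamp maps the result back into the 0..255 priority range.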
@@ -8645,7 +8651,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  if (txn->mt_flags & MDBX_WRITEMAP) {
  NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync",
  dirty_entries, dirty_npages);
- tASSERT(txn, txn->tw.spill_pages == nullptr);
+ tASSERT(txn, txn->tw.spilled.list == nullptr);
  const MDBX_env *env = txn->mt_env;
  rc =
  osal_msync(&txn->mt_env->me_dxb_mmap, 0,
@@ -8669,10 +8675,10 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >=
  need_spill_npages);
  if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
- if (!txn->tw.spill_pages) {
- txn->tw.spill_least_removed = INT_MAX;
- txn->tw.spill_pages = pnl_alloc(need_spill);
- if (unlikely(!txn->tw.spill_pages)) {
+ if (!txn->tw.spilled.list) {
+ txn->tw.spilled.least_removed = INT_MAX;
+ txn->tw.spilled.list = pnl_alloc(need_spill);
+ if (unlikely(!txn->tw.spilled.list)) {
  rc = MDBX_ENOMEM;
  bailout:
  txn->mt_flags |= MDBX_TXN_ERROR;
@@ -8681,7 +8687,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  } else {
  /* purge deleted slots */
  spill_purge(txn);
- rc = pnl_reserve(&txn->tw.spill_pages, need_spill);
+ rc = pnl_reserve(&txn->tw.spilled.list, need_spill);
  (void)rc /* ignore since the resulting list may be shorter
  and pnl_append() will increase pnl on demand */
  ;
@@ -8865,7 +8871,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
  goto bailout;
 
  if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
- pnl_sort(txn->tw.spill_pages, (size_t)txn->mt_next_pgno << 1);
+ pnl_sort(txn->tw.spilled.list, (size_t)txn->mt_next_pgno << 1);
  txn->mt_flags |= MDBX_TXN_SPILLS;
  }
  NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
@@ -9279,6 +9285,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) {
  MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
  if (unlikely(lck == NULL /* exclusive without-lck mode */)) {
  eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub);
+ env->me_lck->mti_readers_refresh_flag.weak = nothing_changed;
  return env->me_lck->mti_oldest_reader.weak = steady;
  }
 
@@ -9367,10 +9374,13 @@ __cold static pgno_t find_largest_snapshot(const MDBX_env *env,
 
  /* Add a page to the txn's dirty list */
  __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
- pgno_t npages) {
+ size_t npages) {
  tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
+ mp->mp_txnid = txn->mt_front;
  if (!txn->tw.dirtylist) {
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
+ txn->tw.writemap_dirty_npages += npages;
+ tASSERT(txn, txn->tw.spilled.list == nullptr);
  return MDBX_SUCCESS;
  }
  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
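Editor's note: this hunk leans on the union added to struct MDBX_txn earlier in the diff. With MDBX_WRITEMAP and no dirtylist the shared storage doubles as a plain counter, since msync() later flushes the whole map at once. A hedged sketch of that dispatch; the function and its body are simplified stand-ins, not verbatim libmdbx code:

    /* Illustrative only: the two union members never coexist. */
    static int note_dirty(MDBX_txn *txn, MDBX_page *mp, size_t npages) {
      mp->mp_txnid = txn->mt_front;  /* now stamped before the branch */
      if (!txn->tw.dirtylist) {      /* MDBX_WRITEMAP && !MDBX_AVOID_MSYNC */
        txn->tw.writemap_dirty_npages += npages; /* just count the pages */
        return MDBX_SUCCESS;
      }
      return dpl_append(txn, mp->mp_pgno, mp, npages); /* track individually */
    }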
@@ -9383,7 +9393,6 @@ __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
  #endif /* xMDBX_DEBUG_SPILLING == 2 */
 
  int rc;
- mp->mp_txnid = txn->mt_front;
  if (unlikely(txn->tw.dirtyroom == 0)) {
  if (txn->tw.loose_count) {
  MDBX_page *loose = txn->tw.loose_pages;
@@ -10093,6 +10102,8 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) {
  }
  #endif /* _MSC_VER */
 
+ #if !MDBX_PNL_ASCENDING
+
  #if !defined(MDBX_ATTRIBUTE_TARGET) && \
  (__has_attribute(__target__) || __GNUC_PREREQ(5, 0))
  #define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target)))
@@ -10406,6 +10417,8 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len,
  /* Choosing of another variants should be added here. */
  #endif /* scan4seq_default */
 
+ #endif /* MDBX_PNL_ASCENDING */
+
  #ifndef scan4seq_default
  #define scan4seq_default scan4seq_fallback
  #endif /* scan4seq_default */
@@ -10469,45 +10482,39 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len,
  *
  * Returns 0 on success, non-zero on failure.*/
 
- #define MDBX_ALLOC_GC 1
- #define MDBX_ALLOC_NEW 2
- #define MDBX_ALLOC_COALESCE 4
- #define MDBX_ALLOC_SLOT 8
- #define MDBX_ALLOC_RESERVE 16
- #define MDBX_ALLOC_BACKLOG 32
- #define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
- #define MDBX_ALLOC_LIFO 128
+ #define MDBX_ALLOC_DEFAULT 0
+ #define MDBX_ALLOC_RESERVE 1
+ #define MDBX_ALLOC_UNIMPORTANT 2
+ #define MDBX_ALLOC_COALESCE 4 /* internal state */
+ #define MDBX_ALLOC_SHOULD_SCAN 8 /* internal state */
+ #define MDBX_ALLOC_LIFO 16 /* internal state */
 
- static __inline bool is_gc_usable(const MDBX_txn *txn) {
+ static __inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc,
+ const uint8_t flags) {
  /* If txn is updating the GC, then the retired-list cannot play catch-up with
  * itself by growing while trying to save it. */
- if (txn->mt_flags & (MDBX_TXN_UPDATE_GC | MDBX_TXN_FROZEN_RE))
+ if (mc->mc_dbi == FREE_DBI && !(flags & MDBX_ALLOC_RESERVE) &&
+ !(mc->mc_flags & C_GCU))
  return false;
 
  /* avoid (recursive) search inside empty tree and while tree is
- updating, https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/31 */
+ updating, https://libmdbx.dqdkfa.ru/dead-github/issues/31 */
  if (txn->mt_dbs[FREE_DBI].md_entries == 0)
  return false;
 
- /* If our dirty list is already full, we can't touch GC */
- if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
- !(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY))
- return false;
-
  return true;
  }
 
- static int gc_cursor_init(MDBX_cursor *mc, MDBX_txn *txn) {
- if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) {
- ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB",
- txn->mt_dbs[FREE_DBI].md_flags);
- return MDBX_CORRUPTED;
- }
- return cursor_init(mc, txn, FREE_DBI);
+ __hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) {
+ const size_t len = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed);
+ for (size_t i = 1; i <= len; ++i)
+ if (txn->tw.lifo_reclaimed[i] == id)
+ return true;
+ return false;
  }
 
  static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
- char flags) {
+ uint8_t flags) {
  #if MDBX_ENABLE_PROFGC
  const uint64_t monotime_before = osal_monotime();
  size_t majflt_before;
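Editor's note on the flag rework above: the old request bits (MDBX_ALLOC_GC, MDBX_ALLOC_NEW, MDBX_ALLOC_SLOT, MDBX_ALLOC_BACKLOG) are gone. Judging from the new assert num > 0 || (flags & MDBX_ALLOC_RESERVE) and the num ? "reserve-pages" : "fetch-slot" logging later in this diff, a pure GC-slot request now appears to be expressed as num == 0 with MDBX_ALLOC_RESERVE set, while an ordinary allocation passes MDBX_ALLOC_DEFAULT. A hypothetical illustration of those semantics, not calls taken from the diff itself:

    pgr_t r1 = page_alloc_slowpath(mc, 4, MDBX_ALLOC_DEFAULT);  /* allocate 4 pages */
    pgr_t r2 = page_alloc_slowpath(mc, 0,                       /* slot-only request */
                                   MDBX_ALLOC_RESERVE | MDBX_ALLOC_UNIMPORTANT);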
@@ -10525,21 +10532,13 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
  prof->spe_counter += 1;
  #endif /* MDBX_ENABLE_PROFGC */
 
- eASSERT(env, num == 0 || !(flags & MDBX_ALLOC_SLOT));
- eASSERT(env, num > 0 || !(flags & MDBX_ALLOC_NEW));
- eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
- MDBX_ALLOC_BACKLOG)) == 0 ||
- (flags & MDBX_ALLOC_GC));
- eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
- MDBX_ALLOC_BACKLOG)) == 0 ||
- (flags & MDBX_ALLOC_NEW) == 0);
+ eASSERT(env, num > 0 || (flags & MDBX_ALLOC_RESERVE));
  eASSERT(env, pnl_check_allocated(txn->tw.relist,
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
 
  pgno_t pgno = 0, *range = nullptr;
- size_t re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
+ size_t newnext, re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
  if (num > 1) {
- eASSERT(env, !(flags & MDBX_ALLOC_SLOT));
  #if MDBX_ENABLE_PROFGC
  prof->xpages += 1;
  #endif /* MDBX_ENABLE_PROFGC */
@@ -10555,347 +10554,363 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
  }
  }
  } else {
- eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) ||
- MDBX_PNL_GETSIZE(txn->tw.relist) == 0);
+ eASSERT(env, num == 0 || re_len == 0);
  }
 
  //---------------------------------------------------------------------------
 
- if (likely(flags & MDBX_ALLOC_GC)) {
- if (unlikely(!is_gc_usable(txn)))
- goto no_gc;
+ if (unlikely(!is_gc_usable(txn, mc, flags)))
+ goto no_gc;
 
- eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO)) == 0);
- flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
+ eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO |
+ MDBX_ALLOC_SHOULD_SCAN)) == 0);
+ flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
 
- const unsigned coalesce_threshold = env->me_maxgc_ov1page >> 2;
+ if (/* Do not coalesce records while preparing the reserve for a GC
+ * update. Otherwise an attempt to grow the reserve may require an
+ * even larger reserve due to growth of the reclaimed pages list. */
+ (flags & MDBX_ALLOC_RESERVE) == 0) {
  if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
- MDBX_PNL_GETSIZE(txn->tw.relist) < coalesce_threshold && num)
+ re_len < env->me_maxgc_ov1page / 2)
  flags += MDBX_ALLOC_COALESCE;
+ }
 
- MDBX_cursor recur;
- ret.err = gc_cursor_init(&recur, txn);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
+ MDBX_cursor *const gc =
+ (MDBX_cursor *)((char *)env->me_txn0 + sizeof(MDBX_txn));
+ gc->mc_txn = txn;
+ gc->mc_flags = 0;
 
- retry_gc_refresh_oldest:;
- txnid_t oldest = txn_oldest_reader(txn);
- if (unlikely(!oldest))
- goto no_gc;
-
- retry_gc_have_oldest:
- if (unlikely(oldest >= txn->mt_txnid)) {
- ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN
- " for current-txnid %" PRIaTXN,
- oldest, txn->mt_txnid);
- ret.err = MDBX_PROBLEM;
- goto fail;
- }
- const txnid_t detent = oldest + 1;
+ retry_gc_refresh_oldest:;
+ txnid_t oldest = txn_oldest_reader(txn);
+ retry_gc_have_oldest:
+ if (unlikely(oldest >= txn->mt_txnid)) {
+ ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN
+ " for current-txnid %" PRIaTXN,
+ oldest, txn->mt_txnid);
+ ret.err = MDBX_PROBLEM;
+ goto fail;
+ }
+ const txnid_t detent = oldest + 1;
 
- txnid_t last = 0;
- bool should_scan = false;
- MDBX_cursor_op op = MDBX_FIRST;
- if (flags & MDBX_ALLOC_LIFO) {
- if (!txn->tw.lifo_reclaimed) {
- txn->tw.lifo_reclaimed = txl_alloc();
- if (unlikely(!txn->tw.lifo_reclaimed)) {
- ret.err = MDBX_ENOMEM;
- goto fail;
- }
+ txnid_t id = 0;
+ MDBX_cursor_op op = MDBX_FIRST;
+ if (flags & MDBX_ALLOC_LIFO) {
+ if (!txn->tw.lifo_reclaimed) {
+ txn->tw.lifo_reclaimed = txl_alloc();
+ if (unlikely(!txn->tw.lifo_reclaimed)) {
+ ret.err = MDBX_ENOMEM;
+ goto fail;
  }
- /* Begin lookup backward from oldest reader */
- last = detent - 1;
- op = MDBX_SET_RANGE;
- } else if (txn->tw.last_reclaimed) {
- /* Continue lookup forward from last-reclaimed */
- last = txn->tw.last_reclaimed + 1;
- if (last >= detent)
- goto no_gc;
- op = MDBX_SET_RANGE;
  }
+ /* Begin lookup backward from oldest reader */
+ id = detent - 1;
+ op = MDBX_SET_RANGE;
+ } else if (txn->tw.last_reclaimed) {
+ /* Continue lookup forward from last-reclaimed */
+ id = txn->tw.last_reclaimed + 1;
+ if (id >= detent)
+ goto depleted_gc;
+ op = MDBX_SET_RANGE;
+ }
 
- next_gc:;
- MDBX_val key;
- key.iov_base = &last;
- key.iov_len = sizeof(last);
+ next_gc:;
+ MDBX_val key;
+ key.iov_base = &id;
+ key.iov_len = sizeof(id);
 
  #if MDBX_ENABLE_PROFGC
- prof->rsteps += 1;
+ prof->rsteps += 1;
  #endif /* MDBX_ENABLE_PROFGC */
 
- /* Seek first/next GC record */
- ret.err = mdbx_cursor_get(&recur, &key, NULL, op);
- if (unlikely(ret.err != MDBX_SUCCESS)) {
- if (unlikely(ret.err != MDBX_NOTFOUND))
- goto fail;
- if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
- op = MDBX_PREV;
- goto next_gc;
- }
- goto depleted_gc;
- }
- if (unlikely(key.iov_len != sizeof(txnid_t))) {
- ret.err = MDBX_CORRUPTED;
+ /* Seek first/next GC record */
+ ret.err = mdbx_cursor_get(gc, &key, NULL, op);
+ if (unlikely(ret.err != MDBX_SUCCESS)) {
+ if (unlikely(ret.err != MDBX_NOTFOUND))
  goto fail;
- }
- last = unaligned_peek_u64(4, key.iov_base);
- if (flags & MDBX_ALLOC_LIFO) {
+ if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
  op = MDBX_PREV;
- if (last >= detent)
- goto next_gc;
- /* skip IDs of records that already reclaimed */
- for (size_t i = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); i > 0; --i)
- if (txn->tw.lifo_reclaimed[i] == last)
- goto next_gc;
- } else {
- op = MDBX_NEXT;
- if (unlikely(last >= detent))
- goto depleted_gc;
+ goto next_gc;
  }
+ goto depleted_gc;
+ }
+ if (unlikely(key.iov_len != sizeof(txnid_t))) {
+ ret.err = MDBX_CORRUPTED;
+ goto fail;
+ }
+ id = unaligned_peek_u64(4, key.iov_base);
+ if (flags & MDBX_ALLOC_LIFO) {
+ op = MDBX_PREV;
+ if (id >= detent || is_already_reclaimed(txn, id))
+ goto next_gc;
+ } else {
+ op = MDBX_NEXT;
+ if (unlikely(id >= detent))
+ goto depleted_gc;
+ }
 
- /* Reading next GC record */
- MDBX_val data;
- MDBX_page *const mp = recur.mc_pg[recur.mc_top];
- if (unlikely((ret.err = node_read(&recur,
- page_node(mp, recur.mc_ki[recur.mc_top]),
- &data, mp)) != MDBX_SUCCESS))
- goto fail;
+ /* Reading next GC record */
+ MDBX_val data;
+ MDBX_page *const mp = gc->mc_pg[gc->mc_top];
+ if (unlikely((ret.err = node_read(gc, page_node(mp, gc->mc_ki[gc->mc_top]),
+ &data, mp)) != MDBX_SUCCESS))
+ goto fail;
 
- eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
- pgno_t *gc_pnl = (pgno_t *)data.iov_base;
- if (unlikely(data.iov_len % sizeof(pgno_t) ||
- data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
- !pnl_check(gc_pnl, txn->mt_next_pgno))) {
- ret.err = MDBX_CORRUPTED;
- goto fail;
+ pgno_t *gc_pnl = (pgno_t *)data.iov_base;
+ if (unlikely(data.iov_len % sizeof(pgno_t) ||
+ data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
+ !pnl_check(gc_pnl, txn->mt_next_pgno))) {
+ ret.err = MDBX_CORRUPTED;
+ goto fail;
+ }
+
+ const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
+ TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len,
+ gc_len + re_len);
+
+ eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
+ if (unlikely(gc_len + re_len >= env->me_maxgc_ov1page)) {
+ /* Don't try to coalesce too much. */
+ if (flags & MDBX_ALLOC_SHOULD_SCAN) {
+ eASSERT(env, flags & MDBX_ALLOC_COALESCE);
+ eASSERT(env, num > 0);
+ #if MDBX_ENABLE_PROFGC
+ env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
+ #endif /* MDBX_ENABLE_PROFGC */
+ TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
+ if (re_len >= num) {
+ eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
+ MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
+ range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+ pgno = *range;
+ if (num == 1)
+ goto done;
+ range = scan4seq(range, re_len, num - 1);
+ eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
+ if (likely(range)) {
+ pgno = *range;
+ goto done;
+ }
+ }
+ flags -= MDBX_ALLOC_COALESCE | MDBX_ALLOC_SHOULD_SCAN;
  }
- const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
- if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(
- txn->tw.relist) >= env->me_options.rp_augment_limit) &&
- ((/* not a slot-request from gc-update */
- (flags & MDBX_ALLOC_SLOT) == 0 &&
+ if (unlikely(/* list is too long already */ re_len >=
+ env->me_options.rp_augment_limit) &&
+ ((/* not a slot-request from gc-update */ num &&
  /* have enough unallocated space */ txn->mt_geo.upper >=
  txn->mt_next_pgno + num) ||
- gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= MDBX_PGL_LIMIT)) {
+ gc_len + re_len >= MDBX_PGL_LIMIT)) {
  /* Stop reclaiming to avoid large/overflow the page list.
  * This is a rare case while search for a continuously multi-page region
  * in a large database.
- * https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123 */
+ * https://libmdbx.dqdkfa.ru/dead-github/issues/123
+ */
  NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu "
  "(chunk) -> %zu",
- MDBX_PNL_GETSIZE(txn->tw.relist), gc_len,
- gc_len + MDBX_PNL_GETSIZE(txn->tw.relist));
+ re_len, gc_len, gc_len + re_len);
  goto depleted_gc;
  }
+ }
 
- /* Remember ID of readed GC record */
- txn->tw.last_reclaimed = last;
- if (flags & MDBX_ALLOC_LIFO) {
- ret.err = txl_append(&txn->tw.lifo_reclaimed, last);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
- }
-
- /* Append PNL from GC record to tw.relist */
- ret.err = pnl_need(&txn->tw.relist, gc_len);
+ /* Remember ID of readed GC record */
+ txn->tw.last_reclaimed = id;
+ if (flags & MDBX_ALLOC_LIFO) {
+ ret.err = txl_append(&txn->tw.lifo_reclaimed, id);
  if (unlikely(ret.err != MDBX_SUCCESS))
  goto fail;
- txn->tw.relist = txn->tw.relist;
+ }
 
- if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
- DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO
- " len %zu, PNL",
- last, txn->mt_dbs[FREE_DBI].md_root, gc_len);
- for (size_t i = gc_len; i; i--)
- DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
- DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
- }
+ /* Append PNL from GC record to tw.relist */
+ ret.err = pnl_need(&txn->tw.relist, gc_len);
+ if (unlikely(ret.err != MDBX_SUCCESS))
+ goto fail;
 
- /* Merge in descending sorted order */
- re_len = pnl_merge(txn->tw.relist, gc_pnl);
- should_scan = true;
- if (AUDIT_ENABLED()) {
- if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
- ret.err = MDBX_CORRUPTED;
- goto fail;
- }
- } else {
- eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno));
+ if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
+ DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO
+ " len %zu, PNL",
+ id, txn->mt_dbs[FREE_DBI].md_root, gc_len);
+ for (size_t i = gc_len; i; i--)
+ DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
+ DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
+ }
+
+ /* Merge in descending sorted order */
+ re_len = pnl_merge(txn->tw.relist, gc_pnl);
+ flags |= MDBX_ALLOC_SHOULD_SCAN;
+ if (AUDIT_ENABLED()) {
+ if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
+ ret.err = MDBX_CORRUPTED;
+ goto fail;
  }
- eASSERT(env, dirtylist_check(txn));
+ } else {
+ eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno));
+ }
+ eASSERT(env, dirtylist_check(txn));
 
- eASSERT(env,
- re_len == 0 || MDBX_PNL_MOST(txn->tw.relist) < txn->mt_next_pgno);
- if (MDBX_ENABLE_REFUND && re_len &&
- unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) {
- /* Refund suitable pages into "unallocated" space */
- if (txn_refund(txn))
- re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
- }
- eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
- eASSERT(env, pnl_check_allocated(txn->tw.relist,
- txn->mt_next_pgno - MDBX_ENABLE_REFUND));
+ eASSERT(env,
+ re_len == 0 || MDBX_PNL_MOST(txn->tw.relist) < txn->mt_next_pgno);
+ if (MDBX_ENABLE_REFUND && re_len &&
+ unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) {
+ /* Refund suitable pages into "unallocated" space */
+ txn_refund(txn);
+ re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
+ }
+ eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
+ eASSERT(env, pnl_check_allocated(txn->tw.relist,
+ txn->mt_next_pgno - MDBX_ENABLE_REFUND));
 
- /* Done for a kick-reclaim mode, actually no page needed */
- if (unlikely(flags & MDBX_ALLOC_SLOT)) {
- eASSERT(env, ret.err == MDBX_SUCCESS);
- goto early_exit;
- }
+ /* Done for a kick-reclaim mode, actually no page needed */
+ if (unlikely(num == 0)) {
+ eASSERT(env, ret.err == MDBX_SUCCESS);
+ TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id,
+ re_len);
+ goto early_exit;
+ }
 
- /* TODO: delete reclaimed records */
+ /* TODO: delete reclaimed records */
 
- /* Don't try to coalesce too much. */
- eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
- if (flags & MDBX_ALLOC_COALESCE) {
- if (re_len /* current size */ < coalesce_threshold) {
- #if MDBX_ENABLE_PROFGC
- env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
- #endif /* MDBX_ENABLE_PROFGC */
- goto next_gc;
- }
- TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
- flags &= ~MDBX_ALLOC_COALESCE;
- }
+ eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
+ if (flags & MDBX_ALLOC_COALESCE) {
+ TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id,
+ re_len);
+ goto next_gc;
+ }
 
- scan:
- eASSERT(env, should_scan);
- if (re_len >= num) {
- eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
- MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
- range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+ scan:
+ eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN);
+ eASSERT(env, num > 0);
+ if (re_len >= num) {
+ eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
+ MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
+ range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+ pgno = *range;
+ if (num == 1)
+ goto done;
+ range = scan4seq(range, re_len, num - 1);
+ eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
+ if (likely(range)) {
  pgno = *range;
- if (num == 1)
- goto done;
- range = scan4seq(range, re_len, num - 1);
- eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
- if (likely(range)) {
- pgno = *range;
- goto done;
- }
+ goto done;
  }
- should_scan = false;
- if (ret.err == MDBX_SUCCESS)
- goto next_gc;
+ }
+ flags -= MDBX_ALLOC_SHOULD_SCAN;
+ if (ret.err == MDBX_SUCCESS) {
+ TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id,
+ re_len);
+ goto next_gc;
+ }
 
- depleted_gc:
- ret.err = MDBX_NOTFOUND;
- if (should_scan)
- goto scan;
+ depleted_gc:
+ ret.err = MDBX_NOTFOUND;
+ if (flags & MDBX_ALLOC_SHOULD_SCAN)
+ goto scan;
 
- //-------------------------------------------------------------------------
+ //-------------------------------------------------------------------------
+
+ /* There is no suitable pages in the GC and to be able to allocate
+ * we should CHOICE one of:
+ * - make a new steady checkpoint if reclaiming was stopped by
+ * the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode;
+ * - kick lagging reader(s) if reclaiming was stopped by ones of it.
+ * - extend the database file. */
 
- /* There is no suitable pages in the GC and to be able to allocate
- * we should CHOICE one of:
- * - make a new steady checkpoint if reclaiming was stopped by
- * the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode;
- * - kick lagging reader(s) if reclaiming was stopped by ones of it.
- * - extend the database file. */
-
- /* Will use new pages from the map if nothing is suitable in the GC. */
- pgno = txn->mt_next_pgno;
- const size_t newnext = num + pgno;
-
- const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
- const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
- /* does reclaiming stopped at the last steady point? */
- if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady &&
- detent == prefer_steady.txnid + 1) {
- DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN
- "-%s, detent %" PRIaTXN,
- recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid,
- durable_caption(prefer_steady.ptr_c), detent);
- const pgno_t autosync_threshold =
- atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed);
- const uint64_t autosync_period =
- atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed);
- uint64_t eoos_timestamp;
- /* wipe the last steady-point if one of:
- * - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
- * - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
- * otherwise, make a new steady-point if one of:
- * - auto-sync threshold is specified and reached;
- * - upper limit of database size is reached;
- * - database is full (with the current file size)
- * AND auto-sync threshold it NOT specified */
- if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
- ((autosync_threshold | autosync_period) == 0 ||
- newnext >= prefer_steady.ptr_c->mm_geo.now)) {
- /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
- * without any auto-sync threshold(s). */
+ /* Will use new pages from the map if nothing is suitable in the GC. */
+ newnext = (pgno = txn->mt_next_pgno) + num;
+
+ /* Does reclaiming stopped at the last steady point? */
+ const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
+ const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
+ if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady &&
+ detent == prefer_steady.txnid + 1) {
+ DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN
+ "-%s, detent %" PRIaTXN,
+ recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid,
+ durable_caption(prefer_steady.ptr_c), detent);
+ const pgno_t autosync_threshold =
+ atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed);
+ const uint64_t autosync_period =
+ atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed);
+ uint64_t eoos_timestamp;
+ /* wipe the last steady-point if one of:
+ * - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
+ * - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
+ * otherwise, make a new steady-point if one of:
+ * - auto-sync threshold is specified and reached;
+ * - upper limit of database size is reached;
+ * - database is full (with the current file size)
+ * AND auto-sync threshold it NOT specified */
+ if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
+ ((autosync_threshold | autosync_period) == 0 ||
+ newnext >= prefer_steady.ptr_c->mm_geo.now)) {
+ /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
+ * without any auto-sync threshold(s). */
  #if MDBX_ENABLE_PROFGC
- env->me_lck->mti_pgop_stat.gc_prof.wipes += 1;
+ env->me_lck->mti_pgop_stat.gc_prof.wipes += 1;
  #endif /* MDBX_ENABLE_PROFGC */
- ret.err = wipe_steady(txn, detent);
- DEBUG("gc-wipe-steady, rc %d", ret.err);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
- eASSERT(env, prefer_steady.ptr_c !=
- meta_prefer_steady(env, &txn->tw.troika).ptr_c);
- goto retry_gc_refresh_oldest;
- }
- if ((flags & (MDBX_ALLOC_BACKLOG | MDBX_ALLOC_NEW)) == 0 ||
- (autosync_threshold &&
- atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
- autosync_threshold) ||
- (autosync_period &&
- (eoos_timestamp =
- atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
- osal_monotime() - eoos_timestamp >= autosync_period) ||
- newnext >= txn->mt_geo.upper ||
- (newnext >= txn->mt_end_pgno &&
- (autosync_threshold | autosync_period) == 0)) {
- /* make steady checkpoint. */
+ ret.err = wipe_steady(txn, detent);
+ DEBUG("gc-wipe-steady, rc %d", ret.err);
+ if (unlikely(ret.err != MDBX_SUCCESS))
+ goto fail;
+ eASSERT(env, prefer_steady.ptr_c !=
+ meta_prefer_steady(env, &txn->tw.troika).ptr_c);
+ goto retry_gc_refresh_oldest;
+ }
+ if ((autosync_threshold &&
+ atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
+ autosync_threshold) ||
+ (autosync_period &&
+ (eoos_timestamp =
+ atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
+ osal_monotime() - eoos_timestamp >= autosync_period) ||
+ newnext >= txn->mt_geo.upper ||
+ ((num == 0 || newnext >= txn->mt_end_pgno) &&
+ (autosync_threshold | autosync_period) == 0)) {
+ /* make steady checkpoint. */
  #if MDBX_ENABLE_PROFGC
- env->me_lck->mti_pgop_stat.gc_prof.flushes += 1;
+ env->me_lck->mti_pgop_stat.gc_prof.flushes += 1;
  #endif /* MDBX_ENABLE_PROFGC */
- MDBX_meta meta = *recent.ptr_c;
- ret.err = sync_locked(env, env->me_flags & MDBX_WRITEMAP, &meta,
- &txn->tw.troika);
- DEBUG("gc-make-steady, rc %d", ret.err);
- eASSERT(env, ret.err != MDBX_RESULT_TRUE);
- if (unlikely(ret.err != MDBX_SUCCESS))
- goto fail;
- eASSERT(env, prefer_steady.ptr_c !=
- meta_prefer_steady(env, &txn->tw.troika).ptr_c);
- goto retry_gc_refresh_oldest;
- }
+ MDBX_meta meta = *recent.ptr_c;
+ ret.err = sync_locked(env, env->me_flags & MDBX_WRITEMAP, &meta,
+ &txn->tw.troika);
+ DEBUG("gc-make-steady, rc %d", ret.err);
+ eASSERT(env, ret.err != MDBX_RESULT_TRUE);
+ if (unlikely(ret.err != MDBX_SUCCESS))
+ goto fail;
+ eASSERT(env, prefer_steady.ptr_c !=
+ meta_prefer_steady(env, &txn->tw.troika).ptr_c);
+ goto retry_gc_refresh_oldest;
  }
+ }
 
- if (env->me_lck_mmap.lck &&
- unlikely(true ==
- atomic_load32(&env->me_lck_mmap.lck->mti_readers_refresh_flag,
- mo_AcquireRelease))) {
- oldest = txn_oldest_reader(txn);
- if (oldest >= detent)
- goto retry_gc_have_oldest;
- }
+ if (unlikely(true == atomic_load32(&env->me_lck->mti_readers_refresh_flag,
+ mo_AcquireRelease))) {
+ oldest = txn_oldest_reader(txn);
+ if (oldest >= detent)
+ goto retry_gc_have_oldest;
+ }
 
- /* avoid kick lagging reader(s) if is enough unallocated space
- * at the end of database file. */
- if ((flags & MDBX_ALLOC_NEW) && newnext <= txn->mt_end_pgno) {
- eASSERT(env, range == nullptr);
- goto done;
- }
+ /* Avoid kick lagging reader(s) if is enough unallocated space
+ * at the end of database file. */
+ if (!(flags & MDBX_ALLOC_RESERVE) && newnext <= txn->mt_end_pgno) {
+ eASSERT(env, range == nullptr);
+ goto done;
+ }
 
- if (oldest < txn->mt_txnid - xMDBX_TXNID_STEP) {
- oldest = kick_longlived_readers(env, oldest);
- if (oldest >= detent)
- goto retry_gc_have_oldest;
- }
+ if (oldest < txn->mt_txnid - xMDBX_TXNID_STEP) {
+ oldest = kick_longlived_readers(env, oldest);
+ if (oldest >= detent)
+ goto retry_gc_have_oldest;
  }
 
  //---------------------------------------------------------------------------
 
  no_gc:
- if ((flags & MDBX_ALLOC_NEW) == 0) {
+ if (flags & MDBX_ALLOC_RESERVE) {
  ret.err = MDBX_NOTFOUND;
  goto fail;
  }
 
  /* Will use new pages from the map if nothing is suitable in the GC. */
- pgno = txn->mt_next_pgno;
- const size_t newnext = num + pgno;
+ newnext = (pgno = txn->mt_next_pgno) + num;
  if (newnext <= txn->mt_end_pgno)
  goto done;
 
@@ -10932,12 +10947,12 @@ no_gc:
 
  done:
  ret.err = MDBX_SUCCESS;
- if (likely((flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) == 0)) {
+ if (likely((flags & MDBX_ALLOC_RESERVE) == 0)) {
  ENSURE(env, pgno >= NUM_METAS);
  if (range) {
- eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
  eASSERT(env, pgno == *range);
  eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS);
+ eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
  /* Cutoff allocated pages from tw.relist */
  #if MDBX_PNL_ASCENDING
  for (const pgno_t *const end = re_list + re_len - num; range <= end;
@@ -10951,7 +10966,6 @@ done:
10951
10966
  eASSERT(env, pnl_check_allocated(txn->tw.relist,
10952
10967
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
10953
10968
  } else {
10954
- eASSERT(env, flags & MDBX_ALLOC_NEW);
10955
10969
  pgno = txn->mt_next_pgno;
10956
10970
  txn->mt_next_pgno += (pgno_t)num;
10957
10971
  eASSERT(env, txn->mt_next_pgno <= txn->mt_end_pgno);
@@ -10995,8 +11009,9 @@ done:
10995
11009
  int level;
10996
11010
  const char *what;
10997
11011
  if (flags & MDBX_ALLOC_RESERVE) {
10998
- level = (flags & MDBX_ALLOC_BACKLOG) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
10999
- what = (flags & MDBX_ALLOC_SLOT) ? "gc-slot/backlog" : "backlog-pages";
11012
+ level =
11013
+ (flags & MDBX_ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
11014
+ what = num ? "reserve-pages" : "fetch-slot";
11000
11015
  } else {
11001
11016
  txn->mt_flags |= MDBX_TXN_ERROR;
11002
11017
  level = MDBX_LOG_ERROR;
@@ -11011,7 +11026,7 @@ done:
11011
11026
  } else {
11012
11027
  early_exit:
11013
11028
  DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num,
11014
- (flags & MDBX_ALLOC_SLOT) ? "SLOT" : "RESERVE", ret.err);
11029
+ num ? "RESERVE" : "SLOT", ret.err);
11015
11030
  ret.page = NULL;
11016
11031
  }
11017
11032
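
For context, the branch above forces a steady checkpoint when an armed autosync threshold or period has been exceeded, when the database would have to grow past its geometry upper bound, or when no autosync limits are configured at all. A minimal usage sketch, assuming only the documented mdbx.h API (error handling trimmed), of how an application running with MDBX_SAFE_NOSYNC bounds the window of unsteady data so this branch fires periodically:

#include "mdbx.h"

static int env_open_with_autosync(MDBX_env *env, const char *path) {
  int rc = mdbx_env_set_syncbytes(env, 1u << 20); /* steady after ~1 MiB */
  if (rc != MDBX_SUCCESS)
    return rc;
  /* the period is expressed in 16.16 fixed-point seconds */
  rc = mdbx_env_set_syncperiod(env, 5 * 65536); /* ~5 seconds */
  if (rc != MDBX_SUCCESS)
    return rc;
  return mdbx_env_open(env, path, MDBX_SAFE_NOSYNC, 0640);
}
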
 
@@ -11057,84 +11072,103 @@ __hot static pgr_t page_alloc(const MDBX_cursor *mc) {
11057
11072
  return ret;
11058
11073
  }
11059
11074
 
11060
- if (likely(!(txn->mt_flags & MDBX_TXN_FROZEN_RE))) {
11061
- MDBX_PNL pnl = txn->tw.relist;
11062
- const size_t len = MDBX_PNL_GETSIZE(pnl);
11063
- if (likely(len > 0)) {
11064
- MDBX_env *const env = txn->mt_env;
11075
+ MDBX_PNL pnl = txn->tw.relist;
11076
+ const size_t len = MDBX_PNL_GETSIZE(pnl);
11077
+ if (likely(len > 0)) {
11078
+ MDBX_env *const env = txn->mt_env;
11065
11079
 
11066
- MDBX_PNL_SETSIZE(pnl, len - 1);
11080
+ MDBX_PNL_SETSIZE(pnl, len - 1);
11067
11081
  #if MDBX_PNL_ASCENDING
11068
- const pgno_t pgno = pnl[1];
11069
- for (size_t i = 1; i < len; ++i)
11070
- pnl[i] = pnl[i + 1];
11082
+ const pgno_t pgno = pnl[1];
11083
+ for (size_t i = 1; i < len; ++i)
11084
+ pnl[i] = pnl[i + 1];
11071
11085
  #else
11072
- const pgno_t pgno = pnl[len];
11086
+ const pgno_t pgno = pnl[len];
11073
11087
  #endif
11074
11088
 
11075
11089
  #if MDBX_ENABLE_PROFGC
11076
- const uint64_t monotime_before = osal_monotime();
11077
- size_t majflt_before;
11078
- const uint64_t cputime_before = osal_cputime(&majflt_before);
11079
- profgc_stat_t *const prof =
11080
- (mc->mc_dbi == FREE_DBI) ? &env->me_lck->mti_pgop_stat.gc_prof.self
11081
- : &env->me_lck->mti_pgop_stat.gc_prof.work;
11090
+ const uint64_t monotime_before = osal_monotime();
11091
+ size_t majflt_before;
11092
+ const uint64_t cputime_before = osal_cputime(&majflt_before);
11093
+ profgc_stat_t *const prof = (mc->mc_dbi == FREE_DBI)
11094
+ ? &env->me_lck->mti_pgop_stat.gc_prof.self
11095
+ : &env->me_lck->mti_pgop_stat.gc_prof.work;
11082
11096
  #endif /* MDBX_ENABLE_PROFGC */
11083
- pgr_t ret;
11084
- if (env->me_flags & MDBX_WRITEMAP) {
11085
- ret.page = pgno2page(env, pgno);
11086
- MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
11087
- } else {
11088
- ret.page = page_malloc(txn, 1);
11089
- if (unlikely(!ret.page)) {
11090
- ret.err = MDBX_ENOMEM;
11091
- goto bailout;
11092
- }
11097
+ pgr_t ret;
11098
+ if (env->me_flags & MDBX_WRITEMAP) {
11099
+ ret.page = pgno2page(env, pgno);
11100
+ MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
11101
+ } else {
11102
+ ret.page = page_malloc(txn, 1);
11103
+ if (unlikely(!ret.page)) {
11104
+ ret.err = MDBX_ENOMEM;
11105
+ goto bailout;
11093
11106
  }
11107
+ }
11094
11108
 
11095
- VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
11096
- ret.page->mp_pgno = pgno;
11097
- ret.page->mp_leaf2_ksize = 0;
11098
- ret.page->mp_flags = 0;
11099
- tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
11109
+ VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
11110
+ ret.page->mp_pgno = pgno;
11111
+ ret.page->mp_leaf2_ksize = 0;
11112
+ ret.page->mp_flags = 0;
11113
+ tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
11100
11114
 
11101
- ret.err = page_dirty(txn, ret.page, 1);
11102
- bailout:
11103
- tASSERT(txn, pnl_check_allocated(txn->tw.relist,
11104
- txn->mt_next_pgno - MDBX_ENABLE_REFUND));
11115
+ ret.err = page_dirty(txn, ret.page, 1);
11116
+ bailout:
11117
+ tASSERT(txn, pnl_check_allocated(txn->tw.relist,
11118
+ txn->mt_next_pgno - MDBX_ENABLE_REFUND));
11105
11119
  #if MDBX_ENABLE_PROFGC
11106
- size_t majflt_after;
11107
- prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
11108
- prof->majflt += majflt_after - majflt_before;
11109
- prof->xtime_monotonic += osal_monotime() - monotime_before;
11120
+ size_t majflt_after;
11121
+ prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
11122
+ prof->majflt += majflt_after - majflt_before;
11123
+ prof->xtime_monotonic += osal_monotime() - monotime_before;
11110
11124
  #endif /* MDBX_ENABLE_PROFGC */
11111
- return ret;
11112
- }
11125
+ return ret;
11113
11126
  }
11114
11127
 
11115
- return page_alloc_slowpath(mc, 1, MDBX_ALLOC_ALL);
11128
+ return page_alloc_slowpath(mc, 1, MDBX_ALLOC_DEFAULT);
11116
11129
  }
11117
11130
 
11118
- /* Copy the used portions of a non-large/overflow page. */
11119
- __hot static void page_copy(MDBX_page *dst, const MDBX_page *src,
11120
- size_t psize) {
11131
+ /* Copy the used portions of a page. */
11132
+ __hot static void page_copy(MDBX_page *const dst, const MDBX_page *const src,
11133
+ const size_t size) {
11121
11134
  STATIC_ASSERT(UINT16_MAX > MAX_PAGESIZE - PAGEHDRSZ);
11122
11135
  STATIC_ASSERT(MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4);
11136
+ char *copy_dst = (void *)dst;
11137
+ const char *copy_src = (const void *)src;
11138
+ size_t copy_len = size;
11139
+ if (src->mp_flags & P_LEAF2) {
11140
+ copy_len = PAGEHDRSZ + src->mp_leaf2_ksize * page_numkeys(src);
11141
+ if (unlikely(copy_len > size))
11142
+ goto bailout;
11143
+ }
11123
11144
  if ((src->mp_flags & (P_LEAF2 | P_OVERFLOW)) == 0) {
11124
- size_t upper = src->mp_upper, lower = src->mp_lower, unused = upper - lower;
11125
-
11145
+ size_t upper = src->mp_upper, lower = src->mp_lower;
11146
+ intptr_t unused = upper - lower;
11126
11147
  /* If page isn't full, just copy the used portion. Adjust
11127
11148
  * alignment so memcpy may copy words instead of bytes. */
11128
- if (unused >= MDBX_CACHELINE_SIZE * 2) {
11149
+ if (unused > MDBX_CACHELINE_SIZE * 3) {
11129
11150
  lower = ceil_powerof2(lower + PAGEHDRSZ, sizeof(void *));
11130
11151
  upper = floor_powerof2(upper + PAGEHDRSZ, sizeof(void *));
11131
- memcpy(dst, src, lower);
11132
- dst = (void *)((char *)dst + upper);
11133
- src = (void *)((char *)src + upper);
11134
- psize -= upper;
11152
+ if (unlikely(upper > copy_len))
11153
+ goto bailout;
11154
+ memcpy(copy_dst, copy_src, lower);
11155
+ copy_dst += upper;
11156
+ copy_src += upper;
11157
+ copy_len -= upper;
11135
11158
  }
11136
11159
  }
11137
- memcpy(dst, src, psize);
11160
+ memcpy(copy_dst, copy_src, copy_len);
11161
+ return;
11162
+
11163
+ bailout:
11164
+ if (src->mp_flags & P_LEAF2)
11165
+ bad_page(src, "%s addr %p, n-keys %zu, ksize %u",
11166
+ "invalid/corrupted source page", __Wpedantic_format_voidptr(src),
11167
+ page_numkeys(src), src->mp_leaf2_ksize);
11168
+ else
11169
+ bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page",
11170
+ __Wpedantic_format_voidptr(src), src->mp_upper);
11171
+ memset(dst, -1, size);
11138
11172
  }
11139
11173
 
11140
11174
  /* Pull a page off the txn's spill list, if present.
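
For context, the fast path above pops a single page number straight off tw.relist (the PNL of pages reclaimed from the GC) and falls back to page_alloc_slowpath() only when that list is empty. A toy model of the O(1) pop, assuming the default descending layout (MDBX_PNL_ASCENDING == 0) where slot [0] holds the length and the smallest pgno sits at the tail:

/* Toy model only; the real code uses pgno_t and the MDBX_PNL_* macros. */
static unsigned pnl_pop_tail_model(unsigned *pnl) {
  const unsigned len = pnl[0]; /* caller guarantees len > 0 */
  pnl[0] = len - 1;            /* MDBX_PNL_SETSIZE(pnl, len - 1) */
  return pnl[len];             /* the page number being allocated */
}
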
@@ -11541,7 +11575,9 @@ __cold int mdbx_env_sync_poll(MDBX_env *env) {
11541
11575
 
11542
11576
  /* Back up parent txn's cursors, then grab the originals for tracking */
11543
11577
  static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
11544
- for (int i = parent->mt_numdbs; --i >= 0;) {
11578
+ tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr);
11579
+ nested->mt_cursors[FREE_DBI] = nullptr;
11580
+ for (int i = parent->mt_numdbs; --i > FREE_DBI;) {
11545
11581
  nested->mt_cursors[i] = NULL;
11546
11582
  MDBX_cursor *mc = parent->mt_cursors[i];
11547
11583
  if (mc != NULL) {
@@ -11586,7 +11622,8 @@ static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
11586
11622
  *
11587
11623
  * Returns 0 on success, non-zero on failure. */
11588
11624
  static void cursors_eot(MDBX_txn *txn, const bool merge) {
11589
- for (intptr_t i = txn->mt_numdbs; --i >= 0;) {
11625
+ tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr);
11626
+ for (intptr_t i = txn->mt_numdbs; --i > FREE_DBI;) {
11590
11627
  MDBX_cursor *next, *mc = txn->mt_cursors[i];
11591
11628
  if (!mc)
11592
11629
  continue;
@@ -11856,7 +11893,7 @@ __cold int mdbx_thread_unregister(const MDBX_env *env) {
11856
11893
  return MDBX_SUCCESS;
11857
11894
  }
11858
11895
 
11859
- /* check against https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269 */
11896
+ /* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
11860
11897
  static bool coherency_check(const MDBX_env *env, const txnid_t txnid,
11861
11898
  const volatile MDBX_db *dbs,
11862
11899
  const volatile MDBX_meta *meta, bool report) {
@@ -11957,7 +11994,7 @@ __cold static int coherency_timeout(uint64_t *timestamp, pgno_t pgno) {
11957
11994
  }
11958
11995
 
11959
11996
  /* check with timeout as the workaround
11960
- * for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269 */
11997
+ * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
11961
11998
  __hot static int coherency_check_readed(const MDBX_env *env,
11962
11999
  const txnid_t txnid,
11963
12000
  const volatile MDBX_db *dbs,
@@ -12193,8 +12230,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
12193
12230
  txn->tw.troika = meta_tap(env);
12194
12231
  const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
12195
12232
  uint64_t timestamp = 0;
12196
- while (
12197
- "workaround for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
12233
+ while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
12198
12234
  rc = coherency_check_readed(env, head.txnid, head.ptr_v->mm_dbs,
12199
12235
  head.ptr_v, &timestamp);
12200
12236
  if (likely(rc == MDBX_SUCCESS))
@@ -12219,8 +12255,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
12219
12255
  txn->tw.loose_refund_wl = 0;
12220
12256
  #endif /* MDBX_ENABLE_REFUND */
12221
12257
  MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0);
12222
- txn->tw.spill_pages = NULL;
12223
- txn->tw.spill_least_removed = 0;
12258
+ txn->tw.spilled.list = NULL;
12259
+ txn->tw.spilled.least_removed = 0;
12224
12260
  txn->tw.last_reclaimed = 0;
12225
12261
  if (txn->tw.lifo_reclaimed)
12226
12262
  MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0);
@@ -12297,6 +12333,19 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
12297
12333
  osal_srwlock_AcquireShared(&env->me_remap_guard);
12298
12334
  }
12299
12335
  #endif /* Windows */
12336
+ } else {
12337
+ if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) {
12338
+ ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB",
12339
+ txn->mt_dbs[FREE_DBI].md_flags);
12340
+ rc = MDBX_INCOMPATIBLE;
12341
+ goto bailout;
12342
+ }
12343
+
12344
+ tASSERT(txn, txn == env->me_txn0);
12345
+ MDBX_cursor *const gc = (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn));
12346
+ rc = cursor_init(gc, txn, FREE_DBI);
12347
+ if (rc != MDBX_SUCCESS)
12348
+ goto bailout;
12300
12349
  }
12301
12350
  #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
12302
12351
  txn_valgrind(env, txn);
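
The new else-branch above rejects environments whose GC/FreeDB record carries flags other than MDBX_INTEGERKEY, then (re)initializes a cursor that lives in the same allocation as the basal write transaction. A layout sketch, matching the enlarged tsize = sizeof(MDBX_txn) + sizeof(MDBX_cursor) allocation made in mdbx_env_openW() further below:

/* Sketch: one MDBX_cursor, dedicated to FREE_DBI, sits directly
 * behind the MDBX_txn in the me_txn0 buffer. */
static MDBX_cursor *txn0_gc_cursor(MDBX_txn *txn0) {
  return (MDBX_cursor *)((char *)txn0 + sizeof(MDBX_txn));
}
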
@@ -12514,7 +12563,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
12514
12563
  txn->tw.dirtylru = parent->tw.dirtylru;
12515
12564
 
12516
12565
  dpl_sort(parent);
12517
- if (parent->tw.spill_pages)
12566
+ if (parent->tw.spilled.list)
12518
12567
  spill_purge(parent);
12519
12568
 
12520
12569
  tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >=
@@ -12591,7 +12640,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
12591
12640
  eASSERT(env, (txn->mt_flags &
12592
12641
  ~(MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC |
12593
12642
  MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0);
12594
- assert(!txn->tw.spill_pages && !txn->tw.spill_least_removed);
12643
+ assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed);
12595
12644
  }
12596
12645
  txn->mt_signature = MDBX_MT_SIGNATURE;
12597
12646
  txn->mt_userctx = context;
@@ -12696,10 +12745,9 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
12696
12745
  env, txn->mt_child ? (size_t)txn->tw.retired_pages
12697
12746
  : MDBX_PNL_GETSIZE(txn->tw.retired_pages));
12698
12747
  info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
12699
- info->txn_space_dirty =
12700
- txn->tw.dirtylist
12701
- ? pgno2bytes(env, txn->tw.dirtylist->pages_including_loose)
12702
- : 0;
12748
+ info->txn_space_dirty = pgno2bytes(
12749
+ env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
12750
+ : txn->tw.writemap_dirty_npages);
12703
12751
  info->txn_reader_lag = INT64_MAX;
12704
12752
  MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
12705
12753
  if (scan_rlt && lck) {
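
Since a MDBX_WRITEMAP transaction keeps no dirtylist, txn_space_dirty now falls back to the new tw.writemap_dirty_npages counter from the union in struct MDBX_txn instead of reporting zero. A usage sketch of the documented accessor (the formatting here is illustrative only):

#include <inttypes.h>
#include <stdio.h>
#include "mdbx.h"

static void report_txn_space(const MDBX_txn *txn) {
  MDBX_txn_info info;
  if (mdbx_txn_info(txn, &info, /* scan_rlt = */ false) == MDBX_SUCCESS)
    printf("dirty %" PRIu64 " bytes, leftover %" PRIu64 " bytes\n",
           info.txn_space_dirty, info.txn_space_leftover);
}
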
@@ -13015,8 +13063,8 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) {
13015
13063
  txn->mt_flags = MDBX_TXN_FINISHED;
13016
13064
  txn->mt_owner = 0;
13017
13065
  env->me_txn = txn->mt_parent;
13018
- pnl_free(txn->tw.spill_pages);
13019
- txn->tw.spill_pages = nullptr;
13066
+ pnl_free(txn->tw.spilled.list);
13067
+ txn->tw.spilled.list = nullptr;
13020
13068
  if (txn == env->me_txn0) {
13021
13069
  eASSERT(env, txn->mt_parent == NULL);
13022
13070
  /* Export or close DBI handles created in this txn */
@@ -13283,7 +13331,7 @@ typedef struct gc_update_context {
13283
13331
  #if MDBX_ENABLE_BIGFOOT
13284
13332
  txnid_t bigfoot;
13285
13333
  #endif /* MDBX_ENABLE_BIGFOOT */
13286
- MDBX_cursor_couple cursor;
13334
+ MDBX_cursor cursor;
13287
13335
  } gcu_context_t;
13288
13336
 
13289
13337
  static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
@@ -13292,7 +13340,7 @@ static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
13292
13340
  #if MDBX_ENABLE_BIGFOOT
13293
13341
  ctx->bigfoot = txn->mt_txnid;
13294
13342
  #endif /* MDBX_ENABLE_BIGFOOT */
13295
- return cursor_init(&ctx->cursor.outer, txn, FREE_DBI);
13343
+ return cursor_init(&ctx->cursor, txn, FREE_DBI);
13296
13344
  }
13297
13345
 
13298
13346
  static __always_inline size_t gcu_backlog_size(MDBX_txn *txn) {
@@ -13311,10 +13359,10 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
13311
13359
  #endif /* MDBX_ENABLE_BIGFOOT */
13312
13360
  key.iov_len = sizeof(txnid_t);
13313
13361
  const struct cursor_set_result csr =
13314
- cursor_set(&ctx->cursor.outer, &key, &val, MDBX_SET);
13362
+ cursor_set(&ctx->cursor, &key, &val, MDBX_SET);
13315
13363
  if (csr.err == MDBX_SUCCESS && csr.exact) {
13316
13364
  ctx->retired_stored = 0;
13317
- err = mdbx_cursor_del(&ctx->cursor.outer, 0);
13365
+ err = mdbx_cursor_del(&ctx->cursor, 0);
13318
13366
  TRACE("== clear-4linear, backlog %zu, err %d", gcu_backlog_size(txn),
13319
13367
  err);
13320
13368
  }
@@ -13327,6 +13375,13 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
13327
13375
  return err;
13328
13376
  }
13329
13377
 
13378
+ static int gcu_touch(gcu_context_t *ctx) {
13379
+ ctx->cursor.mc_flags |= C_GCU;
13380
+ int err = cursor_touch(&ctx->cursor);
13381
+ ctx->cursor.mc_flags -= C_GCU;
13382
+ return err;
13383
+ }
13384
+
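
gcu_touch() above temporarily marks the GC cursor with C_GCU so that cursor_touch() knows it is modifying the GC itself; clearing the flag with `-=` is correct only because C_GCU is guaranteed to be clear on entry. A defensive save/restore variant of the same idiom (a sketch, not the shipped code):

static int gcu_touch_defensive(gcu_context_t *ctx) {
  const unsigned saved = ctx->cursor.mc_flags;
  ctx->cursor.mc_flags |= C_GCU;  /* flag GC self-modification */
  const int err = cursor_touch(&ctx->cursor);
  ctx->cursor.mc_flags = saved;   /* restore instead of subtracting */
  return err;
}
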
13330
13385
  /* Prepare a backlog of pages to modify GC itself, while reclaiming is
13331
13386
  * prohibited. It should be enough to prevent search in page_alloc_slowpath()
13332
13387
  * during a deleting, when GC tree is unbalanced. */
@@ -13356,14 +13411,12 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
13356
13411
  key.iov_base = val.iov_base = nullptr;
13357
13412
  key.iov_len = sizeof(txnid_t);
13358
13413
  val.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
13359
- err = cursor_spill(&ctx->cursor.outer, &key, &val);
13414
+ err = cursor_spill(&ctx->cursor, &key, &val);
13360
13415
  if (unlikely(err != MDBX_SUCCESS))
13361
13416
  return err;
13362
13417
  }
13363
13418
 
13364
- tASSERT(txn, txn->mt_flags & MDBX_TXN_UPDATE_GC);
13365
- txn->mt_flags -= MDBX_TXN_UPDATE_GC;
13366
- err = cursor_touch(&ctx->cursor.outer);
13419
+ err = gcu_touch(ctx);
13367
13420
  TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
13368
13421
 
13369
13422
  if (unlikely(pages4retiredlist > 1) &&
@@ -13373,22 +13426,20 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
13373
13426
  err = gcu_clean_stored_retired(txn, ctx);
13374
13427
  if (unlikely(err != MDBX_SUCCESS))
13375
13428
  return err;
13376
- err = page_alloc_slowpath(&ctx->cursor.outer, (pgno_t)pages4retiredlist,
13377
- MDBX_ALLOC_GC | MDBX_ALLOC_RESERVE)
13378
- .err;
13429
+ err =
13430
+ page_alloc_slowpath(&ctx->cursor, pages4retiredlist, MDBX_ALLOC_RESERVE)
13431
+ .err;
13379
13432
  TRACE("== after-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err);
13380
- cASSERT(&ctx->cursor.outer,
13433
+ cASSERT(&ctx->cursor,
13381
13434
  gcu_backlog_size(txn) >= pages4retiredlist || err != MDBX_SUCCESS);
13382
13435
  }
13383
13436
 
13384
13437
  while (gcu_backlog_size(txn) < backlog4cow + pages4retiredlist &&
13385
13438
  err == MDBX_SUCCESS)
13386
- err = page_alloc_slowpath(&ctx->cursor.outer, 0,
13387
- MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
13388
- MDBX_ALLOC_RESERVE | MDBX_ALLOC_BACKLOG)
13439
+ err = page_alloc_slowpath(&ctx->cursor, 0,
13440
+ MDBX_ALLOC_RESERVE | MDBX_ALLOC_UNIMPORTANT)
13389
13441
  .err;
13390
13442
 
13391
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13392
13443
  TRACE("<< backlog %zu, err %d", gcu_backlog_size(txn), err);
13393
13444
  return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
13394
13445
  }
@@ -13417,9 +13468,8 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) {
13417
13468
  MDBX_env *const env = txn->mt_env;
13418
13469
  const char *const dbg_prefix_mode = ctx->lifo ? " lifo" : " fifo";
13419
13470
  (void)dbg_prefix_mode;
13420
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13421
- ctx->cursor.outer.mc_next = txn->mt_cursors[FREE_DBI];
13422
- txn->mt_cursors[FREE_DBI] = &ctx->cursor.outer;
13471
+ ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI];
13472
+ txn->mt_cursors[FREE_DBI] = &ctx->cursor;
13423
13473
 
13424
13474
  /* txn->tw.relist[] can grow and shrink during this call.
13425
13475
  * txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow.
@@ -13481,7 +13531,7 @@ retry:
13481
13531
  ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
13482
13532
  key.iov_base = &ctx->cleaned_id;
13483
13533
  key.iov_len = sizeof(ctx->cleaned_id);
13484
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, NULL, MDBX_SET);
13534
+ rc = mdbx_cursor_get(&ctx->cursor, &key, NULL, MDBX_SET);
13485
13535
  if (rc == MDBX_NOTFOUND)
13486
13536
  continue;
13487
13537
  if (unlikely(rc != MDBX_SUCCESS))
@@ -13494,18 +13544,17 @@ retry:
13494
13544
  tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
13495
13545
  TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix_mode,
13496
13546
  ctx->cleaned_slot, ctx->cleaned_id);
13497
- tASSERT(txn, *txn->mt_cursors == &ctx->cursor.outer);
13498
- rc = mdbx_cursor_del(&ctx->cursor.outer, 0);
13547
+ tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
13548
+ rc = mdbx_cursor_del(&ctx->cursor, 0);
13499
13549
  if (unlikely(rc != MDBX_SUCCESS))
13500
13550
  goto bailout;
13501
13551
  } while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
13502
13552
  txl_sort(txn->tw.lifo_reclaimed);
13503
13553
  }
13504
13554
  } else {
13505
- /* If using records from GC which we have not yet deleted,
13506
- * now delete them and any we reserved for tw.relist. */
13555
+ /* Delete the remaining records taken from the GC. */
13507
13556
  while (ctx->cleaned_id <= txn->tw.last_reclaimed) {
13508
- rc = cursor_first(&ctx->cursor.outer, &key, NULL);
13557
+ rc = cursor_first(&ctx->cursor, &key, NULL);
13509
13558
  if (rc == MDBX_NOTFOUND)
13510
13559
  break;
13511
13560
  if (unlikely(rc != MDBX_SUCCESS))
@@ -13530,8 +13579,8 @@ retry:
13530
13579
  tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
13531
13580
  TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix_mode,
13532
13581
  ctx->cleaned_id);
13533
- tASSERT(txn, *txn->mt_cursors == &ctx->cursor.outer);
13534
- rc = mdbx_cursor_del(&ctx->cursor.outer, 0);
13582
+ tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
13583
+ rc = mdbx_cursor_del(&ctx->cursor, 0);
13535
13584
  if (unlikely(rc != MDBX_SUCCESS))
13536
13585
  goto bailout;
13537
13586
  }
@@ -13566,10 +13615,7 @@ retry:
13566
13615
  if (txn->tw.loose_count > 0) {
13567
13616
  TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix_mode,
13568
13617
  txn->tw.loose_count);
13569
- rc = page_alloc_slowpath(&ctx->cursor.outer, 0,
13570
- MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
13571
- MDBX_ALLOC_RESERVE)
13572
- .err;
13618
+ rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
13573
13619
  if (rc == MDBX_SUCCESS) {
13574
13620
  TRACE("%s: retry since gc-slot for %zu loose-pages available",
13575
13621
  dbg_prefix_mode, txn->tw.loose_count);
@@ -13651,10 +13697,9 @@ retry:
13651
13697
  if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
13652
13698
  if (unlikely(!ctx->retired_stored)) {
13653
13699
  /* Make sure last page of GC is touched and on retired-list */
13654
- txn->mt_flags -= MDBX_TXN_UPDATE_GC;
13655
- rc = page_search(&ctx->cursor.outer, NULL,
13656
- MDBX_PS_LAST | MDBX_PS_MODIFY);
13657
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13700
+ rc = cursor_last(&ctx->cursor, nullptr, nullptr);
13701
+ if (likely(rc == MDBX_SUCCESS))
13702
+ rc = gcu_touch(ctx);
13658
13703
  if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
13659
13704
  goto bailout;
13660
13705
  }
@@ -13664,6 +13709,8 @@ retry:
13664
13709
  do {
13665
13710
  if (ctx->bigfoot > txn->mt_txnid) {
13666
13711
  rc = gcu_clean_stored_retired(txn, ctx);
13712
+ if (unlikely(rc != MDBX_SUCCESS))
13713
+ goto bailout;
13667
13714
  tASSERT(txn, ctx->bigfoot <= txn->mt_txnid);
13668
13715
  }
13669
13716
 
@@ -13685,7 +13732,7 @@ retry:
13685
13732
  ? env->me_maxgc_ov1page
13686
13733
  : left;
13687
13734
  data.iov_len = (chunk + 1) * sizeof(pgno_t);
13688
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data, MDBX_RESERVE);
13735
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
13689
13736
  if (unlikely(rc != MDBX_SUCCESS))
13690
13737
  goto bailout;
13691
13738
 
@@ -13723,7 +13770,7 @@ retry:
13723
13770
  do {
13724
13771
  gcu_prepare_backlog(txn, ctx, true);
13725
13772
  data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
13726
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data, MDBX_RESERVE);
13773
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
13727
13774
  if (unlikely(rc != MDBX_SUCCESS))
13728
13775
  goto bailout;
13729
13776
  /* Retry if tw.retired_pages[] grew during the Put() */
@@ -13790,17 +13837,13 @@ retry:
13790
13837
  left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
13791
13838
  env->me_maxgc_ov1page &&
13792
13839
  !ctx->dense) {
13793
- /* LY: need just a txn-id for save page list. */
13840
+ /* A free txn-id is needed for saving the page list. */
13794
13841
  bool need_cleanup = false;
13795
- txnid_t snap_oldest;
13842
+ txnid_t snap_oldest = 0;
13796
13843
  retry_rid:
13797
- txn->mt_flags -= MDBX_TXN_UPDATE_GC;
13798
13844
  do {
13799
- snap_oldest = txn_oldest_reader(txn);
13800
- rc = page_alloc_slowpath(&ctx->cursor.outer, 0,
13801
- MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
13802
- MDBX_ALLOC_RESERVE)
13803
- .err;
13845
+ rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
13846
+ snap_oldest = env->me_lck->mti_oldest_reader.weak;
13804
13847
  if (likely(rc == MDBX_SUCCESS)) {
13805
13848
  TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,
13806
13849
  MDBX_PNL_LAST(txn->tw.lifo_reclaimed));
@@ -13812,7 +13855,6 @@ retry:
13812
13855
  left >
13813
13856
  (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
13814
13857
  env->me_maxgc_ov1page);
13815
- txn->mt_flags += MDBX_TXN_UPDATE_GC;
13816
13858
 
13817
13859
  if (likely(rc == MDBX_SUCCESS)) {
13818
13860
  TRACE("%s: got enough from GC.", dbg_prefix_mode);
@@ -13830,7 +13872,7 @@ retry:
13830
13872
  } else {
13831
13873
  tASSERT(txn, txn->tw.last_reclaimed == 0);
13832
13874
  if (unlikely(txn_oldest_reader(txn) != snap_oldest))
13833
- /* should retry page_alloc_slowpath(MDBX_ALLOC_GC)
13875
+ /* should retry page_alloc_slowpath()
13834
13876
  * if the oldest reader changes since the last attempt */
13835
13877
  goto retry_rid;
13836
13878
  /* no reclaimable GC entries,
@@ -13840,7 +13882,8 @@ retry:
13840
13882
  ctx->rid);
13841
13883
  }
13842
13884
 
13843
- /* LY: GC is empty, will look any free txn-id in high2low order. */
13885
+ /* В GC нет годных к переработке записей,
13886
+ * будем использовать свободные id в обратном порядке. */
13844
13887
  while (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < prefer_max_scatter &&
13845
13888
  left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) -
13846
13889
  ctx->reused_slot) *
@@ -13858,26 +13901,20 @@ retry:
13858
13901
  }
13859
13902
 
13860
13903
  tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID);
13861
- --ctx->rid;
13904
+ ctx->rid -= 1;
13862
13905
  key.iov_base = &ctx->rid;
13863
13906
  key.iov_len = sizeof(ctx->rid);
13864
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_SET_KEY);
13907
+ rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
13865
13908
  if (unlikely(rc == MDBX_SUCCESS)) {
13866
- DEBUG("%s: GC's id %" PRIaTXN " is used, continue bottom-up search",
13909
+ DEBUG("%s: GC's id %" PRIaTXN " is present, going to first",
13867
13910
  dbg_prefix_mode, ctx->rid);
13868
- ++ctx->rid;
13869
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_FIRST);
13870
- if (rc == MDBX_NOTFOUND) {
13871
- DEBUG("%s: GC is empty (going dense-mode)", dbg_prefix_mode);
13872
- ctx->dense = true;
13873
- break;
13874
- }
13911
+ rc = cursor_first(&ctx->cursor, &key, nullptr);
13875
13912
  if (unlikely(rc != MDBX_SUCCESS ||
13876
13913
  key.iov_len != sizeof(txnid_t))) {
13877
13914
  rc = MDBX_CORRUPTED;
13878
13915
  goto bailout;
13879
13916
  }
13880
- txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13917
+ const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13881
13918
  if (gc_first <= MIN_TXNID) {
13882
13919
  DEBUG("%s: no free GC's id(s) less than %" PRIaTXN
13883
13920
  " (going dense-mode)",
@@ -13925,13 +13962,13 @@ retry:
13925
13962
  tASSERT(txn, txn->tw.lifo_reclaimed == NULL);
13926
13963
  if (unlikely(ctx->rid == 0)) {
13927
13964
  ctx->rid = txn_oldest_reader(txn);
13928
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, NULL, MDBX_FIRST);
13929
- if (rc == MDBX_SUCCESS) {
13965
+ rc = cursor_first(&ctx->cursor, &key, nullptr);
13966
+ if (likely(rc == MDBX_SUCCESS)) {
13930
13967
  if (unlikely(key.iov_len != sizeof(txnid_t))) {
13931
13968
  rc = MDBX_CORRUPTED;
13932
13969
  goto bailout;
13933
13970
  }
13934
- txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13971
+ const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
13935
13972
  if (ctx->rid >= gc_first)
13936
13973
  ctx->rid = gc_first - 1;
13937
13974
  if (unlikely(ctx->rid == 0)) {
@@ -14022,7 +14059,7 @@ retry:
14022
14059
  TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix_mode, chunk,
14023
14060
  ctx->settled + 1, ctx->settled + chunk + 1, reservation_gc_id);
14024
14061
  gcu_prepare_backlog(txn, ctx, true);
14025
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data,
14062
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
14026
14063
  MDBX_RESERVE | MDBX_NOOVERWRITE);
14027
14064
  tASSERT(txn, pnl_check_allocated(txn->tw.relist,
14028
14065
  txn->mt_next_pgno - MDBX_ENABLE_REFUND));
@@ -14070,7 +14107,7 @@ retry:
14070
14107
  size_t left = amount;
14071
14108
  if (txn->tw.lifo_reclaimed == nullptr) {
14072
14109
  tASSERT(txn, ctx->lifo == 0);
14073
- rc = cursor_first(&ctx->cursor.outer, &key, &data);
14110
+ rc = cursor_first(&ctx->cursor, &key, &data);
14074
14111
  if (unlikely(rc != MDBX_SUCCESS))
14075
14112
  goto bailout;
14076
14113
  } else {
@@ -14104,7 +14141,7 @@ retry:
14104
14141
  dbg_prefix_mode, fill_gc_id, ctx->filled_slot);
14105
14142
  key.iov_base = &fill_gc_id;
14106
14143
  key.iov_len = sizeof(fill_gc_id);
14107
- rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_SET_KEY);
14144
+ rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
14108
14145
  if (unlikely(rc != MDBX_SUCCESS))
14109
14146
  goto bailout;
14110
14147
  }
@@ -14118,7 +14155,6 @@ retry:
14118
14155
  key.iov_len = sizeof(fill_gc_id);
14119
14156
 
14120
14157
  tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
14121
- txn->mt_flags += MDBX_TXN_FROZEN_RE;
14122
14158
  size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
14123
14159
  if (unlikely(chunk > left)) {
14124
14160
  TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk,
@@ -14126,14 +14162,11 @@ retry:
14126
14162
  if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) ||
14127
14163
  chunk - left > env->me_maxgc_ov1page) {
14128
14164
  data.iov_len = (left + 1) * sizeof(pgno_t);
14129
- if (ctx->loop < 7)
14130
- txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
14131
14165
  }
14132
14166
  chunk = left;
14133
14167
  }
14134
- rc = mdbx_cursor_put(&ctx->cursor.outer, &key, &data,
14168
+ rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
14135
14169
  MDBX_CURRENT | MDBX_RESERVE);
14136
- txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
14137
14170
  if (unlikely(rc != MDBX_SUCCESS))
14138
14171
  goto bailout;
14139
14172
  gcu_clean_reserved(env, data);
@@ -14182,7 +14215,7 @@ retry:
14182
14215
 
14183
14216
  if (txn->tw.lifo_reclaimed == nullptr) {
14184
14217
  tASSERT(txn, ctx->lifo == 0);
14185
- rc = cursor_next(&ctx->cursor.outer, &key, &data, MDBX_NEXT);
14218
+ rc = cursor_next(&ctx->cursor, &key, &data, MDBX_NEXT);
14186
14219
  if (unlikely(rc != MDBX_SUCCESS))
14187
14220
  goto bailout;
14188
14221
  } else {
@@ -14213,7 +14246,7 @@ retry:
14213
14246
  ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
14214
14247
 
14215
14248
  bailout:
14216
- txn->mt_cursors[FREE_DBI] = ctx->cursor.outer.mc_next;
14249
+ txn->mt_cursors[FREE_DBI] = ctx->cursor.mc_next;
14217
14250
 
14218
14251
  MDBX_PNL_SETSIZE(txn->tw.relist, 0);
14219
14252
  #if MDBX_ENABLE_PROFGC
@@ -14363,7 +14396,8 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14363
14396
  MDBX_PNL_SETSIZE(parent->tw.retired_pages, w);
14364
14397
 
14365
14398
  /* Filter-out parent spill list */
14366
- if (parent->tw.spill_pages && MDBX_PNL_GETSIZE(parent->tw.spill_pages) > 0) {
14399
+ if (parent->tw.spilled.list &&
14400
+ MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) {
14367
14401
  const MDBX_PNL sl = spill_purge(parent);
14368
14402
  size_t len = MDBX_PNL_GETSIZE(sl);
14369
14403
  if (len) {
@@ -14378,7 +14412,7 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14378
14412
  DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1);
14379
14413
  i -= 1;
14380
14414
  } while (i && sl[i] >= (parent->mt_next_pgno << 1));
14381
- MDBX_PNL_GETSIZE(sl) = i;
14415
+ MDBX_PNL_SETSIZE(sl, i);
14382
14416
  #else
14383
14417
  assert(MDBX_PNL_MOST(sl) == MDBX_PNL_FIRST(sl));
14384
14418
  size_t i = 0;
@@ -14451,10 +14485,10 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14451
14485
  }
14452
14486
 
14453
14487
  /* Remove anything in our spill list from parent's dirty list */
14454
- if (txn->tw.spill_pages) {
14455
- tASSERT(txn, pnl_check_allocated(txn->tw.spill_pages,
14488
+ if (txn->tw.spilled.list) {
14489
+ tASSERT(txn, pnl_check_allocated(txn->tw.spilled.list,
14456
14490
  (size_t)parent->mt_next_pgno << 1));
14457
- dpl_sift(parent, txn->tw.spill_pages, true);
14491
+ dpl_sift(parent, txn->tw.spilled.list, true);
14458
14492
  tASSERT(parent,
14459
14493
  parent->tw.dirtyroom + parent->tw.dirtylist->length ==
14460
14494
  (parent->mt_parent ? parent->mt_parent->tw.dirtyroom
@@ -14606,23 +14640,23 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
14606
14640
  tASSERT(parent, dirtylist_check(parent));
14607
14641
  dpl_free(txn);
14608
14642
 
14609
- if (txn->tw.spill_pages) {
14610
- if (parent->tw.spill_pages) {
14643
+ if (txn->tw.spilled.list) {
14644
+ if (parent->tw.spilled.list) {
14611
14645
  /* Must not fail since space was preserved above. */
14612
- pnl_merge(parent->tw.spill_pages, txn->tw.spill_pages);
14613
- pnl_free(txn->tw.spill_pages);
14646
+ pnl_merge(parent->tw.spilled.list, txn->tw.spilled.list);
14647
+ pnl_free(txn->tw.spilled.list);
14614
14648
  } else {
14615
- parent->tw.spill_pages = txn->tw.spill_pages;
14616
- parent->tw.spill_least_removed = txn->tw.spill_least_removed;
14649
+ parent->tw.spilled.list = txn->tw.spilled.list;
14650
+ parent->tw.spilled.least_removed = txn->tw.spilled.least_removed;
14617
14651
  }
14618
14652
  tASSERT(parent, dirtylist_check(parent));
14619
14653
  }
14620
14654
 
14621
14655
  parent->mt_flags &= ~MDBX_TXN_HAS_CHILD;
14622
- if (parent->tw.spill_pages) {
14623
- assert(pnl_check_allocated(parent->tw.spill_pages,
14656
+ if (parent->tw.spilled.list) {
14657
+ assert(pnl_check_allocated(parent->tw.spilled.list,
14624
14658
  (size_t)parent->mt_next_pgno << 1));
14625
- if (MDBX_PNL_GETSIZE(parent->tw.spill_pages))
14659
+ if (MDBX_PNL_GETSIZE(parent->tw.spilled.list))
14626
14660
  parent->mt_flags |= MDBX_TXN_SPILLS;
14627
14661
  }
14628
14662
  }
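
Across these hunks the spill list moves from tw.spill_pages into the tw.spilled union member, but the entry encoding is unchanged: page numbers are stored shifted left by one bit, with the least-significant bit marking a deleted slot (hence the `<< 1` and `>> 1` arithmetic above). A small model of that encoding:

/* Model of the spilled-PNL entry encoding. */
static inline unsigned spilled_entry(unsigned pgno) { return pgno << 1; }
static inline unsigned spilled_pgno(unsigned entry) { return entry >> 1; }
static inline int spilled_deleted(unsigned entry)   { return entry & 1; }
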
@@ -14693,8 +14727,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
14693
14727
  sizeof(parent->mt_geo)) == 0);
14694
14728
  tASSERT(txn, memcmp(&parent->mt_canary, &txn->mt_canary,
14695
14729
  sizeof(parent->mt_canary)) == 0);
14696
- tASSERT(txn, !txn->tw.spill_pages ||
14697
- MDBX_PNL_GETSIZE(txn->tw.spill_pages) == 0);
14730
+ tASSERT(txn, !txn->tw.spilled.list ||
14731
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0);
14698
14732
  tASSERT(txn, txn->tw.loose_count == 0);
14699
14733
 
14700
14734
  /* fast completion of pure nested transaction */
@@ -14714,10 +14748,10 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
14714
14748
  goto fail;
14715
14749
  }
14716
14750
 
14717
- if (txn->tw.spill_pages) {
14718
- if (parent->tw.spill_pages) {
14719
- rc = pnl_need(&parent->tw.spill_pages,
14720
- MDBX_PNL_GETSIZE(txn->tw.spill_pages));
14751
+ if (txn->tw.spilled.list) {
14752
+ if (parent->tw.spilled.list) {
14753
+ rc = pnl_need(&parent->tw.spilled.list,
14754
+ MDBX_PNL_GETSIZE(txn->tw.spilled.list));
14721
14755
  if (unlikely(rc != MDBX_SUCCESS))
14722
14756
  goto fail;
14723
14757
  }
@@ -15837,7 +15871,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
15837
15871
  }
15838
15872
 
15839
15873
  uint64_t timestamp = 0;
15840
- while ("workaround for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
15874
+ while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
15841
15875
  rc =
15842
15876
  coherency_check_written(env, pending->unsafe_txnid, target, &timestamp);
15843
15877
  if (likely(rc == MDBX_SUCCESS))
@@ -16359,7 +16393,7 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
16359
16393
 
16360
16394
  uint64_t timestamp = 0;
16361
16395
  while ("workaround for "
16362
- "https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
16396
+ "https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
16363
16397
  meta = *head.ptr_c;
16364
16398
  rc = coherency_check_readed(env, head.txnid, meta.mm_dbs, &meta,
16365
16399
  &timestamp);
@@ -17503,13 +17537,13 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx,
17503
17537
  }
17504
17538
  #else
17505
17539
  struct stat st;
17506
- if (stat(pathname, &st)) {
17540
+ if (stat(pathname, &st) != 0) {
17507
17541
  rc = errno;
17508
17542
  if (rc != MDBX_ENOFILE)
17509
17543
  return rc;
17510
17544
  if (mode == 0 || (*flags & MDBX_RDONLY) != 0)
17511
17545
  /* can't open existing */
17512
- return rc;
17546
+ return rc /* MDBX_ENOFILE */;
17513
17547
 
17514
17548
  /* auto-create directory if requested */
17515
17549
  const mdbx_mode_t dir_mode =
@@ -17702,7 +17736,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
17702
17736
  } else {
17703
17737
  #if MDBX_MMAP_INCOHERENT_FILE_WRITE
17704
17738
  /* Temporary `workaround` for OpenBSD kernel's flaw.
17705
- * See https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/67 */
17739
+ * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */
17706
17740
  if ((flags & MDBX_WRITEMAP) == 0) {
17707
17741
  if (flags & MDBX_ACCEDE)
17708
17742
  flags |= MDBX_WRITEMAP;
@@ -18014,7 +18048,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
18014
18048
  }
18015
18049
 
18016
18050
  if ((flags & MDBX_RDONLY) == 0) {
18017
- const size_t tsize = sizeof(MDBX_txn),
18051
+ const size_t tsize = sizeof(MDBX_txn) + sizeof(MDBX_cursor),
18018
18052
  size = tsize + env->me_maxdbs *
18019
18053
  (sizeof(MDBX_db) + sizeof(MDBX_cursor *) +
18020
18054
  sizeof(MDBX_atomic_uint32_t) + 1);
@@ -18139,9 +18173,10 @@ __cold static int env_close(MDBX_env *env) {
18139
18173
  }
18140
18174
 
18141
18175
  if (env->me_dbxs) {
18142
- for (size_t i = env->me_numdbs; --i >= CORE_DBS;)
18176
+ for (size_t i = CORE_DBS; i < env->me_numdbs; ++i)
18143
18177
  osal_free(env->me_dbxs[i].md_name.iov_base);
18144
18178
  osal_free(env->me_dbxs);
18179
+ env->me_numdbs = CORE_DBS;
18145
18180
  env->me_dbxs = nullptr;
18146
18181
  }
18147
18182
  if (env->me_pbuf) {
@@ -18164,7 +18199,7 @@ __cold static int env_close(MDBX_env *env) {
18164
18199
  dpl_free(env->me_txn0);
18165
18200
  txl_free(env->me_txn0->tw.lifo_reclaimed);
18166
18201
  pnl_free(env->me_txn0->tw.retired_pages);
18167
- pnl_free(env->me_txn0->tw.spill_pages);
18202
+ pnl_free(env->me_txn0->tw.spilled.list);
18168
18203
  pnl_free(env->me_txn0->tw.relist);
18169
18204
  osal_free(env->me_txn0);
18170
18205
  env->me_txn0 = nullptr;
@@ -18907,7 +18942,8 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
18907
18942
  if (!MDBX_DISABLE_VALIDATION) {
18908
18943
  const MDBX_env *env = mc->mc_txn->mt_env;
18909
18944
  const size_t dsize = data->iov_len;
18910
- if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax))
18945
+ if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax) &&
18946
+ mc->mc_dbi != FREE_DBI)
18911
18947
  poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
18912
18948
  const unsigned npages = number_of_ovpages(env, dsize);
18913
18949
  if (unlikely(lp.page->mp_pages != npages)) {
@@ -18915,7 +18951,7 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
18915
18951
  return bad_page(lp.page,
18916
18952
  "too less n-pages %u for bigdata-node (%zu bytes)",
18917
18953
  lp.page->mp_pages, dsize);
18918
- else
18954
+ else if (mc->mc_dbi != FREE_DBI)
18919
18955
  poor_page(lp.page, "extra n-pages %u for bigdata-node (%zu bytes)",
18920
18956
  lp.page->mp_pages, dsize);
18921
18957
  }
@@ -20011,7 +20047,6 @@ static int touch_dbi(MDBX_cursor *mc) {
20011
20047
  *mc->mc_dbistate |= DBI_DIRTY;
20012
20048
  mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
20013
20049
  if (mc->mc_dbi >= CORE_DBS) {
20014
- cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_UPDATE_GC) == 0);
20015
20050
  /* Touch DB record of named DB */
20016
20051
  MDBX_cursor_couple cx;
20017
20052
  int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI);
@@ -20424,9 +20459,9 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
20424
20459
 
20425
20460
  /* Large/Overflow page overwrites need special handling */
20426
20461
  if (unlikely(node_flags(node) & F_BIGDATA)) {
20427
- int dpages = (node_size(key, data) > env->me_leaf_nodemax)
20428
- ? number_of_ovpages(env, data->iov_len)
20429
- : 0;
20462
+ const size_t dpages = (node_size(key, data) > env->me_leaf_nodemax)
20463
+ ? number_of_ovpages(env, data->iov_len)
20464
+ : 0;
20430
20465
 
20431
20466
  const pgno_t pgno = node_largedata_pgno(node);
20432
20467
  pgr_t lp = page_get_large(mc, pgno, mc->mc_pg[mc->mc_top]->mp_txnid);
@@ -20435,13 +20470,13 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
20435
20470
  cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW);
20436
20471
 
20437
20472
  /* Is the ov page from this txn (or a parent) and big enough? */
20438
- int ovpages = lp.page->mp_pages;
20439
- if (!IS_FROZEN(mc->mc_txn, lp.page) &&
20440
- (unlikely(mc->mc_txn->mt_flags & MDBX_TXN_FROZEN_RE)
20441
- ? (ovpages >= dpages)
20442
- : (ovpages ==
20443
- /* LY: add configurable threshold to keep reserve space */
20444
- dpages))) {
20473
+ const size_t ovpages = lp.page->mp_pages;
20474
+ const size_t extra_threshold =
20475
+ (mc->mc_dbi == FREE_DBI)
20476
+ ? 1
20477
+ : /* LY: add configurable threshold to keep reserve space */ 0;
20478
+ if (!IS_FROZEN(mc->mc_txn, lp.page) && ovpages >= dpages &&
20479
+ ovpages <= dpages + extra_threshold) {
20445
20480
  /* yes, overwrite it. */
20446
20481
  if (!IS_MODIFIABLE(mc->mc_txn, lp.page)) {
20447
20482
  if (IS_SPILLED(mc->mc_txn, lp.page)) {
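
The rewritten test above drops the MDBX_TXN_FROZEN_RE special case in favour of a per-DBI tolerance: a large/overflow page is overwritten in place when it is at least as big as required, and for the GC (FREE_DBI) one page of slack is also accepted. Factored out as a predicate, the rule reads as follows (a sketch; the shipped code additionally requires the page not to be frozen):

#include <stddef.h>

static int largepage_reusable(size_t ovpages, size_t dpages, int is_gc_dbi) {
  const size_t extra_threshold = is_gc_dbi ? 1 : 0;
  return ovpages >= dpages && ovpages <= dpages + extra_threshold;
}
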
@@ -20972,7 +21007,6 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
20972
21007
 
20973
21008
  DEBUG("db %u allocated new page %" PRIaPGNO, mc->mc_dbi, ret.page->mp_pgno);
20974
21009
  ret.page->mp_flags = (uint16_t)flags;
20975
- ret.page->mp_txnid = mc->mc_txn->mt_front;
20976
21010
  cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
20977
21011
  cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
20978
21012
  #if MDBX_ENABLE_PGOP_STAT
@@ -20994,25 +21028,24 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
20994
21028
  return ret;
20995
21029
  }
20996
21030
 
20997
- static pgr_t page_new_large(MDBX_cursor *mc, const unsigned npages) {
21031
+ static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) {
20998
21032
  pgr_t ret = likely(npages == 1)
20999
21033
  ? page_alloc(mc)
21000
- : page_alloc_slowpath(mc, npages, MDBX_ALLOC_ALL);
21034
+ : page_alloc_slowpath(mc, npages, MDBX_ALLOC_DEFAULT);
21001
21035
  if (unlikely(ret.err != MDBX_SUCCESS))
21002
21036
  return ret;
21003
21037
 
21004
- DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %u", mc->mc_dbi,
21038
+ DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %zu", mc->mc_dbi,
21005
21039
  ret.page->mp_pgno, npages);
21006
21040
  ret.page->mp_flags = P_OVERFLOW;
21007
- ret.page->mp_txnid = mc->mc_txn->mt_front;
21008
21041
  cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
21009
21042
  cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
21010
21043
  #if MDBX_ENABLE_PGOP_STAT
21011
21044
  mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += npages;
21012
21045
  #endif /* MDBX_ENABLE_PGOP_STAT */
21013
21046
 
21014
- mc->mc_db->md_overflow_pages += npages;
21015
- ret.page->mp_pages = npages;
21047
+ mc->mc_db->md_overflow_pages += (pgno_t)npages;
21048
+ ret.page->mp_pages = (pgno_t)npages;
21016
21049
  cASSERT(mc, !(mc->mc_flags & C_SUB));
21017
21050
  return ret;
21018
21051
  }
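
For reference, the npages passed into page_new_large() comes from number_of_ovpages(), which is simply the page-rounded size of the page header plus payload. An equivalent model, assuming psize is a power of two as libmdbx requires:

#include <stddef.h>

/* ceil((pagehdrsz + payload) / psize), written without floating point. */
static size_t ovpages_model(size_t pagehdrsz, size_t psize, size_t payload) {
  return (pagehdrsz + payload + psize - 1) / psize;
}
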
@@ -21109,7 +21142,6 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21109
21142
  key ? key->iov_len : 0, DKEY_DEBUG(key));
21110
21143
  cASSERT(mc, key != NULL && data != NULL);
21111
21144
  cASSERT(mc, PAGETYPE_COMPAT(mp) == P_LEAF);
21112
- cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
21113
21145
  MDBX_page *largepage = NULL;
21114
21146
 
21115
21147
  size_t node_bytes;
@@ -21118,6 +21150,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21118
21150
  STATIC_ASSERT(sizeof(pgno_t) % 2 == 0);
21119
21151
  node_bytes =
21120
21152
  node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
21153
+ cASSERT(mc, page_room(mp) >= node_bytes);
21121
21154
  } else if (unlikely(node_size(key, data) >
21122
21155
  mc->mc_txn->mt_env->me_leaf_nodemax)) {
21123
21156
  /* Put data on large/overflow page. */
@@ -21131,6 +21164,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21131
21164
  flags);
21132
21165
  return MDBX_PROBLEM;
21133
21166
  }
21167
+ cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
21134
21168
  const pgno_t ovpages = number_of_ovpages(mc->mc_txn->mt_env, data->iov_len);
21135
21169
  const pgr_t npr = page_new_large(mc, ovpages);
21136
21170
  if (unlikely(npr.err != MDBX_SUCCESS))
@@ -21142,10 +21176,12 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
21142
21176
  flags |= F_BIGDATA;
21143
21177
  node_bytes =
21144
21178
  node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
21179
+ cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
21145
21180
  } else {
21181
+ cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
21146
21182
  node_bytes = node_size(key, data) + sizeof(indx_t);
21183
+ cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
21147
21184
  }
21148
- cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
21149
21185
 
21150
21186
  /* Move higher pointers up one slot. */
21151
21187
  const size_t nkeys = page_numkeys(mp);
@@ -22886,7 +22922,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
22886
22922
  "big-node data size (%zu) <> min/max value-length (%zu/%zu)\n",
22887
22923
  dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
22888
22924
  if (unlikely(node_size_len(node_ks(node), dsize) <=
22889
- mc->mc_txn->mt_env->me_leaf_nodemax))
22925
+ mc->mc_txn->mt_env->me_leaf_nodemax) &&
22926
+ mc->mc_dbi != FREE_DBI)
22890
22927
  poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
22891
22928
 
22892
22929
  if ((mc->mc_checking & CC_RETIRING) == 0) {
@@ -22901,7 +22938,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
22901
22938
  rc = bad_page(lp.page,
22902
22939
  "too less n-pages %u for bigdata-node (%zu bytes)",
22903
22940
  lp.page->mp_pages, dsize);
22904
- else
22941
+ else if (mc->mc_dbi != FREE_DBI)
22905
22942
  poor_page(lp.page,
22906
22943
  "extra n-pages %u for bigdata-node (%zu bytes)",
22907
22944
  lp.page->mp_pages, dsize);
@@ -23327,7 +23364,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
23327
23364
  int rc = MDBX_SUCCESS, foliage = 0;
23328
23365
  size_t i, ptop;
23329
23366
  MDBX_env *const env = mc->mc_txn->mt_env;
23330
- MDBX_val sepkey, rkey, xdata;
23367
+ MDBX_val rkey, xdata;
23331
23368
  MDBX_page *tmp_ki_copy = NULL;
23332
23369
  DKBUF;
23333
23370
 
@@ -23419,6 +23456,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
23419
23456
  eASSERT(env, split_indx >= minkeys && split_indx <= nkeys - minkeys + 1);
23420
23457
 
23421
23458
  cASSERT(mc, !IS_BRANCH(mp) || newindx > 0);
23459
+ MDBX_val sepkey = {nullptr, 0};
23422
23460
  /* It is reasonable and possible to split the page at the begin */
23423
23461
  if (unlikely(newindx < minkeys)) {
23424
23462
  split_indx = minkeys;
@@ -23751,7 +23789,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
23751
23789
  break;
23752
23790
  }
23753
23791
  }
23754
- } else if (!IS_LEAF2(mp)) {
23792
+ } else if (tmp_ki_copy /* !IS_LEAF2(mp) */) {
23755
23793
  /* Move nodes */
23756
23794
  mc->mc_pg[mc->mc_top] = sister;
23757
23795
  i = split_indx;
@@ -25053,7 +25091,7 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn,
25053
25091
  const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
25054
25092
 
25055
25093
  /* is the environment open?
25056
- * (https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/171) */
25094
+ * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */
25057
25095
  if (unlikely(!env->me_map)) {
25058
25096
  /* environment not yet opened */
25059
25097
  #if 1
@@ -27864,7 +27902,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn,
27864
27902
  if (getrlimit(RLIMIT_RSS, &rss) == 0 && rss.rlim_cur < estimated_rss) {
27865
27903
  rss.rlim_cur = estimated_rss;
27866
27904
  if (rss.rlim_max < estimated_rss)
27867
- rss.rlim_max = used_range;
27905
+ rss.rlim_max = estimated_rss;
27868
27906
  if (setrlimit(RLIMIT_RSS, &rss)) {
27869
27907
  rc = errno;
27870
27908
  WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS",
@@ -29696,7 +29734,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
29696
29734
  flags |= O_CLOEXEC;
29697
29735
  #endif /* O_CLOEXEC */
29698
29736
 
29699
- /* Safeguard for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/144 */
29737
+ /* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
29700
29738
  #if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
29701
29739
  int stub_fd0 = -1, stub_fd1 = -1, stub_fd2 = -1;
29702
29740
  static const char dev_null[] = "/dev/null";
@@ -29734,7 +29772,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
29734
29772
  errno = EACCES /* restore errno if file exists */;
29735
29773
  }
29736
29774
 
29737
- /* Safeguard for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/144 */
29775
+ /* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
29738
29776
  #if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
29739
29777
  if (*fd == STDIN_FILENO) {
29740
29778
  WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "IN",
@@ -30091,10 +30129,15 @@ MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
30091
30129
  return (int)GetLastError();
30092
30130
  #else
30093
30131
  #if defined(__linux__) || defined(__gnu_linux__)
30094
- assert(linux_kernel_version > 0x02061300);
30095
30132
  /* Since Linux 2.6.19, MS_ASYNC is in fact a no-op. The kernel properly
30096
- * tracks dirty pages and flushes them to storage as necessary. */
30097
- return MDBX_SUCCESS;
30133
+ * tracks dirty pages and flushes them as necessary. */
30134
+ //
30135
+ // However, this behavior may be changed in custom kernels,
30136
+ // so just leave this optimization to libc's discretion.
30137
+ //
30138
+ // assert(linux_kernel_version > 0x02061300);
30139
+ // if (mode_bits == MDBX_SYNC_NONE)
30140
+ // return MDBX_SUCCESS;
30098
30141
  #endif /* Linux */
30099
30142
  if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
30100
30143
  return errno;
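
With the Linux MS_ASYNC shortcut commented out, osal_msync() now always reaches the msync() call above and leaves any no-op optimization to libc and the kernel. The same pattern in isolation (a sketch using plain POSIX):

#include <stddef.h>
#include <sys/mman.h>
#include <errno.h>

/* Map a boolean "synchronous" request onto MS_SYNC/MS_ASYNC and
 * report errno on failure, mirroring the call above. */
static int flush_mapping(void *addr, size_t length, int synchronous) {
  return msync(addr, length, synchronous ? MS_SYNC : MS_ASYNC) ? errno : 0;
}
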
@@ -30577,7 +30620,7 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
30577
30620
  VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
30578
30621
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30579
30622
  * when this memory will re-used by malloc or another mmapping.
30580
- * See https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
30623
+ * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30581
30624
  */
30582
30625
  MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address,
30583
30626
  (map->filesize && map->filesize < map->limit)
@@ -30656,7 +30699,7 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
30656
30699
 
30657
30700
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30658
30701
  * when this memory will re-used by malloc or another mmapping.
30659
- * See https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
30702
+ * See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30660
30703
  */
30661
30704
  MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
30662
30705
  status = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
@@ -30937,7 +30980,7 @@ retry_mapview:;
30937
30980
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30938
30981
  * when this memory will re-used by malloc or another mmapping.
30939
30982
  * See
30940
- * https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
30983
+ * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30941
30984
  */
30942
30985
  MDBX_ASAN_UNPOISON_MEMORY_REGION(
30943
30986
  map->address,
@@ -30959,7 +31002,7 @@ retry_mapview:;
30959
31002
  /* Unpoisoning is required for ASAN to avoid false-positive diagnostic
30960
31003
  * when this memory will re-used by malloc or another mmapping.
30961
31004
  * See
30962
- * https://web.archive.org/web/https://github.com/erthink/libmdbx/pull/93#issuecomment-613687203
31005
+ * https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
30963
31006
  */
30964
31007
  MDBX_ASAN_UNPOISON_MEMORY_REGION(
30965
31008
  map->address, (map->current < map->limit) ? map->current : map->limit);
@@ -31782,9 +31825,9 @@ __dll_export
31782
31825
  0,
31783
31826
  12,
31784
31827
  2,
31785
- 0,
31786
- {"2022-11-11T17:35:32+03:00", "cd8aa216aff5c70b45bd3afd46d417a95126dcc3", "9b062cf0c7d41297f756c7f7b897ed981022bdbf",
31787
- "v0.12.2-0-g9b062cf0"},
31828
+ 18,
31829
+ {"2022-11-28T15:45:29+03:00", "9558651eb24ab172a73a7bc6149cadad4c4df990", "b3248442962cfdda728656d6d9085147a7d42b63",
31830
+ "v0.12.2-18-gb3248442"},
31788
31831
  sourcery};
31789
31832
 
31790
31833
  __dll_export