isomorfeus-ferret 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +170 -48
- data/ext/isomorfeus_ferret_ext/frb_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/frb_store.c +231 -108
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +114 -56
- data/ext/isomorfeus_ferret_ext/frt_store.h +0 -9
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +2 -2
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +656 -613
- data/ext/isomorfeus_ferret_ext/test.c +26 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_segments.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_sort.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +2 -2
- data/ext/isomorfeus_ferret_ext/tests_all.h +0 -3
- data/lib/isomorfeus/ferret/index/index.rb +8 -9
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +4 -6
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +0 -479
- data/ext/isomorfeus_ferret_ext/test_fs_store.c +0 -25
data/ext/isomorfeus_ferret_ext/mdbx.c

@@ -12,7 +12,7 @@
  * <http://www.OpenLDAP.org/license.html>. */
 
 #define xMDBX_ALLOY 1
-#define MDBX_BUILD_SOURCERY
+#define MDBX_BUILD_SOURCERY 56f8a04f0668bb80d0d3f24fd2c9958d9aeb83004b65badfd5ccfa80647a2218_v0_12_2_18_gb3248442
 #ifdef MDBX_CONFIG_H
 #include MDBX_CONFIG_H
 #endif
@@ -428,14 +428,13 @@ __extern_C key_t ftok(const char *, int);
 /* Byteorder */
 
 #if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
-    defined(i486) || defined(__i486) || defined(__i486__) ||
-    defined(
-    defined(
-    defined(
-    defined(
-    defined(__x86_64) || defined(__x86_64__) || \
+    defined(i486) || defined(__i486) || defined(__i486__) || defined(i586) || \
+    defined(__i586) || defined(__i586__) || defined(i686) || \
+    defined(__i686) || defined(__i686__) || defined(_M_IX86) || \
+    defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \
+    defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \
     defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
-    defined(_M_AMD64) || defined(__IA32__)
+    defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
 #ifndef __ia32__
 /* LY: define neutral __ia32__ for x86 and x86-64 */
 #define __ia32__ 1
@@ -3138,13 +3137,9 @@ struct MDBX_txn {
 /* Additional flag for sync_locked() */
 #define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
 
-#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
-#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
-
 #define TXN_FLAGS \
   (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
-   MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID
-   MDBX_TXN_FROZEN_RE)
+   MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
 
 #if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
     ((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
@@ -3226,11 +3221,16 @@ struct MDBX_txn {
       MDBX_page *loose_pages;
       /* Number of loose pages (tw.loose_pages) */
       size_t loose_count;
-
-
-
-
-
+      union {
+        struct {
+          size_t least_removed;
+          /* The sorted list of dirty pages we temporarily wrote to disk
+           * because the dirty list was full. page numbers in here are
+           * shifted left by 1, deleted slots have the LSB set. */
+          MDBX_PNL list;
+        } spilled;
+        size_t writemap_dirty_npages;
+      };
     } tw;
   };
 };
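
The union added above stores spilled page numbers shifted left by one bit, with the least-significant bit marking a slot as deleted. A minimal stand-alone sketch of that encoding (hypothetical helper names, not the libmdbx API):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

typedef uint32_t pgno_t;

/* Encode a page number for tw.spilled.list: shift left by one bit. */
static uint32_t spilled_encode(pgno_t pgno) { return pgno << 1; }
/* Recover the page number, discarding the deleted-flag bit. */
static pgno_t spilled_pgno(uint32_t slot) { return slot >> 1; }
/* The least-significant bit marks a deleted slot. */
static bool spilled_deleted(uint32_t slot) { return (slot & 1) != 0; }

int main(void) {
  uint32_t slot = spilled_encode(42); /* page 42, live */
  assert(spilled_pgno(slot) == 42 && !spilled_deleted(slot));
  slot |= 1; /* mark the slot deleted, as spill_remove() does below */
  assert(spilled_deleted(slot) && spilled_pgno(slot) == 42);
  return 0;
}
```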
@@ -3280,6 +3280,9 @@ struct MDBX_cursor {
 #define C_SUB 0x04     /* Cursor is a sub-cursor */
 #define C_DEL 0x08     /* last op was a cursor_del */
 #define C_UNTRACK 0x10 /* Un-track cursor when closing */
+#define C_GCU \
+  0x20 /* Preparation for a GC update is in progress, so pages may be \
+        * taken from the GC even for FREE_DBI */
   uint8_t mc_flags;
 
   /* Cursor checking flags. */
@@ -4643,7 +4646,7 @@ __cold static const char *pagetype_caption(const uint8_t type,
   }
 }
 
-__cold static
+__cold static int MDBX_PRINTF_ARGS(2, 3)
 bad_page(const MDBX_page *mp, const char *fmt, ...) {
   if (LOG_ENABLED(MDBX_LOG_ERROR)) {
     static const MDBX_page *prev;
@@ -5257,7 +5260,7 @@ __cold void thread_dtor(void *rthc) {
     if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) {
       TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(),
             __Wpedantic_format_voidptr(reader));
-      atomic_cas32(&reader->mr_pid, self_pid, 0);
+      (void)atomic_cas32(&reader->mr_pid, self_pid, 0);
     }
   }
 
@@ -6346,50 +6349,51 @@ __hot static size_t pnl_merge(MDBX_PNL dst, const MDBX_PNL src) {
   return total;
 }
 
-static void spill_remove(MDBX_txn *txn, size_t idx,
-  tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.
-  txn->tw.
-  txn->tw.
-
-
-
-
-
+static void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) {
+  tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) &&
+                   txn->tw.spilled.least_removed > 0);
+  txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
+                                      ? idx
+                                      : txn->tw.spilled.least_removed;
+  txn->tw.spilled.list[idx] |= 1;
+  MDBX_PNL_SETSIZE(txn->tw.spilled.list,
+                   MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
+                       (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
 
   while (unlikely(npages > 1)) {
-    const pgno_t pgno = (txn->tw.
+    const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1;
     if (MDBX_PNL_ASCENDING) {
-      if (++idx > MDBX_PNL_GETSIZE(txn->tw.
-          (txn->tw.
+      if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) ||
+          (txn->tw.spilled.list[idx] >> 1) != pgno)
        return;
     } else {
-      if (--idx < 1 || (txn->tw.
+      if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno)
        return;
-      txn->tw.
-
-
-    }
-    txn->tw.
-    MDBX_PNL_SETSIZE(txn->tw.
-                     MDBX_PNL_GETSIZE(txn->tw.
-                         (idx == MDBX_PNL_GETSIZE(txn->tw.
+      txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
+                                          ? idx
+                                          : txn->tw.spilled.least_removed;
+    }
+    txn->tw.spilled.list[idx] |= 1;
+    MDBX_PNL_SETSIZE(txn->tw.spilled.list,
+                     MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
+                         (idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
     --npages;
   }
 }
 
 static MDBX_PNL spill_purge(MDBX_txn *txn) {
-  tASSERT(txn, txn->tw.
-  const MDBX_PNL sl = txn->tw.
-  if (txn->tw.
+  tASSERT(txn, txn->tw.spilled.least_removed > 0);
+  const MDBX_PNL sl = txn->tw.spilled.list;
+  if (txn->tw.spilled.least_removed != INT_MAX) {
     size_t len = MDBX_PNL_GETSIZE(sl), r, w;
-    for (w = r = txn->tw.
+    for (w = r = txn->tw.spilled.least_removed; r <= len; ++r) {
       sl[w] = sl[r];
       w += 1 - (sl[r] & 1);
     }
     for (size_t i = 1; i < w; ++i)
       tASSERT(txn, (sl[i] & 1) == 0);
     MDBX_PNL_SETSIZE(sl, w - 1);
-    txn->tw.
+    txn->tw.spilled.least_removed = INT_MAX;
   } else {
     for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i)
       tASSERT(txn, (sl[i] & 1) == 0);
@@ -6445,7 +6449,8 @@ static __inline size_t pnl_search(const MDBX_PNL pnl, pgno_t pgno,
 }
 
 static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
-
+  tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
+  const MDBX_PNL pnl = txn->tw.spilled.list;
   if (likely(!pnl))
     return 0;
   pgno <<= 1;
@@ -6454,8 +6459,8 @@ static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
 }
 
 static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
-
-  const MDBX_PNL pnl = txn->tw.
+                                       size_t npages) {
+  const MDBX_PNL pnl = txn->tw.spilled.list;
   if (likely(!pnl))
     return false;
   const size_t len = MDBX_PNL_GETSIZE(pnl);
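
spill_purge() in the hunks above squeezes deleted slots out with a single stable pass: every slot is copied down, but the write index advances only for live entries. A stand-alone sketch of that idiom under the same 1-based, LSB-marked layout (hypothetical names, not the libmdbx API):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* One-pass, in-place compaction: copy each slot down, advancing the
 * write index only when the slot is live (LSB clear) -- exactly the
 * `w += 1 - (sl[r] & 1)` step used by spill_purge(). */
static size_t purge(uint32_t *sl, size_t len) {
  size_t w = 1;
  for (size_t r = 1; r <= len; ++r) {
    sl[w] = sl[r];
    w += 1 - (sl[r] & 1);
  }
  return w - 1; /* new length */
}

int main(void) {
  /* slot 0 unused, mirroring the 1-based PNL layout; page 11 is deleted */
  uint32_t sl[] = {0, 10u << 1, (11u << 1) | 1, 12u << 1};
  size_t len = purge(sl, 3);
  assert(len == 2 && sl[1] == (10u << 1) && sl[2] == (12u << 1));
  return 0;
}
```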
@@ -6467,7 +6472,7 @@ static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
     DEBUG_EXTRA_PRINT("%s\n", "]");
   }
   const pgno_t spilled_range_begin = pgno << 1;
-  const pgno_t spilled_range_last = ((pgno + npages) << 1) - 1;
+  const pgno_t spilled_range_last = ((pgno + (pgno_t)npages) << 1) - 1;
 #if MDBX_PNL_ASCENDING
   const size_t n =
       pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1);
@@ -6831,7 +6836,7 @@ dpl_endpgno(const MDBX_dpl *dl, size_t i) {
 }
 
 static __inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno,
-
+                                   size_t npages) {
   tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
   tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
 
@@ -6889,7 +6894,7 @@ MDBX_MAYBE_UNUSED static const MDBX_page *debug_dpl_find(const MDBX_txn *txn,
   return nullptr;
 }
 
-static void dpl_remove_ex(const MDBX_txn *txn, size_t i,
+static void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) {
   tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
   tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
 
@@ -6911,7 +6916,7 @@ static void dpl_remove(const MDBX_txn *txn, size_t i) {
 static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
                                                           pgno_t pgno,
                                                           MDBX_page *page,
-
+                                                          size_t npages) {
   tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
   tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
   MDBX_dpl *dl = txn->tw.dirtylist;
@@ -6980,7 +6985,7 @@ static __must_check_result __inline int page_retire(MDBX_cursor *mc,
                                                     MDBX_page *mp);
 
 static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
-
+                                          size_t npages);
 typedef struct page_result {
   MDBX_page *page;
   int err;
@@ -6989,7 +6994,7 @@ typedef struct page_result {
 static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard);
 
 static pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
-static pgr_t page_new_large(MDBX_cursor *mc, const
+static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
 static int page_touch(MDBX_cursor *mc);
 static int cursor_touch(MDBX_cursor *mc);
 static int touch_dbi(MDBX_cursor *mc);
@@ -7588,7 +7593,7 @@ static MDBX_page *page_malloc(MDBX_txn *txn, size_t num) {
 }
 
 /* Free a shadow dirty page */
-static void dpage_free(MDBX_env *env, MDBX_page *dp,
+static void dpage_free(MDBX_env *env, MDBX_page *dp, size_t npages) {
   VALGRIND_MAKE_MEM_UNDEFINED(dp, pgno2bytes(env, npages));
   MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages));
   if (unlikely(env->me_flags & MDBX_PAGEPERTURB))
@@ -7910,7 +7915,7 @@ static bool txn_refund(MDBX_txn *txn) {
   if (before == txn->mt_next_pgno)
     return false;
 
-  if (txn->tw.
+  if (txn->tw.spilled.list)
     /* Squash deleted pagenums if we refunded any */
     spill_purge(txn);
 
@@ -7925,9 +7930,9 @@ static __inline bool txn_refund(MDBX_txn *txn) {
 #endif /* MDBX_ENABLE_REFUND */
 
 __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
-
+                             size_t npages) {
   MDBX_env *const env = txn->mt_env;
-  DEBUG("kill %
+  DEBUG("kill %zu page(s) %" PRIaPGNO, npages, pgno);
   eASSERT(env, pgno >= NUM_METAS && npages);
   if (!IS_FROZEN(txn, mp)) {
     const size_t bytes = pgno2bytes(env, npages);
@@ -7954,7 +7959,7 @@ __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
 
 /* Remove page from dirty list */
 static __inline void page_wash(MDBX_txn *txn, const size_t di,
-                               MDBX_page *const mp, const
+                               MDBX_page *const mp, const size_t npages) {
   tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
   tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
   tASSERT(txn, di && di <= txn->tw.dirtylist->length &&
@@ -8003,7 +8008,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
    * So for flexibility and avoid extra internal dependencies we just
    * fallback to reading if dirty list was not allocated yet. */
   size_t di = 0, si = 0;
-
+  size_t npages = 1;
   bool is_frozen = false, is_spilled = false, is_shadowed = false;
   if (unlikely(!mp)) {
     if (ASSERT_ENABLED() && pageflags) {
@@ -8019,7 +8024,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
       is_frozen = true;
       if (ASSERT_ENABLED()) {
         for (MDBX_txn *scan = txn; scan; scan = scan->mt_parent) {
-          tASSERT(txn, !search_spilled(scan, pgno));
+          tASSERT(txn, !txn->tw.spilled.list || !search_spilled(scan, pgno));
           tASSERT(txn, !scan->tw.dirtylist || !debug_dpl_find(scan, pgno));
         }
       }
@@ -8064,7 +8069,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
     is_shadowed = IS_SHADOWED(txn, mp);
     if (is_dirty) {
       tASSERT(txn, !is_spilled);
-      tASSERT(txn, !search_spilled(txn, pgno));
+      tASSERT(txn, !txn->tw.spilled.list || !search_spilled(txn, pgno));
       tASSERT(txn, debug_dpl_find(txn, pgno) == mp || txn->mt_parent ||
                        (txn->mt_flags & MDBX_WRITEMAP));
     } else {
@@ -8098,12 +8103,12 @@ status_done:
   } else {
     npages = mp->mp_pages;
     cASSERT(mc, mc->mc_db->md_overflow_pages >= npages);
-    mc->mc_db->md_overflow_pages -= npages;
+    mc->mc_db->md_overflow_pages -= (pgno_t)npages;
   }
 
   if (is_frozen) {
   retire:
-    DEBUG("retire %
+    DEBUG("retire %zu page %" PRIaPGNO, npages, pgno);
     rc = pnl_append_range(false, &txn->tw.retired_pages, pgno, npages);
     tASSERT(txn, dirtylist_check(txn));
     return rc;
@@ -8154,7 +8159,7 @@ status_done:
       }
       tASSERT(txn, is_spilled || is_shadowed || (mp && IS_SHADOWED(txn, mp)));
     }
-    DEBUG("refunded %
+    DEBUG("refunded %zu %s page %" PRIaPGNO, npages, kind, pgno);
     txn->mt_next_pgno = pgno;
     txn_refund(txn);
     return MDBX_SUCCESS;
@@ -8223,7 +8228,7 @@ status_done:
     page_wash(txn, di, mp, npages);
 
   reclaim:
-    DEBUG("reclaim %
+    DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno);
     rc = pnl_insert_range(&txn->tw.relist, pgno, npages);
     tASSERT(txn, pnl_check_allocated(txn->tw.relist,
                                      txn->mt_next_pgno - MDBX_ENABLE_REFUND));
@@ -8330,7 +8335,7 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
     osal_flush_incoherent_mmap(env->me_map + offset, bytes, env->me_os_psize);
     const MDBX_page *const rp = (const MDBX_page *)(env->me_map + offset);
     /* check with timeout as the workaround
-     * for https://
+     * for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
     if (unlikely(memcmp(wp, rp, bytes))) {
       ctx->coherency_timestamp = 0;
       WARNING("catch delayed/non-arrived page %" PRIaPGNO " %s", wp->mp_pgno,
@@ -8351,11 +8356,12 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
     do {
       eASSERT(env, wp->mp_pgno == bytes2pgno(env, offset));
       eASSERT(env, (wp->mp_flags & P_ILL_BITS) == 0);
-
+      size_t npages = IS_OVERFLOW(wp) ? wp->mp_pages : 1u;
       size_t chunk = pgno2bytes(env, npages);
       eASSERT(env, bytes >= chunk);
+      MDBX_page *next = (MDBX_page *)((char *)wp + chunk);
       dpage_free(env, wp, npages);
-      wp =
+      wp = next;
       offset += chunk;
       bytes -= chunk;
     } while (bytes);
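
The change above reads the successor pointer into `next` before `dpage_free()` releases the current page. The same ordering rule in its classic form, as a minimal sketch (plain malloc/free, not the mdbx page allocator):

```c
#include <stdlib.h>

struct node {
  struct node *next;
  int value;
};

/* Capture the successor BEFORE freeing the current node; reading
 * head->next after free(head) would be a use-after-free, which is
 * what the `wp = next` change above avoids for coalesced pages. */
static void free_list(struct node *head) {
  while (head) {
    struct node *next = head->next;
    free(head);
    head = next;
  }
}

int main(void) {
  struct node *head = NULL;
  for (int i = 0; i < 3; ++i) {
    struct node *n = malloc(sizeof(*n));
    if (!n)
      break;
    n->value = i;
    n->next = head;
    head = n;
  }
  free_list(head);
  return 0;
}
```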
@@ -8384,7 +8390,7 @@ __must_check_result static int iov_write(iov_ctx_t *ctx) {
 }
 
 __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
-                                        MDBX_page *dp,
+                                        MDBX_page *dp, size_t npages) {
   MDBX_env *const env = txn->mt_env;
   tASSERT(txn, ctx->err == MDBX_SUCCESS);
   tASSERT(txn, dp->mp_pgno >= MIN_PAGENO && dp->mp_pgno < txn->mt_next_pgno);
@@ -8428,16 +8434,16 @@ __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
 #if MDBX_NEED_WRITTEN_RANGE
   ctx->flush_begin =
       (ctx->flush_begin < dp->mp_pgno) ? ctx->flush_begin : dp->mp_pgno;
-  ctx->flush_end = (ctx->flush_end > dp->mp_pgno + npages)
+  ctx->flush_end = (ctx->flush_end > dp->mp_pgno + (pgno_t)npages)
                        ? ctx->flush_end
-                       : dp->mp_pgno + npages;
+                       : dp->mp_pgno + (pgno_t)npages;
 #endif /* MDBX_NEED_WRITTEN_RANGE */
   env->me_lck->mti_unsynced_pages.weak += npages;
   return MDBX_SUCCESS;
 }
 
 static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
-                      const
+                      const size_t npages) {
   tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP) || MDBX_AVOID_MSYNC);
 #if MDBX_ENABLE_PGOP_STAT
   txn->mt_env->me_lck->mti_pgop_stat.spill.weak += npages;
@@ -8446,7 +8452,7 @@ static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
   int err = iov_page(txn, ctx, dp, npages);
   if (likely(err == MDBX_SUCCESS) &&
       (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)))
-    err = pnl_append_range(true, &txn->tw.
+    err = pnl_append_range(true, &txn->tw.spilled.list, pgno << 1, npages);
   return err;
 }
 
@@ -8496,16 +8502,16 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
                            const uint32_t reciprocal) {
   MDBX_dpl *const dl = txn->tw.dirtylist;
   const uint32_t age = dpl_age(txn, i);
-  const
+  const size_t npages = dpl_npages(dl, i);
   const pgno_t pgno = dl->items[i].pgno;
   if (age == 0) {
-    DEBUG("skip %s %
+    DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno);
     return 256;
   }
 
   MDBX_page *const dp = dl->items[i].ptr;
   if (dp->mp_flags & (P_LOOSE | P_SPILLED)) {
-    DEBUG("skip %s %
+    DEBUG("skip %s %zu page %" PRIaPGNO,
           (dp->mp_flags & P_LOOSE) ? "loose"
           : (dp->mp_flags & P_LOOSE) ? "loose"
                                      : "parent-spilled",
@@ -8519,7 +8525,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
   if (parent && (parent->mt_flags & MDBX_TXN_SPILLS)) {
     do
       if (intersect_spilled(parent, pgno, npages)) {
-        DEBUG("skip-2 parent-spilled %
+        DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno);
         dp->mp_flags |= P_SPILLED;
         return 256;
       }
@@ -8533,7 +8539,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
     return prio = 256 - prio;
 
   /* make a large/overflow pages be likely to spill */
-
+  size_t factor = npages | npages >> 1;
   factor |= factor >> 2;
   factor |= factor >> 4;
   factor |= factor >> 8;
@@ -8541,7 +8547,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
   factor = prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157;
   factor = (factor < 256) ? 255 - factor : 0;
   tASSERT(txn, factor < 256 && factor < (256 - prio));
-  return prio = factor;
+  return prio = (unsigned)factor;
 }
 
 /* Spill pages from the dirty list back to disk.
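
The `factor` computation in the two hunks above smears the highest set bit of `npages` rightward, so `factor + 1` becomes the next power of two; its log2 then weights large/overflow pages toward spilling. A self-contained sketch of the trick (the real code uses mdbx's `log2n_powerof2()` helper; `ceil_log2` here is a hypothetical stand-in):

```c
#include <assert.h>
#include <stdint.h>

/* OR progressively larger shifts: every bit below the highest set bit
 * becomes 1, so factor + 1 rounds npages up to a power of two. */
static unsigned ceil_log2(uint32_t npages) {
  uint32_t factor = npages | npages >> 1;
  factor |= factor >> 2;
  factor |= factor >> 4;
  factor |= factor >> 8;
  factor |= factor >> 16;
  unsigned log2n = 0;
  while ((factor + 1) >> (log2n + 1))
    ++log2n;
  return log2n;
}

int main(void) {
  assert(ceil_log2(1) == 1);  /* 1 smears to 1, +1 = 2, log2 = 1 */
  assert(ceil_log2(5) == 3);  /* 5 smears to 7, +1 = 8, log2 = 3 */
  assert(ceil_log2(64) == 7); /* 64 smears to 127, +1 = 128 */
  return 0;
}
```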
@@ -8645,7 +8651,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
   if (txn->mt_flags & MDBX_WRITEMAP) {
     NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync",
            dirty_entries, dirty_npages);
-    tASSERT(txn, txn->tw.
+    tASSERT(txn, txn->tw.spilled.list == nullptr);
     const MDBX_env *env = txn->mt_env;
     rc =
         osal_msync(&txn->mt_env->me_dxb_mmap, 0,
@@ -8669,10 +8675,10 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
   tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >=
                    need_spill_npages);
   if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
-    if (!txn->tw.
-      txn->tw.
-      txn->tw.
-      if (unlikely(!txn->tw.
+    if (!txn->tw.spilled.list) {
+      txn->tw.spilled.least_removed = INT_MAX;
+      txn->tw.spilled.list = pnl_alloc(need_spill);
+      if (unlikely(!txn->tw.spilled.list)) {
        rc = MDBX_ENOMEM;
       bailout:
        txn->mt_flags |= MDBX_TXN_ERROR;
@@ -8681,7 +8687,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
     } else {
       /* purge deleted slots */
       spill_purge(txn);
-      rc = pnl_reserve(&txn->tw.
+      rc = pnl_reserve(&txn->tw.spilled.list, need_spill);
       (void)rc /* ignore since the resulting list may be shorter
          and pnl_append() will increase pnl on demand */
           ;
@@ -8865,7 +8871,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
     goto bailout;
 
   if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
-    pnl_sort(txn->tw.
+    pnl_sort(txn->tw.spilled.list, (size_t)txn->mt_next_pgno << 1);
     txn->mt_flags |= MDBX_TXN_SPILLS;
   }
   NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
@@ -9279,6 +9285,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) {
   MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
   if (unlikely(lck == NULL /* exclusive without-lck mode */)) {
     eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub);
+    env->me_lck->mti_readers_refresh_flag.weak = nothing_changed;
     return env->me_lck->mti_oldest_reader.weak = steady;
   }
 
@@ -9367,10 +9374,13 @@ __cold static pgno_t find_largest_snapshot(const MDBX_env *env,
 
 /* Add a page to the txn's dirty list */
 __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
-
+                                                size_t npages) {
   tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
+  mp->mp_txnid = txn->mt_front;
   if (!txn->tw.dirtylist) {
     tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
+    txn->tw.writemap_dirty_npages += npages;
+    tASSERT(txn, txn->tw.spilled.list == nullptr);
     return MDBX_SUCCESS;
   }
   tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
@@ -9383,7 +9393,6 @@ __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
 #endif /* xMDBX_DEBUG_SPILLING == 2 */
 
   int rc;
-  mp->mp_txnid = txn->mt_front;
   if (unlikely(txn->tw.dirtyroom == 0)) {
     if (txn->tw.loose_count) {
       MDBX_page *loose = txn->tw.loose_pages;
@@ -10093,6 +10102,8 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) {
 }
 #endif /* _MSC_VER */
 
+#if !MDBX_PNL_ASCENDING
+
 #if !defined(MDBX_ATTRIBUTE_TARGET) && \
     (__has_attribute(__target__) || __GNUC_PREREQ(5, 0))
 #define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target)))
@@ -10406,6 +10417,8 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len,
 /* Choosing of another variants should be added here. */
 #endif /* scan4seq_default */
 
+#endif /* MDBX_PNL_ASCENDING */
+
 #ifndef scan4seq_default
 #define scan4seq_default scan4seq_fallback
 #endif /* scan4seq_default */
@@ -10469,45 +10482,39 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len,
  *
  * Returns 0 on success, non-zero on failure.*/
 
-#define
-#define
-#define
-#define
-#define
-#define
-#define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
-#define MDBX_ALLOC_LIFO 128
+#define MDBX_ALLOC_DEFAULT 0
+#define MDBX_ALLOC_RESERVE 1
+#define MDBX_ALLOC_UNIMPORTANT 2
+#define MDBX_ALLOC_COALESCE 4    /* internal state */
+#define MDBX_ALLOC_SHOULD_SCAN 8 /* internal state */
+#define MDBX_ALLOC_LIFO 16       /* internal state */
 
-static __inline bool is_gc_usable(
+static __inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc,
+                                  const uint8_t flags) {
   /* If txn is updating the GC, then the retired-list cannot play catch-up with
    * itself by growing while trying to save it. */
-  if (
+  if (mc->mc_dbi == FREE_DBI && !(flags & MDBX_ALLOC_RESERVE) &&
+      !(mc->mc_flags & C_GCU))
     return false;
 
   /* avoid (recursive) search inside empty tree and while tree is
-     updating, https://
+     updating, https://libmdbx.dqdkfa.ru/dead-github/issues/31 */
   if (txn->mt_dbs[FREE_DBI].md_entries == 0)
     return false;
 
-  /* If our dirty list is already full, we can't touch GC */
-  if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
-      !(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY))
-    return false;
-
   return true;
 }
 
-static
-
-
-
-
-
-  return cursor_init(mc, txn, FREE_DBI);
+__hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) {
+  const size_t len = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed);
+  for (size_t i = 1; i <= len; ++i)
+    if (txn->tw.lifo_reclaimed[i] == id)
+      return true;
+  return false;
 }
 
 static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
-
+                                 uint8_t flags) {
 #if MDBX_ENABLE_PROFGC
   const uint64_t monotime_before = osal_monotime();
   size_t majflt_before;
@@ -10525,21 +10532,13 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
   prof->spe_counter += 1;
 #endif /* MDBX_ENABLE_PROFGC */
 
-  eASSERT(env, num
-  eASSERT(env, num > 0 || !(flags & MDBX_ALLOC_NEW));
-  eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
-                         MDBX_ALLOC_BACKLOG)) == 0 ||
-                   (flags & MDBX_ALLOC_GC));
-  eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
-                         MDBX_ALLOC_BACKLOG)) == 0 ||
-                   (flags & MDBX_ALLOC_NEW) == 0);
+  eASSERT(env, num > 0 || (flags & MDBX_ALLOC_RESERVE));
   eASSERT(env, pnl_check_allocated(txn->tw.relist,
                                    txn->mt_next_pgno - MDBX_ENABLE_REFUND));
 
   pgno_t pgno = 0, *range = nullptr;
-  size_t re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
+  size_t newnext, re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
   if (num > 1) {
-    eASSERT(env, !(flags & MDBX_ALLOC_SLOT));
 #if MDBX_ENABLE_PROFGC
     prof->xpages += 1;
 #endif /* MDBX_ENABLE_PROFGC */
@@ -10555,347 +10554,363 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
       }
     }
   } else {
-    eASSERT(env,
-            MDBX_PNL_GETSIZE(txn->tw.relist) == 0);
+    eASSERT(env, num == 0 || re_len == 0);
   }
 
   //---------------------------------------------------------------------------
 
-  if (
-
-    goto no_gc;
+  if (unlikely(!is_gc_usable(txn, mc, flags)))
+    goto no_gc;
 
-
-
+  eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO |
+                         MDBX_ALLOC_SHOULD_SCAN)) == 0);
+  flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
 
-
+  if (/* Do not coalesce records while preparing the reserve for the GC
+       * update. Otherwise an attempt to grow the reserve may require an
+       * even larger reserve due to growth of the recycled-pages list. */
+      (flags & MDBX_ALLOC_RESERVE) == 0) {
     if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
-
+        re_len < env->me_maxgc_ov1page / 2)
       flags += MDBX_ALLOC_COALESCE;
+  }
 
-
-
-
-
+  MDBX_cursor *const gc =
+      (MDBX_cursor *)((char *)env->me_txn0 + sizeof(MDBX_txn));
+  gc->mc_txn = txn;
+  gc->mc_flags = 0;
 
-
-
-
-
-
-
-
-
-
-
-
-    goto fail;
-  }
-  const txnid_t detent = oldest + 1;
+retry_gc_refresh_oldest:;
+  txnid_t oldest = txn_oldest_reader(txn);
+retry_gc_have_oldest:
+  if (unlikely(oldest >= txn->mt_txnid)) {
+    ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN
+          " for current-txnid %" PRIaTXN,
+          oldest, txn->mt_txnid);
+    ret.err = MDBX_PROBLEM;
+    goto fail;
+  }
+  const txnid_t detent = oldest + 1;
 
-
-
-
-  if (
-
-
-
-
-    goto fail;
-  }
+  txnid_t id = 0;
+  MDBX_cursor_op op = MDBX_FIRST;
+  if (flags & MDBX_ALLOC_LIFO) {
+    if (!txn->tw.lifo_reclaimed) {
+      txn->tw.lifo_reclaimed = txl_alloc();
+      if (unlikely(!txn->tw.lifo_reclaimed)) {
+        ret.err = MDBX_ENOMEM;
+        goto fail;
       }
-    /* Begin lookup backward from oldest reader */
-    last = detent - 1;
-    op = MDBX_SET_RANGE;
-  } else if (txn->tw.last_reclaimed) {
-    /* Continue lookup forward from last-reclaimed */
-    last = txn->tw.last_reclaimed + 1;
-    if (last >= detent)
-      goto no_gc;
-    op = MDBX_SET_RANGE;
     }
+    /* Begin lookup backward from oldest reader */
+    id = detent - 1;
+    op = MDBX_SET_RANGE;
+  } else if (txn->tw.last_reclaimed) {
+    /* Continue lookup forward from last-reclaimed */
+    id = txn->tw.last_reclaimed + 1;
+    if (id >= detent)
+      goto depleted_gc;
+    op = MDBX_SET_RANGE;
+  }
 
-
-
-
-
+next_gc:;
+  MDBX_val key;
+  key.iov_base = &id;
+  key.iov_len = sizeof(id);
 
 #if MDBX_ENABLE_PROFGC
-
+  prof->rsteps += 1;
 #endif /* MDBX_ENABLE_PROFGC */
 
-
-
-
-
-    goto fail;
-  if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
-    op = MDBX_PREV;
-    goto next_gc;
-  }
-  goto depleted_gc;
-  }
-  if (unlikely(key.iov_len != sizeof(txnid_t))) {
-    ret.err = MDBX_CORRUPTED;
+  /* Seek first/next GC record */
+  ret.err = mdbx_cursor_get(gc, &key, NULL, op);
+  if (unlikely(ret.err != MDBX_SUCCESS)) {
+    if (unlikely(ret.err != MDBX_NOTFOUND))
       goto fail;
-
-  last = unaligned_peek_u64(4, key.iov_base);
-  if (flags & MDBX_ALLOC_LIFO) {
+    if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
       op = MDBX_PREV;
-
-      goto next_gc;
-    /* skip IDs of records that already reclaimed */
-    for (size_t i = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); i > 0; --i)
-      if (txn->tw.lifo_reclaimed[i] == last)
-        goto next_gc;
-  } else {
-    op = MDBX_NEXT;
-    if (unlikely(last >= detent))
-      goto depleted_gc;
+      goto next_gc;
     }
+    goto depleted_gc;
+  }
+  if (unlikely(key.iov_len != sizeof(txnid_t))) {
+    ret.err = MDBX_CORRUPTED;
+    goto fail;
+  }
+  id = unaligned_peek_u64(4, key.iov_base);
+  if (flags & MDBX_ALLOC_LIFO) {
+    op = MDBX_PREV;
+    if (id >= detent || is_already_reclaimed(txn, id))
+      goto next_gc;
+  } else {
+    op = MDBX_NEXT;
+    if (unlikely(id >= detent))
+      goto depleted_gc;
+  }
 
-
-
-
-
-
-
-    goto fail;
+  /* Reading next GC record */
+  MDBX_val data;
+  MDBX_page *const mp = gc->mc_pg[gc->mc_top];
+  if (unlikely((ret.err = node_read(gc, page_node(mp, gc->mc_ki[gc->mc_top]),
+                                    &data, mp)) != MDBX_SUCCESS))
+    goto fail;
 
-
-
-
-
-
-
-
+  pgno_t *gc_pnl = (pgno_t *)data.iov_base;
+  if (unlikely(data.iov_len % sizeof(pgno_t) ||
+               data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
+               !pnl_check(gc_pnl, txn->mt_next_pgno))) {
+    ret.err = MDBX_CORRUPTED;
+    goto fail;
+  }
+
+  const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
+  TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len,
+        gc_len + re_len);
+
+  eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
+  if (unlikely(gc_len + re_len >= env->me_maxgc_ov1page)) {
+    /* Don't try to coalesce too much. */
+    if (flags & MDBX_ALLOC_SHOULD_SCAN) {
+      eASSERT(env, flags & MDBX_ALLOC_COALESCE);
+      eASSERT(env, num > 0);
+#if MDBX_ENABLE_PROFGC
+      env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
+#endif /* MDBX_ENABLE_PROFGC */
+      TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
+      if (re_len >= num) {
+        eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
+                         MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
+        range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+        pgno = *range;
+        if (num == 1)
+          goto done;
+        range = scan4seq(range, re_len, num - 1);
+        eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
+        if (likely(range)) {
+          pgno = *range;
+          goto done;
+        }
+      }
+      flags -= MDBX_ALLOC_COALESCE | MDBX_ALLOC_SHOULD_SCAN;
     }
-
-
-
-    ((/* not a slot-request from gc-update */
-     (flags & MDBX_ALLOC_SLOT) == 0 &&
+    if (unlikely(/* list is too long already */ re_len >=
+                 env->me_options.rp_augment_limit) &&
+        ((/* not a slot-request from gc-update */ num &&
          /* have enough unallocated space */ txn->mt_geo.upper >=
              txn->mt_next_pgno + num) ||
-     gc_len +
+         gc_len + re_len >= MDBX_PGL_LIMIT)) {
       /* Stop reclaiming to avoid large/overflow the page list.
        * This is a rare case while search for a continuously multi-page region
        * in a large database.
-       * https://
+       * https://libmdbx.dqdkfa.ru/dead-github/issues/123
+       */
       NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu "
              "(chunk) -> %zu",
-
-             gc_len + MDBX_PNL_GETSIZE(txn->tw.relist));
+             re_len, gc_len, gc_len + re_len);
       goto depleted_gc;
     }
+  }
 
-
-
-
-
-    if (unlikely(ret.err != MDBX_SUCCESS))
-      goto fail;
-  }
-
-  /* Append PNL from GC record to tw.relist */
-  ret.err = pnl_need(&txn->tw.relist, gc_len);
+  /* Remember ID of readed GC record */
+  txn->tw.last_reclaimed = id;
+  if (flags & MDBX_ALLOC_LIFO) {
+    ret.err = txl_append(&txn->tw.lifo_reclaimed, id);
     if (unlikely(ret.err != MDBX_SUCCESS))
       goto fail;
-
+  }
 
-
-
-
-
-  for (size_t i = gc_len; i; i--)
-    DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
-  DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
-  }
+  /* Append PNL from GC record to tw.relist */
+  ret.err = pnl_need(&txn->tw.relist, gc_len);
+  if (unlikely(ret.err != MDBX_SUCCESS))
+    goto fail;
 
-
-
-
-
-
-
-
-
-
-
+  if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
+    DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO
+                " len %zu, PNL",
+                id, txn->mt_dbs[FREE_DBI].md_root, gc_len);
+    for (size_t i = gc_len; i; i--)
+      DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
+    DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
+  }
+
+  /* Merge in descending sorted order */
+  re_len = pnl_merge(txn->tw.relist, gc_pnl);
+  flags |= MDBX_ALLOC_SHOULD_SCAN;
+  if (AUDIT_ENABLED()) {
+    if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
+      ret.err = MDBX_CORRUPTED;
+      goto fail;
     }
-
+  } else {
+    eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno));
+  }
+  eASSERT(env, dirtylist_check(txn));
 
-
-
-
-
-
-
-
-
-
-
-
+  eASSERT(env,
+          re_len == 0 || MDBX_PNL_MOST(txn->tw.relist) < txn->mt_next_pgno);
+  if (MDBX_ENABLE_REFUND && re_len &&
+      unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) {
+    /* Refund suitable pages into "unallocated" space */
+    txn_refund(txn);
+    re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
+  }
+  eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
+  eASSERT(env, pnl_check_allocated(txn->tw.relist,
+                                   txn->mt_next_pgno - MDBX_ENABLE_REFUND));
 
-
-
-
-
-
+  /* Done for a kick-reclaim mode, actually no page needed */
+  if (unlikely(num == 0)) {
+    eASSERT(env, ret.err == MDBX_SUCCESS);
+    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id,
+          re_len);
+    goto early_exit;
+  }
 
-
+  /* TODO: delete reclaimed records */
 
-
-
-
-
-
-
-#endif /* MDBX_ENABLE_PROFGC */
-    goto next_gc;
-  }
-  TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
-  flags &= ~MDBX_ALLOC_COALESCE;
-  }
+  eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
+  if (flags & MDBX_ALLOC_COALESCE) {
+    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id,
+          re_len);
+    goto next_gc;
+  }
 
-
-
-
-
-
-
+scan:
+  eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN);
+  eASSERT(env, num > 0);
+  if (re_len >= num) {
+    eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
+                     MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
+    range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
+    pgno = *range;
+    if (num == 1)
+      goto done;
+    range = scan4seq(range, re_len, num - 1);
+    eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
+    if (likely(range)) {
       pgno = *range;
-
-    goto done;
-    range = scan4seq(range, re_len, num - 1);
-    eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
-    if (likely(range)) {
-      pgno = *range;
-      goto done;
-    }
+      goto done;
     }
-
-
-
+  }
+  flags -= MDBX_ALLOC_SHOULD_SCAN;
+  if (ret.err == MDBX_SUCCESS) {
+    TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id,
+          re_len);
+    goto next_gc;
+  }
 
-
-
-
-
+depleted_gc:
+  ret.err = MDBX_NOTFOUND;
+  if (flags & MDBX_ALLOC_SHOULD_SCAN)
+    goto scan;
 
-
+  //-------------------------------------------------------------------------
+
+  /* There is no suitable pages in the GC and to be able to allocate
+   * we should CHOICE one of:
+   *  - make a new steady checkpoint if reclaiming was stopped by
+   *    the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode;
+   *  - kick lagging reader(s) if reclaiming was stopped by ones of it.
+   *  - extend the database file. */
 
-
-
-
-
-
-
-
-
-
-
-
-
-  const
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-   *
-   * - upper limit of database size is reached;
-   * - database is full (with the current file size)
-   * AND auto-sync threshold it NOT specified */
-  if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
-      ((autosync_threshold | autosync_period) == 0 ||
-       newnext >= prefer_steady.ptr_c->mm_geo.now)) {
-    /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
-     * without any auto-sync threshold(s). */
+  /* Will use new pages from the map if nothing is suitable in the GC. */
+  newnext = (pgno = txn->mt_next_pgno) + num;
+
+  /* Does reclaiming stopped at the last steady point? */
+  const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
+  const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
+  if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady &&
+      detent == prefer_steady.txnid + 1) {
+    DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN
+          "-%s, detent %" PRIaTXN,
+          recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid,
+          durable_caption(prefer_steady.ptr_c), detent);
+    const pgno_t autosync_threshold =
+        atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed);
+    const uint64_t autosync_period =
+        atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed);
+    uint64_t eoos_timestamp;
+    /* wipe the last steady-point if one of:
+     *  - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
+     *  - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
+     * otherwise, make a new steady-point if one of:
+     *  - auto-sync threshold is specified and reached;
+     *  - upper limit of database size is reached;
+     *  - database is full (with the current file size)
+     *    AND auto-sync threshold it NOT specified */
+    if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
+        ((autosync_threshold | autosync_period) == 0 ||
+         newnext >= prefer_steady.ptr_c->mm_geo.now)) {
+      /* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
+       * without any auto-sync threshold(s). */
 #if MDBX_ENABLE_PROFGC
-
+      env->me_lck->mti_pgop_stat.gc_prof.wipes += 1;
 #endif /* MDBX_ENABLE_PROFGC */
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    /* make steady checkpoint. */
+      ret.err = wipe_steady(txn, detent);
+      DEBUG("gc-wipe-steady, rc %d", ret.err);
+      if (unlikely(ret.err != MDBX_SUCCESS))
+        goto fail;
+      eASSERT(env, prefer_steady.ptr_c !=
+                       meta_prefer_steady(env, &txn->tw.troika).ptr_c);
+      goto retry_gc_refresh_oldest;
+    }
+    if ((autosync_threshold &&
+         atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
+             autosync_threshold) ||
+        (autosync_period &&
+         (eoos_timestamp =
+              atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
+         osal_monotime() - eoos_timestamp >= autosync_period) ||
+        newnext >= txn->mt_geo.upper ||
+        ((num == 0 || newnext >= txn->mt_end_pgno) &&
+         (autosync_threshold | autosync_period) == 0)) {
+      /* make steady checkpoint. */
 #if MDBX_ENABLE_PROFGC
-
+      env->me_lck->mti_pgop_stat.gc_prof.flushes += 1;
 #endif /* MDBX_ENABLE_PROFGC */
-
-
-
-
-
-
-
-
-
-
-  }
+      MDBX_meta meta = *recent.ptr_c;
+      ret.err = sync_locked(env, env->me_flags & MDBX_WRITEMAP, &meta,
+                            &txn->tw.troika);
+      DEBUG("gc-make-steady, rc %d", ret.err);
+      eASSERT(env, ret.err != MDBX_RESULT_TRUE);
+      if (unlikely(ret.err != MDBX_SUCCESS))
+        goto fail;
+      eASSERT(env, prefer_steady.ptr_c !=
+                       meta_prefer_steady(env, &txn->tw.troika).ptr_c);
+      goto retry_gc_refresh_oldest;
     }
+  }
 
-
-
-
-
-
-
-    goto retry_gc_have_oldest;
-  }
+  if (unlikely(true == atomic_load32(&env->me_lck->mti_readers_refresh_flag,
+                                     mo_AcquireRelease))) {
+    oldest = txn_oldest_reader(txn);
+    if (oldest >= detent)
+      goto retry_gc_have_oldest;
+  }
 
-
-
-
-
-
-
+  /* Avoid kick lagging reader(s) if is enough unallocated space
+   * at the end of database file. */
+  if (!(flags & MDBX_ALLOC_RESERVE) && newnext <= txn->mt_end_pgno) {
+    eASSERT(env, range == nullptr);
+    goto done;
+  }
 
-
-
-
-
-  }
+  if (oldest < txn->mt_txnid - xMDBX_TXNID_STEP) {
+    oldest = kick_longlived_readers(env, oldest);
+    if (oldest >= detent)
+      goto retry_gc_have_oldest;
   }
 
   //---------------------------------------------------------------------------
 
 no_gc:
-  if (
+  if (flags & MDBX_ALLOC_RESERVE) {
    ret.err = MDBX_NOTFOUND;
    goto fail;
   }
 
   /* Will use new pages from the map if nothing is suitable in the GC. */
-  pgno = txn->mt_next_pgno;
-  const size_t newnext = num + pgno;
+  newnext = (pgno = txn->mt_next_pgno) + num;
   if (newnext <= txn->mt_end_pgno)
     goto done;
 
@@ -10932,12 +10947,12 @@ no_gc:
 
 done:
   ret.err = MDBX_SUCCESS;
-  if (likely((flags &
+  if (likely((flags & MDBX_ALLOC_RESERVE) == 0)) {
     ENSURE(env, pgno >= NUM_METAS);
     if (range) {
-      eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
       eASSERT(env, pgno == *range);
       eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS);
+      eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
       /* Cutoff allocated pages from tw.relist */
 #if MDBX_PNL_ASCENDING
       for (const pgno_t *const end = re_list + re_len - num; range <= end;
|
|
10951
10966
|
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
10952
10967
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
10953
10968
|
} else {
|
10954
|
-
eASSERT(env, flags & MDBX_ALLOC_NEW);
|
10955
10969
|
pgno = txn->mt_next_pgno;
|
10956
10970
|
txn->mt_next_pgno += (pgno_t)num;
|
10957
10971
|
eASSERT(env, txn->mt_next_pgno <= txn->mt_end_pgno);
|
@@ -10995,8 +11009,9 @@ done:
|
|
10995
11009
|
int level;
|
10996
11010
|
const char *what;
|
10997
11011
|
if (flags & MDBX_ALLOC_RESERVE) {
|
10998
|
-
level =
|
10999
|
-
|
11012
|
+
level =
|
11013
|
+
(flags & MDBX_ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
|
11014
|
+
what = num ? "reserve-pages" : "fetch-slot";
|
11000
11015
|
} else {
|
11001
11016
|
txn->mt_flags |= MDBX_TXN_ERROR;
|
11002
11017
|
level = MDBX_LOG_ERROR;
|
@@ -11011,7 +11026,7 @@ done:
|
|
11011
11026
|
} else {
|
11012
11027
|
early_exit:
|
11013
11028
|
DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num,
|
11014
|
-
|
11029
|
+
num ? "RESERVE" : "SLOT", ret.err);
|
11015
11030
|
ret.page = NULL;
|
11016
11031
|
}
|
11017
11032
|
|
@@ -11057,84 +11072,103 @@ __hot static pgr_t page_alloc(const MDBX_cursor *mc) {
|
|
11057
11072
|
return ret;
|
11058
11073
|
}
|
11059
11074
|
|
11060
|
-
|
11061
|
-
|
11062
|
-
|
11063
|
-
|
11064
|
-
MDBX_env *const env = txn->mt_env;
|
11075
|
+
MDBX_PNL pnl = txn->tw.relist;
|
11076
|
+
const size_t len = MDBX_PNL_GETSIZE(pnl);
|
11077
|
+
if (likely(len > 0)) {
|
11078
|
+
MDBX_env *const env = txn->mt_env;
|
11065
11079
|
|
11066
|
-
|
11080
|
+
MDBX_PNL_SETSIZE(pnl, len - 1);
|
11067
11081
|
#if MDBX_PNL_ASCENDING
|
11068
|
-
|
11069
|
-
|
11070
|
-
|
11082
|
+
const pgno_t pgno = pnl[1];
|
11083
|
+
for (size_t i = 1; i < len; ++i)
|
11084
|
+
pnl[i] = pnl[i + 1];
|
11071
11085
|
#else
|
11072
|
-
|
11086
|
+
const pgno_t pgno = pnl[len];
|
11073
11087
|
#endif
|
11074
11088
|
|
11075
11089
|
#if MDBX_ENABLE_PROFGC
|
11076
|
-
|
11077
|
-
|
11078
|
-
|
11079
|
-
|
11080
|
-
|
11081
|
-
|
11090
|
+
const uint64_t monotime_before = osal_monotime();
|
11091
|
+
size_t majflt_before;
|
11092
|
+
const uint64_t cputime_before = osal_cputime(&majflt_before);
|
11093
|
+
profgc_stat_t *const prof = (mc->mc_dbi == FREE_DBI)
|
11094
|
+
? &env->me_lck->mti_pgop_stat.gc_prof.self
|
11095
|
+
: &env->me_lck->mti_pgop_stat.gc_prof.work;
|
11082
11096
|
#endif /* MDBX_ENABLE_PROFGC */
|
11083
|
-
|
11084
|
-
|
11085
|
-
|
11086
|
-
|
11087
|
-
|
11088
|
-
|
11089
|
-
|
11090
|
-
|
11091
|
-
|
11092
|
-
}
|
11097
|
+
pgr_t ret;
|
11098
|
+
if (env->me_flags & MDBX_WRITEMAP) {
|
11099
|
+
ret.page = pgno2page(env, pgno);
|
11100
|
+
MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
|
11101
|
+
} else {
|
11102
|
+
ret.page = page_malloc(txn, 1);
|
11103
|
+
if (unlikely(!ret.page)) {
|
11104
|
+
ret.err = MDBX_ENOMEM;
|
11105
|
+
goto bailout;
|
11093
11106
|
}
|
11107
|
+
}
|
11094
11108
|
|
11095
|
-
|
11096
|
-
|
11097
|
-
|
11098
|
-
|
11099
|
-
|
11109
|
+
VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
|
11110
|
+
ret.page->mp_pgno = pgno;
|
11111
|
+
ret.page->mp_leaf2_ksize = 0;
|
11112
|
+
ret.page->mp_flags = 0;
|
11113
|
+
tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
|
11100
11114
|
|
11101
|
-
|
11102
|
-
|
11103
|
-
|
11104
|
-
|
11115
|
+
ret.err = page_dirty(txn, ret.page, 1);
|
11116
|
+
bailout:
|
11117
|
+
tASSERT(txn, pnl_check_allocated(txn->tw.relist,
|
11118
|
+
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
11105
11119
|
#if MDBX_ENABLE_PROFGC
|
11106
|
-
|
11107
|
-
|
11108
|
-
|
11109
|
-
|
11120
|
+
size_t majflt_after;
|
11121
|
+
prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
|
11122
|
+
prof->majflt += majflt_after - majflt_before;
|
11123
|
+
prof->xtime_monotonic += osal_monotime() - monotime_before;
|
11110
11124
|
#endif /* MDBX_ENABLE_PROFGC */
|
11111
|
-
|
11112
|
-
}
|
11125
|
+
return ret;
|
11113
11126
|
}
|
11114
11127
|
|
11115
|
-
return page_alloc_slowpath(mc, 1,
|
11128
|
+
return page_alloc_slowpath(mc, 1, MDBX_ALLOC_DEFAULT);
|
11116
11129
|
}
|
11117
11130
|
|
11118
|
-
/* Copy the used portions of a
|
11119
|
-
__hot static void page_copy(MDBX_page *dst, const MDBX_page *src,
|
11120
|
-
size_t
|
11131
|
+
/* Copy the used portions of a page. */
|
11132
|
+
__hot static void page_copy(MDBX_page *const dst, const MDBX_page *const src,
|
11133
|
+
const size_t size) {
|
11121
11134
|
STATIC_ASSERT(UINT16_MAX > MAX_PAGESIZE - PAGEHDRSZ);
|
11122
11135
|
STATIC_ASSERT(MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4);
|
11136
|
+
char *copy_dst = (void *)dst;
|
11137
|
+
const char *copy_src = (const void *)src;
|
11138
|
+
size_t copy_len = size;
|
11139
|
+
if (src->mp_flags & P_LEAF2) {
|
11140
|
+
copy_len = PAGEHDRSZ + src->mp_leaf2_ksize * page_numkeys(src);
|
11141
|
+
if (unlikely(copy_len > size))
|
11142
|
+
goto bailout;
|
11143
|
+
}
|
11123
11144
|
if ((src->mp_flags & (P_LEAF2 | P_OVERFLOW)) == 0) {
|
11124
|
-
size_t upper = src->mp_upper, lower = src->mp_lower
|
11125
|
-
|
11145
|
+
size_t upper = src->mp_upper, lower = src->mp_lower;
|
11146
|
+
intptr_t unused = upper - lower;
|
11126
11147
|
/* If page isn't full, just copy the used portion. Adjust
|
11127
11148
|
* alignment so memcpy may copy words instead of bytes. */
|
11128
|
-
if (unused
|
11149
|
+
if (unused > MDBX_CACHELINE_SIZE * 3) {
|
11129
11150
|
lower = ceil_powerof2(lower + PAGEHDRSZ, sizeof(void *));
|
11130
11151
|
upper = floor_powerof2(upper + PAGEHDRSZ, sizeof(void *));
|
11131
|
-
|
11132
|
-
|
11133
|
-
|
11134
|
-
|
11152
|
+
if (unlikely(upper > copy_len))
|
11153
|
+
goto bailout;
|
11154
|
+
memcpy(copy_dst, copy_src, lower);
|
11155
|
+
copy_dst += upper;
|
11156
|
+
copy_src += upper;
|
11157
|
+
copy_len -= upper;
|
11135
11158
|
}
|
11136
11159
|
}
|
11137
|
-
memcpy(
|
11160
|
+
memcpy(copy_dst, copy_src, copy_len);
|
11161
|
+
return;
|
11162
|
+
|
11163
|
+
bailout:
|
11164
|
+
if (src->mp_flags & P_LEAF2)
|
11165
|
+
bad_page(src, "%s addr %p, n-keys %zu, ksize %u",
|
11166
|
+
"invalid/corrupted source page", __Wpedantic_format_voidptr(src),
|
11167
|
+
page_numkeys(src), src->mp_leaf2_ksize);
|
11168
|
+
else
|
11169
|
+
bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page",
|
11170
|
+
__Wpedantic_format_voidptr(src), src->mp_upper);
|
11171
|
+
memset(dst, -1, size);
|
11138
11172
|
}
|
11139
11173
|
|
11140
11174
|
/* Pull a page off the txn's spill list, if present.
|
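Note on the hunk above: the rewritten page_alloc() fast path takes a single page number straight off the transaction's relist, a PNL — a sorted array whose element 0 stores the length. A minimal sketch of that pop under the same layout assumption (pnl_pop is a hypothetical name; the real code uses the MDBX_PNL_* macros shown in the hunk):

    #include <stddef.h>
    #include <stdint.h>

    typedef uint32_t pgno_t;
    typedef pgno_t *pnl_t; /* pnl[0] holds the length; items start at pnl[1] */

    /* Pop the preferred page number from a non-empty list: with an ascending
     * layout the smallest pgno sits at pnl[1] and the tail must be shifted
     * down by one; with a descending layout it sits at the end, no shift. */
    static pgno_t pnl_pop(pnl_t pnl, int ascending) {
      const size_t len = pnl[0];
      pgno_t pgno;
      if (ascending) {
        pgno = pnl[1];
        for (size_t i = 1; i < len; ++i)
          pnl[i] = pnl[i + 1]; /* keep the remainder sorted */
      } else {
        pgno = pnl[len];
      }
      pnl[0] = (pgno_t)(len - 1);
      return pgno;
    }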
@@ -11541,7 +11575,9 @@ __cold int mdbx_env_sync_poll(MDBX_env *env) {
|
|
11541
11575
|
|
11542
11576
|
/* Back up parent txn's cursors, then grab the originals for tracking */
|
11543
11577
|
static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
|
11544
|
-
|
11578
|
+
tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr);
|
11579
|
+
nested->mt_cursors[FREE_DBI] = nullptr;
|
11580
|
+
for (int i = parent->mt_numdbs; --i > FREE_DBI;) {
|
11545
11581
|
nested->mt_cursors[i] = NULL;
|
11546
11582
|
MDBX_cursor *mc = parent->mt_cursors[i];
|
11547
11583
|
if (mc != NULL) {
|
@@ -11586,7 +11622,8 @@ static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
|
|
11586
11622
|
*
|
11587
11623
|
* Returns 0 on success, non-zero on failure. */
|
11588
11624
|
static void cursors_eot(MDBX_txn *txn, const bool merge) {
|
11589
|
-
|
11625
|
+
tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr);
|
11626
|
+
for (intptr_t i = txn->mt_numdbs; --i > FREE_DBI;) {
|
11590
11627
|
MDBX_cursor *next, *mc = txn->mt_cursors[i];
|
11591
11628
|
if (!mc)
|
11592
11629
|
continue;
|
@@ -11856,7 +11893,7 @@ __cold int mdbx_thread_unregister(const MDBX_env *env) {
|
|
11856
11893
|
return MDBX_SUCCESS;
|
11857
11894
|
}
|
11858
11895
|
|
11859
|
-
/* check against https://
|
11896
|
+
/* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
11860
11897
|
static bool coherency_check(const MDBX_env *env, const txnid_t txnid,
|
11861
11898
|
const volatile MDBX_db *dbs,
|
11862
11899
|
const volatile MDBX_meta *meta, bool report) {
|
@@ -11957,7 +11994,7 @@ __cold static int coherency_timeout(uint64_t *timestamp, pgno_t pgno) {
|
|
11957
11994
|
}
|
11958
11995
|
|
11959
11996
|
/* check with timeout as the workaround
|
11960
|
-
* for https://
|
11997
|
+
* for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
11961
11998
|
__hot static int coherency_check_readed(const MDBX_env *env,
|
11962
11999
|
const txnid_t txnid,
|
11963
12000
|
const volatile MDBX_db *dbs,
|
@@ -12193,8 +12230,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
|
|
12193
12230
|
txn->tw.troika = meta_tap(env);
|
12194
12231
|
const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
|
12195
12232
|
uint64_t timestamp = 0;
|
12196
|
-
while (
|
12197
|
-
"workaround for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
|
12233
|
+
while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
|
12198
12234
|
rc = coherency_check_readed(env, head.txnid, head.ptr_v->mm_dbs,
|
12199
12235
|
head.ptr_v, &timestamp);
|
12200
12236
|
if (likely(rc == MDBX_SUCCESS))
|
@@ -12219,8 +12255,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
|
|
12219
12255
|
txn->tw.loose_refund_wl = 0;
|
12220
12256
|
#endif /* MDBX_ENABLE_REFUND */
|
12221
12257
|
MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0);
|
12222
|
-
txn->tw.
|
12223
|
-
txn->tw.
|
12258
|
+
txn->tw.spilled.list = NULL;
|
12259
|
+
txn->tw.spilled.least_removed = 0;
|
12224
12260
|
txn->tw.last_reclaimed = 0;
|
12225
12261
|
if (txn->tw.lifo_reclaimed)
|
12226
12262
|
MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0);
|
@@ -12297,6 +12333,19 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
|
|
12297
12333
|
osal_srwlock_AcquireShared(&env->me_remap_guard);
|
12298
12334
|
}
|
12299
12335
|
#endif /* Windows */
|
12336
|
+
} else {
|
12337
|
+
if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) {
|
12338
|
+
ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB",
|
12339
|
+
txn->mt_dbs[FREE_DBI].md_flags);
|
12340
|
+
rc = MDBX_INCOMPATIBLE;
|
12341
|
+
goto bailout;
|
12342
|
+
}
|
12343
|
+
|
12344
|
+
tASSERT(txn, txn == env->me_txn0);
|
12345
|
+
MDBX_cursor *const gc = (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn));
|
12346
|
+
rc = cursor_init(gc, txn, FREE_DBI);
|
12347
|
+
if (rc != MDBX_SUCCESS)
|
12348
|
+
goto bailout;
|
12300
12349
|
}
|
12301
12350
|
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
|
12302
12351
|
txn_valgrind(env, txn);
|
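Note on the hunk above: the new else-branch validates the GC/FreeDB flags and then initializes a write-txn GC cursor that lives in the same allocation as the txn itself, addressed by offset; the matching size change appears further down in mdbx_env_openW, where tsize grows by sizeof(MDBX_cursor). A minimal sketch of this tail-co-allocation idiom, with hypothetical stand-in types:

    #include <stdlib.h>

    typedef struct txn { int dummy; } txn_t;       /* stand-ins, not the  */
    typedef struct cursor { int dummy; } cursor_t; /* real MDBX structs   */

    /* Allocate the txn and its dedicated GC cursor as one block; the
     * cursor is then reached by pointer arithmetic, exactly like
     *   (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn))
     * in the hunk above. */
    static txn_t *txn_alloc_with_gc_cursor(void) {
      return calloc(1, sizeof(txn_t) + sizeof(cursor_t));
    }

    static cursor_t *txn_gc_cursor(txn_t *txn) {
      return (cursor_t *)((char *)txn + sizeof(txn_t));
    }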
@@ -12514,7 +12563,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
|
|
12514
12563
|
txn->tw.dirtylru = parent->tw.dirtylru;
|
12515
12564
|
|
12516
12565
|
dpl_sort(parent);
|
12517
|
-
if (parent->tw.
|
12566
|
+
if (parent->tw.spilled.list)
|
12518
12567
|
spill_purge(parent);
|
12519
12568
|
|
12520
12569
|
tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >=
|
@@ -12591,7 +12640,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
|
|
12591
12640
|
eASSERT(env, (txn->mt_flags &
|
12592
12641
|
~(MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC |
|
12593
12642
|
MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0);
|
12594
|
-
assert(!txn->tw.
|
12643
|
+
assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed);
|
12595
12644
|
}
|
12596
12645
|
txn->mt_signature = MDBX_MT_SIGNATURE;
|
12597
12646
|
txn->mt_userctx = context;
|
@@ -12696,10 +12745,9 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
|
|
12696
12745
|
env, txn->mt_child ? (size_t)txn->tw.retired_pages
|
12697
12746
|
: MDBX_PNL_GETSIZE(txn->tw.retired_pages));
|
12698
12747
|
info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
|
12699
|
-
info->txn_space_dirty =
|
12700
|
-
txn->tw.dirtylist
|
12701
|
-
|
12702
|
-
: 0;
|
12748
|
+
info->txn_space_dirty = pgno2bytes(
|
12749
|
+
env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
12750
|
+
: txn->tw.writemap_dirty_npages);
|
12703
12751
|
info->txn_reader_lag = INT64_MAX;
|
12704
12752
|
MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
|
12705
12753
|
if (scan_rlt && lck) {
|
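Note on the hunk above: under MDBX_WRITEMAP there is no malloc-backed dirty-list, so mdbx_txn_info() now falls back to the writemap_dirty_npages counter; either way the page count is converted to bytes via pgno2bytes(). A sketch of that accounting, assuming (as in libmdbx) a power-of-two page size:

    #include <stddef.h>
    #include <stdint.h>

    /* pages -> bytes with a power-of-2 page size (cf. pgno2bytes()). */
    static inline uint64_t pages2bytes(size_t npages, unsigned psize2log) {
      return (uint64_t)npages << psize2log;
    }

    /* Dirty-space accounting as in the hunk above: prefer the dirty-list
     * total when present, otherwise the WRITEMAP dirty-page counter. */
    static uint64_t txn_space_dirty(size_t dirtylist_pages, int have_dirtylist,
                                    size_t writemap_dirty_npages,
                                    unsigned psize2log) {
      const size_t n = have_dirtylist ? dirtylist_pages : writemap_dirty_npages;
      return pages2bytes(n, psize2log);
    }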
@@ -13015,8 +13063,8 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) {
|
|
13015
13063
|
txn->mt_flags = MDBX_TXN_FINISHED;
|
13016
13064
|
txn->mt_owner = 0;
|
13017
13065
|
env->me_txn = txn->mt_parent;
|
13018
|
-
pnl_free(txn->tw.
|
13019
|
-
txn->tw.
|
13066
|
+
pnl_free(txn->tw.spilled.list);
|
13067
|
+
txn->tw.spilled.list = nullptr;
|
13020
13068
|
if (txn == env->me_txn0) {
|
13021
13069
|
eASSERT(env, txn->mt_parent == NULL);
|
13022
13070
|
/* Export or close DBI handles created in this txn */
|
@@ -13283,7 +13331,7 @@ typedef struct gc_update_context {
|
|
13283
13331
|
#if MDBX_ENABLE_BIGFOOT
|
13284
13332
|
txnid_t bigfoot;
|
13285
13333
|
#endif /* MDBX_ENABLE_BIGFOOT */
|
13286
|
-
|
13334
|
+
MDBX_cursor cursor;
|
13287
13335
|
} gcu_context_t;
|
13288
13336
|
|
13289
13337
|
static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
|
@@ -13292,7 +13340,7 @@ static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13292
13340
|
#if MDBX_ENABLE_BIGFOOT
|
13293
13341
|
ctx->bigfoot = txn->mt_txnid;
|
13294
13342
|
#endif /* MDBX_ENABLE_BIGFOOT */
|
13295
|
-
return cursor_init(&ctx->cursor
|
13343
|
+
return cursor_init(&ctx->cursor, txn, FREE_DBI);
|
13296
13344
|
}
|
13297
13345
|
|
13298
13346
|
static __always_inline size_t gcu_backlog_size(MDBX_txn *txn) {
|
@@ -13311,10 +13359,10 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13311
13359
|
#endif /* MDBX_ENABLE_BIGFOOT */
|
13312
13360
|
key.iov_len = sizeof(txnid_t);
|
13313
13361
|
const struct cursor_set_result csr =
|
13314
|
-
cursor_set(&ctx->cursor
|
13362
|
+
cursor_set(&ctx->cursor, &key, &val, MDBX_SET);
|
13315
13363
|
if (csr.err == MDBX_SUCCESS && csr.exact) {
|
13316
13364
|
ctx->retired_stored = 0;
|
13317
|
-
err = mdbx_cursor_del(&ctx->cursor
|
13365
|
+
err = mdbx_cursor_del(&ctx->cursor, 0);
|
13318
13366
|
TRACE("== clear-4linear, backlog %zu, err %d", gcu_backlog_size(txn),
|
13319
13367
|
err);
|
13320
13368
|
}
|
@@ -13327,6 +13375,13 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13327
13375
|
return err;
|
13328
13376
|
}
|
13329
13377
|
|
13378
|
+
static int gcu_touch(gcu_context_t *ctx) {
|
13379
|
+
ctx->cursor.mc_flags |= C_GCU;
|
13380
|
+
int err = cursor_touch(&ctx->cursor);
|
13381
|
+
ctx->cursor.mc_flags -= C_GCU;
|
13382
|
+
return err;
|
13383
|
+
}
|
13384
|
+
|
13330
13385
|
/* Prepare a backlog of pages to modify GC itself, while reclaiming is
|
13331
13386
|
* prohibited. It should be enough to prevent search in page_alloc_slowpath()
|
13332
13387
|
* during a deleting, when GC tree is unbalanced. */
|
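Note on the hunk above: the new gcu_touch() helper replaces the txn-wide MDBX_TXN_UPDATE_GC flag juggling (removed in the following hunks) with a cursor-local C_GCU flag that is set only around cursor_touch(). A sketch of the same set-call-restore pattern, with hypothetical names and flag value:

    typedef struct cursor { unsigned flags; } cursor_t;
    #define C_GCU 0x80u /* hypothetical flag value */

    static int cursor_touch_stub(cursor_t *c) { (void)c; return 0; }

    /* Temporarily mark the cursor so page-touching code knows it is
     * working on the GC tree; restore assumes the flag was clear on
     * entry, matching the |=/-= pair in the hunk above. */
    static int gc_touch(cursor_t *c) {
      c->flags |= C_GCU;
      int err = cursor_touch_stub(c);
      c->flags &= ~C_GCU;
      return err;
    }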
@@ -13356,14 +13411,12 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
|
|
13356
13411
|
key.iov_base = val.iov_base = nullptr;
|
13357
13412
|
key.iov_len = sizeof(txnid_t);
|
13358
13413
|
val.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
|
13359
|
-
err = cursor_spill(&ctx->cursor
|
13414
|
+
err = cursor_spill(&ctx->cursor, &key, &val);
|
13360
13415
|
if (unlikely(err != MDBX_SUCCESS))
|
13361
13416
|
return err;
|
13362
13417
|
}
|
13363
13418
|
|
13364
|
-
|
13365
|
-
txn->mt_flags -= MDBX_TXN_UPDATE_GC;
|
13366
|
-
err = cursor_touch(&ctx->cursor.outer);
|
13419
|
+
err = gcu_touch(ctx);
|
13367
13420
|
TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
13368
13421
|
|
13369
13422
|
if (unlikely(pages4retiredlist > 1) &&
|
@@ -13373,22 +13426,20 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
|
|
13373
13426
|
err = gcu_clean_stored_retired(txn, ctx);
|
13374
13427
|
if (unlikely(err != MDBX_SUCCESS))
|
13375
13428
|
return err;
|
13376
|
-
err =
|
13377
|
-
|
13378
|
-
|
13429
|
+
err =
|
13430
|
+
page_alloc_slowpath(&ctx->cursor, pages4retiredlist, MDBX_ALLOC_RESERVE)
|
13431
|
+
.err;
|
13379
13432
|
TRACE("== after-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
13380
|
-
cASSERT(&ctx->cursor
|
13433
|
+
cASSERT(&ctx->cursor,
|
13381
13434
|
gcu_backlog_size(txn) >= pages4retiredlist || err != MDBX_SUCCESS);
|
13382
13435
|
}
|
13383
13436
|
|
13384
13437
|
while (gcu_backlog_size(txn) < backlog4cow + pages4retiredlist &&
|
13385
13438
|
err == MDBX_SUCCESS)
|
13386
|
-
err = page_alloc_slowpath(&ctx->cursor
|
13387
|
-
|
13388
|
-
MDBX_ALLOC_RESERVE | MDBX_ALLOC_BACKLOG)
|
13439
|
+
err = page_alloc_slowpath(&ctx->cursor, 0,
|
13440
|
+
MDBX_ALLOC_RESERVE | MDBX_ALLOC_UNIMPORTANT)
|
13389
13441
|
.err;
|
13390
13442
|
|
13391
|
-
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
13392
13443
|
TRACE("<< backlog %zu, err %d", gcu_backlog_size(txn), err);
|
13393
13444
|
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
|
13394
13445
|
}
|
@@ -13417,9 +13468,8 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13417
13468
|
MDBX_env *const env = txn->mt_env;
|
13418
13469
|
const char *const dbg_prefix_mode = ctx->lifo ? " lifo" : " fifo";
|
13419
13470
|
(void)dbg_prefix_mode;
|
13420
|
-
|
13421
|
-
|
13422
|
-
txn->mt_cursors[FREE_DBI] = &ctx->cursor.outer;
|
13471
|
+
ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI];
|
13472
|
+
txn->mt_cursors[FREE_DBI] = &ctx->cursor;
|
13423
13473
|
|
13424
13474
|
/* txn->tw.relist[] can grow and shrink during this call.
|
13425
13475
|
* txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow.
|
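Note on the hunk above: update_gc() now threads its GC cursor onto the txn's per-DBI cursor list on entry (ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI]) and unlinks it again at bailout:, visible near the end of the function. A minimal sketch of that intrusive singly-linked push/restore, with hypothetical names:

    typedef struct cursor {
      struct cursor *next;
    } cursor_t;

    /* Push a stack-resident cursor onto the head of the per-DBI list... */
    static void cursor_link(cursor_t **head, cursor_t *mc) {
      mc->next = *head;
      *head = mc;
    }

    /* ...and restore the previous head on the bailout path. */
    static void cursor_unlink_head(cursor_t **head) {
      *head = (*head)->next;
    }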
@@ -13481,7 +13531,7 @@ retry:
|
|
13481
13531
|
ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
|
13482
13532
|
key.iov_base = &ctx->cleaned_id;
|
13483
13533
|
key.iov_len = sizeof(ctx->cleaned_id);
|
13484
|
-
rc = mdbx_cursor_get(&ctx->cursor
|
13534
|
+
rc = mdbx_cursor_get(&ctx->cursor, &key, NULL, MDBX_SET);
|
13485
13535
|
if (rc == MDBX_NOTFOUND)
|
13486
13536
|
continue;
|
13487
13537
|
if (unlikely(rc != MDBX_SUCCESS))
|
@@ -13494,18 +13544,17 @@ retry:
|
|
13494
13544
|
tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
|
13495
13545
|
TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix_mode,
|
13496
13546
|
ctx->cleaned_slot, ctx->cleaned_id);
|
13497
|
-
tASSERT(txn, *txn->mt_cursors == &ctx->cursor
|
13498
|
-
rc = mdbx_cursor_del(&ctx->cursor
|
13547
|
+
tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
|
13548
|
+
rc = mdbx_cursor_del(&ctx->cursor, 0);
|
13499
13549
|
if (unlikely(rc != MDBX_SUCCESS))
|
13500
13550
|
goto bailout;
|
13501
13551
|
} while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
|
13502
13552
|
txl_sort(txn->tw.lifo_reclaimed);
|
13503
13553
|
}
|
13504
13554
|
} else {
|
13505
|
-
/*
|
13506
|
-
* now delete them and any we reserved for tw.relist. */
|
13555
|
+
/* Delete the remaining entries pulled out of the GC. */
|
13507
13556
|
while (ctx->cleaned_id <= txn->tw.last_reclaimed) {
|
13508
|
-
rc = cursor_first(&ctx->cursor
|
13557
|
+
rc = cursor_first(&ctx->cursor, &key, NULL);
|
13509
13558
|
if (rc == MDBX_NOTFOUND)
|
13510
13559
|
break;
|
13511
13560
|
if (unlikely(rc != MDBX_SUCCESS))
|
@@ -13530,8 +13579,8 @@ retry:
|
|
13530
13579
|
tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
|
13531
13580
|
TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix_mode,
|
13532
13581
|
ctx->cleaned_id);
|
13533
|
-
tASSERT(txn, *txn->mt_cursors == &ctx->cursor
|
13534
|
-
rc = mdbx_cursor_del(&ctx->cursor
|
13582
|
+
tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
|
13583
|
+
rc = mdbx_cursor_del(&ctx->cursor, 0);
|
13535
13584
|
if (unlikely(rc != MDBX_SUCCESS))
|
13536
13585
|
goto bailout;
|
13537
13586
|
}
|
@@ -13566,10 +13615,7 @@ retry:
|
|
13566
13615
|
if (txn->tw.loose_count > 0) {
|
13567
13616
|
TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix_mode,
|
13568
13617
|
txn->tw.loose_count);
|
13569
|
-
rc = page_alloc_slowpath(&ctx->cursor
|
13570
|
-
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
|
13571
|
-
MDBX_ALLOC_RESERVE)
|
13572
|
-
.err;
|
13618
|
+
rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
|
13573
13619
|
if (rc == MDBX_SUCCESS) {
|
13574
13620
|
TRACE("%s: retry since gc-slot for %zu loose-pages available",
|
13575
13621
|
dbg_prefix_mode, txn->tw.loose_count);
|
@@ -13651,10 +13697,9 @@ retry:
|
|
13651
13697
|
if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
|
13652
13698
|
if (unlikely(!ctx->retired_stored)) {
|
13653
13699
|
/* Make sure last page of GC is touched and on retired-list */
|
13654
|
-
|
13655
|
-
rc
|
13656
|
-
|
13657
|
-
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
13700
|
+
rc = cursor_last(&ctx->cursor, nullptr, nullptr);
|
13701
|
+
if (likely(rc != MDBX_SUCCESS))
|
13702
|
+
rc = gcu_touch(ctx);
|
13658
13703
|
if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
|
13659
13704
|
goto bailout;
|
13660
13705
|
}
|
@@ -13664,6 +13709,8 @@ retry:
|
|
13664
13709
|
do {
|
13665
13710
|
if (ctx->bigfoot > txn->mt_txnid) {
|
13666
13711
|
rc = gcu_clean_stored_retired(txn, ctx);
|
13712
|
+
if (unlikely(rc != MDBX_SUCCESS))
|
13713
|
+
goto bailout;
|
13667
13714
|
tASSERT(txn, ctx->bigfoot <= txn->mt_txnid);
|
13668
13715
|
}
|
13669
13716
|
|
@@ -13685,7 +13732,7 @@ retry:
|
|
13685
13732
|
? env->me_maxgc_ov1page
|
13686
13733
|
: left;
|
13687
13734
|
data.iov_len = (chunk + 1) * sizeof(pgno_t);
|
13688
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
13735
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
|
13689
13736
|
if (unlikely(rc != MDBX_SUCCESS))
|
13690
13737
|
goto bailout;
|
13691
13738
|
|
@@ -13723,7 +13770,7 @@ retry:
|
|
13723
13770
|
do {
|
13724
13771
|
gcu_prepare_backlog(txn, ctx, true);
|
13725
13772
|
data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
|
13726
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
13773
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
|
13727
13774
|
if (unlikely(rc != MDBX_SUCCESS))
|
13728
13775
|
goto bailout;
|
13729
13776
|
/* Retry if tw.retired_pages[] grew during the Put() */
|
@@ -13790,17 +13837,13 @@ retry:
|
|
13790
13837
|
left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
13791
13838
|
env->me_maxgc_ov1page &&
|
13792
13839
|
!ctx->dense) {
|
13793
|
-
/*
|
13840
|
+
/* A free slot is needed to save the page list. */
|
13794
13841
|
bool need_cleanup = false;
|
13795
|
-
txnid_t snap_oldest;
|
13842
|
+
txnid_t snap_oldest = 0;
|
13796
13843
|
retry_rid:
|
13797
|
-
txn->mt_flags -= MDBX_TXN_UPDATE_GC;
|
13798
13844
|
do {
|
13799
|
-
|
13800
|
-
|
13801
|
-
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
|
13802
|
-
MDBX_ALLOC_RESERVE)
|
13803
|
-
.err;
|
13845
|
+
rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
|
13846
|
+
snap_oldest = env->me_lck->mti_oldest_reader.weak;
|
13804
13847
|
if (likely(rc == MDBX_SUCCESS)) {
|
13805
13848
|
TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,
|
13806
13849
|
MDBX_PNL_LAST(txn->tw.lifo_reclaimed));
|
@@ -13812,7 +13855,6 @@ retry:
|
|
13812
13855
|
left >
|
13813
13856
|
(MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
13814
13857
|
env->me_maxgc_ov1page);
|
13815
|
-
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
13816
13858
|
|
13817
13859
|
if (likely(rc == MDBX_SUCCESS)) {
|
13818
13860
|
TRACE("%s: got enough from GC.", dbg_prefix_mode);
|
@@ -13830,7 +13872,7 @@ retry:
|
|
13830
13872
|
} else {
|
13831
13873
|
tASSERT(txn, txn->tw.last_reclaimed == 0);
|
13832
13874
|
if (unlikely(txn_oldest_reader(txn) != snap_oldest))
|
13833
|
-
/* should retry page_alloc_slowpath(
|
13875
|
+
/* should retry page_alloc_slowpath()
|
13834
13876
|
* if the oldest reader changes since the last attempt */
|
13835
13877
|
goto retry_rid;
|
13836
13878
|
/* no reclaimable GC entries,
|
@@ -13840,7 +13882,8 @@ retry:
|
|
13840
13882
|
ctx->rid);
|
13841
13883
|
}
|
13842
13884
|
|
13843
|
-
/*
|
13885
|
+
/* The GC has no entries fit for recycling,
|
13886
|
+
* so free ids will be used in reverse order. */
|
13844
13887
|
while (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < prefer_max_scatter &&
|
13845
13888
|
left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) -
|
13846
13889
|
ctx->reused_slot) *
|
@@ -13858,26 +13901,20 @@ retry:
|
|
13858
13901
|
}
|
13859
13902
|
|
13860
13903
|
tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID);
|
13861
|
-
|
13904
|
+
ctx->rid -= 1;
|
13862
13905
|
key.iov_base = &ctx->rid;
|
13863
13906
|
key.iov_len = sizeof(ctx->rid);
|
13864
|
-
rc = mdbx_cursor_get(&ctx->cursor
|
13907
|
+
rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
|
13865
13908
|
if (unlikely(rc == MDBX_SUCCESS)) {
|
13866
|
-
DEBUG("%s: GC's id %" PRIaTXN " is
|
13909
|
+
DEBUG("%s: GC's id %" PRIaTXN " is present, going to first",
|
13867
13910
|
dbg_prefix_mode, ctx->rid);
|
13868
|
-
|
13869
|
-
rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_FIRST);
|
13870
|
-
if (rc == MDBX_NOTFOUND) {
|
13871
|
-
DEBUG("%s: GC is empty (going dense-mode)", dbg_prefix_mode);
|
13872
|
-
ctx->dense = true;
|
13873
|
-
break;
|
13874
|
-
}
|
13911
|
+
rc = cursor_first(&ctx->cursor, &key, nullptr);
|
13875
13912
|
if (unlikely(rc != MDBX_SUCCESS ||
|
13876
13913
|
key.iov_len != sizeof(txnid_t))) {
|
13877
13914
|
rc = MDBX_CORRUPTED;
|
13878
13915
|
goto bailout;
|
13879
13916
|
}
|
13880
|
-
txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13917
|
+
const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13881
13918
|
if (gc_first <= MIN_TXNID) {
|
13882
13919
|
DEBUG("%s: no free GC's id(s) less than %" PRIaTXN
|
13883
13920
|
" (going dense-mode)",
|
@@ -13925,13 +13962,13 @@ retry:
|
|
13925
13962
|
tASSERT(txn, txn->tw.lifo_reclaimed == NULL);
|
13926
13963
|
if (unlikely(ctx->rid == 0)) {
|
13927
13964
|
ctx->rid = txn_oldest_reader(txn);
|
13928
|
-
rc =
|
13929
|
-
if (rc == MDBX_SUCCESS) {
|
13965
|
+
rc = cursor_first(&ctx->cursor, &key, nullptr);
|
13966
|
+
if (likely(rc == MDBX_SUCCESS)) {
|
13930
13967
|
if (unlikely(key.iov_len != sizeof(txnid_t))) {
|
13931
13968
|
rc = MDBX_CORRUPTED;
|
13932
13969
|
goto bailout;
|
13933
13970
|
}
|
13934
|
-
txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13971
|
+
const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13935
13972
|
if (ctx->rid >= gc_first)
|
13936
13973
|
ctx->rid = gc_first - 1;
|
13937
13974
|
if (unlikely(ctx->rid == 0)) {
|
@@ -14022,7 +14059,7 @@ retry:
|
|
14022
14059
|
TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix_mode, chunk,
|
14023
14060
|
ctx->settled + 1, ctx->settled + chunk + 1, reservation_gc_id);
|
14024
14061
|
gcu_prepare_backlog(txn, ctx, true);
|
14025
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
14062
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
|
14026
14063
|
MDBX_RESERVE | MDBX_NOOVERWRITE);
|
14027
14064
|
tASSERT(txn, pnl_check_allocated(txn->tw.relist,
|
14028
14065
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
@@ -14070,7 +14107,7 @@ retry:
|
|
14070
14107
|
size_t left = amount;
|
14071
14108
|
if (txn->tw.lifo_reclaimed == nullptr) {
|
14072
14109
|
tASSERT(txn, ctx->lifo == 0);
|
14073
|
-
rc = cursor_first(&ctx->cursor
|
14110
|
+
rc = cursor_first(&ctx->cursor, &key, &data);
|
14074
14111
|
if (unlikely(rc != MDBX_SUCCESS))
|
14075
14112
|
goto bailout;
|
14076
14113
|
} else {
|
@@ -14104,7 +14141,7 @@ retry:
|
|
14104
14141
|
dbg_prefix_mode, fill_gc_id, ctx->filled_slot);
|
14105
14142
|
key.iov_base = &fill_gc_id;
|
14106
14143
|
key.iov_len = sizeof(fill_gc_id);
|
14107
|
-
rc = mdbx_cursor_get(&ctx->cursor
|
14144
|
+
rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
|
14108
14145
|
if (unlikely(rc != MDBX_SUCCESS))
|
14109
14146
|
goto bailout;
|
14110
14147
|
}
|
@@ -14118,7 +14155,6 @@ retry:
|
|
14118
14155
|
key.iov_len = sizeof(fill_gc_id);
|
14119
14156
|
|
14120
14157
|
tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
|
14121
|
-
txn->mt_flags += MDBX_TXN_FROZEN_RE;
|
14122
14158
|
size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
|
14123
14159
|
if (unlikely(chunk > left)) {
|
14124
14160
|
TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk,
|
@@ -14126,14 +14162,11 @@ retry:
|
|
14126
14162
|
if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) ||
|
14127
14163
|
chunk - left > env->me_maxgc_ov1page) {
|
14128
14164
|
data.iov_len = (left + 1) * sizeof(pgno_t);
|
14129
|
-
if (ctx->loop < 7)
|
14130
|
-
txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
|
14131
14165
|
}
|
14132
14166
|
chunk = left;
|
14133
14167
|
}
|
14134
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
14168
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
|
14135
14169
|
MDBX_CURRENT | MDBX_RESERVE);
|
14136
|
-
txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
|
14137
14170
|
if (unlikely(rc != MDBX_SUCCESS))
|
14138
14171
|
goto bailout;
|
14139
14172
|
gcu_clean_reserved(env, data);
|
@@ -14182,7 +14215,7 @@ retry:
|
|
14182
14215
|
|
14183
14216
|
if (txn->tw.lifo_reclaimed == nullptr) {
|
14184
14217
|
tASSERT(txn, ctx->lifo == 0);
|
14185
|
-
rc = cursor_next(&ctx->cursor
|
14218
|
+
rc = cursor_next(&ctx->cursor, &key, &data, MDBX_NEXT);
|
14186
14219
|
if (unlikely(rc != MDBX_SUCCESS))
|
14187
14220
|
goto bailout;
|
14188
14221
|
} else {
|
@@ -14213,7 +14246,7 @@ retry:
|
|
14213
14246
|
ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
|
14214
14247
|
|
14215
14248
|
bailout:
|
14216
|
-
txn->mt_cursors[FREE_DBI] = ctx->cursor.
|
14249
|
+
txn->mt_cursors[FREE_DBI] = ctx->cursor.mc_next;
|
14217
14250
|
|
14218
14251
|
MDBX_PNL_SETSIZE(txn->tw.relist, 0);
|
14219
14252
|
#if MDBX_ENABLE_PROFGC
|
@@ -14363,7 +14396,8 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14363
14396
|
MDBX_PNL_SETSIZE(parent->tw.retired_pages, w);
|
14364
14397
|
|
14365
14398
|
/* Filter-out parent spill list */
|
14366
|
-
if (parent->tw.
|
14399
|
+
if (parent->tw.spilled.list &&
|
14400
|
+
MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) {
|
14367
14401
|
const MDBX_PNL sl = spill_purge(parent);
|
14368
14402
|
size_t len = MDBX_PNL_GETSIZE(sl);
|
14369
14403
|
if (len) {
|
@@ -14378,7 +14412,7 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14378
14412
|
DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1);
|
14379
14413
|
i -= 1;
|
14380
14414
|
} while (i && sl[i] >= (parent->mt_next_pgno << 1));
|
14381
|
-
|
14415
|
+
MDBX_PNL_SETSIZE(sl, i);
|
14382
14416
|
#else
|
14383
14417
|
assert(MDBX_PNL_MOST(sl) == MDBX_PNL_FIRST(sl));
|
14384
14418
|
size_t i = 0;
|
@@ -14451,10 +14485,10 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14451
14485
|
}
|
14452
14486
|
|
14453
14487
|
/* Remove anything in our spill list from parent's dirty list */
|
14454
|
-
if (txn->tw.
|
14455
|
-
tASSERT(txn, pnl_check_allocated(txn->tw.
|
14488
|
+
if (txn->tw.spilled.list) {
|
14489
|
+
tASSERT(txn, pnl_check_allocated(txn->tw.spilled.list,
|
14456
14490
|
(size_t)parent->mt_next_pgno << 1));
|
14457
|
-
dpl_sift(parent, txn->tw.
|
14491
|
+
dpl_sift(parent, txn->tw.spilled.list, true);
|
14458
14492
|
tASSERT(parent,
|
14459
14493
|
parent->tw.dirtyroom + parent->tw.dirtylist->length ==
|
14460
14494
|
(parent->mt_parent ? parent->mt_parent->tw.dirtyroom
|
@@ -14606,23 +14640,23 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14606
14640
|
tASSERT(parent, dirtylist_check(parent));
|
14607
14641
|
dpl_free(txn);
|
14608
14642
|
|
14609
|
-
if (txn->tw.
|
14610
|
-
if (parent->tw.
|
14643
|
+
if (txn->tw.spilled.list) {
|
14644
|
+
if (parent->tw.spilled.list) {
|
14611
14645
|
/* Must not fail since space was preserved above. */
|
14612
|
-
pnl_merge(parent->tw.
|
14613
|
-
pnl_free(txn->tw.
|
14646
|
+
pnl_merge(parent->tw.spilled.list, txn->tw.spilled.list);
|
14647
|
+
pnl_free(txn->tw.spilled.list);
|
14614
14648
|
} else {
|
14615
|
-
parent->tw.
|
14616
|
-
parent->tw.
|
14649
|
+
parent->tw.spilled.list = txn->tw.spilled.list;
|
14650
|
+
parent->tw.spilled.least_removed = txn->tw.spilled.least_removed;
|
14617
14651
|
}
|
14618
14652
|
tASSERT(parent, dirtylist_check(parent));
|
14619
14653
|
}
|
14620
14654
|
|
14621
14655
|
parent->mt_flags &= ~MDBX_TXN_HAS_CHILD;
|
14622
|
-
if (parent->tw.
|
14623
|
-
assert(pnl_check_allocated(parent->tw.
|
14656
|
+
if (parent->tw.spilled.list) {
|
14657
|
+
assert(pnl_check_allocated(parent->tw.spilled.list,
|
14624
14658
|
(size_t)parent->mt_next_pgno << 1));
|
14625
|
-
if (MDBX_PNL_GETSIZE(parent->tw.
|
14659
|
+
if (MDBX_PNL_GETSIZE(parent->tw.spilled.list))
|
14626
14660
|
parent->mt_flags |= MDBX_TXN_SPILLS;
|
14627
14661
|
}
|
14628
14662
|
}
|
@@ -14693,8 +14727,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
|
14693
14727
|
sizeof(parent->mt_geo)) == 0);
|
14694
14728
|
tASSERT(txn, memcmp(&parent->mt_canary, &txn->mt_canary,
|
14695
14729
|
sizeof(parent->mt_canary)) == 0);
|
14696
|
-
tASSERT(txn, !txn->tw.
|
14697
|
-
MDBX_PNL_GETSIZE(txn->tw.
|
14730
|
+
tASSERT(txn, !txn->tw.spilled.list ||
|
14731
|
+
MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0);
|
14698
14732
|
tASSERT(txn, txn->tw.loose_count == 0);
|
14699
14733
|
|
14700
14734
|
/* fast completion of pure nested transaction */
|
@@ -14714,10 +14748,10 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
|
14714
14748
|
goto fail;
|
14715
14749
|
}
|
14716
14750
|
|
14717
|
-
if (txn->tw.
|
14718
|
-
if (parent->tw.
|
14719
|
-
rc = pnl_need(&parent->tw.
|
14720
|
-
MDBX_PNL_GETSIZE(txn->tw.
|
14751
|
+
if (txn->tw.spilled.list) {
|
14752
|
+
if (parent->tw.spilled.list) {
|
14753
|
+
rc = pnl_need(&parent->tw.spilled.list,
|
14754
|
+
MDBX_PNL_GETSIZE(txn->tw.spilled.list));
|
14721
14755
|
if (unlikely(rc != MDBX_SUCCESS))
|
14722
14756
|
goto fail;
|
14723
14757
|
}
|
@@ -15837,7 +15871,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
|
|
15837
15871
|
}
|
15838
15872
|
|
15839
15873
|
uint64_t timestamp = 0;
|
15840
|
-
while ("workaround for https://
|
15874
|
+
while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
|
15841
15875
|
rc =
|
15842
15876
|
coherency_check_written(env, pending->unsafe_txnid, target, &timestamp);
|
15843
15877
|
if (likely(rc == MDBX_SUCCESS))
|
@@ -16359,7 +16393,7 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
|
16359
16393
|
|
16360
16394
|
uint64_t timestamp = 0;
|
16361
16395
|
while ("workaround for "
|
16362
|
-
"https://
|
16396
|
+
"https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
|
16363
16397
|
meta = *head.ptr_c;
|
16364
16398
|
rc = coherency_check_readed(env, head.txnid, meta.mm_dbs, &meta,
|
16365
16399
|
&timestamp);
|
@@ -17503,13 +17537,13 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx,
|
|
17503
17537
|
}
|
17504
17538
|
#else
|
17505
17539
|
struct stat st;
|
17506
|
-
if (stat(pathname, &st)) {
|
17540
|
+
if (stat(pathname, &st) != 0) {
|
17507
17541
|
rc = errno;
|
17508
17542
|
if (rc != MDBX_ENOFILE)
|
17509
17543
|
return rc;
|
17510
17544
|
if (mode == 0 || (*flags & MDBX_RDONLY) != 0)
|
17511
17545
|
/* can't open existing */
|
17512
|
-
return rc
|
17546
|
+
return rc /* MDBX_ENOFILE */;
|
17513
17547
|
|
17514
17548
|
/* auto-create directory if requested */
|
17515
17549
|
const mdbx_mode_t dir_mode =
|
@@ -17702,7 +17736,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
|
|
17702
17736
|
} else {
|
17703
17737
|
#if MDBX_MMAP_INCOHERENT_FILE_WRITE
|
17704
17738
|
/* Temporary `workaround` for OpenBSD kernel's flaw.
|
17705
|
-
* See https://
|
17739
|
+
* See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */
|
17706
17740
|
if ((flags & MDBX_WRITEMAP) == 0) {
|
17707
17741
|
if (flags & MDBX_ACCEDE)
|
17708
17742
|
flags |= MDBX_WRITEMAP;
|
@@ -18014,7 +18048,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
|
|
18014
18048
|
}
|
18015
18049
|
|
18016
18050
|
if ((flags & MDBX_RDONLY) == 0) {
|
18017
|
-
const size_t tsize = sizeof(MDBX_txn),
|
18051
|
+
const size_t tsize = sizeof(MDBX_txn) + sizeof(MDBX_cursor),
|
18018
18052
|
size = tsize + env->me_maxdbs *
|
18019
18053
|
(sizeof(MDBX_db) + sizeof(MDBX_cursor *) +
|
18020
18054
|
sizeof(MDBX_atomic_uint32_t) + 1);
|
@@ -18139,9 +18173,10 @@ __cold static int env_close(MDBX_env *env) {
|
|
18139
18173
|
}
|
18140
18174
|
|
18141
18175
|
if (env->me_dbxs) {
|
18142
|
-
for (size_t i = env->me_numdbs;
|
18176
|
+
for (size_t i = CORE_DBS; i < env->me_numdbs; ++i)
|
18143
18177
|
osal_free(env->me_dbxs[i].md_name.iov_base);
|
18144
18178
|
osal_free(env->me_dbxs);
|
18179
|
+
env->me_numdbs = CORE_DBS;
|
18145
18180
|
env->me_dbxs = nullptr;
|
18146
18181
|
}
|
18147
18182
|
if (env->me_pbuf) {
|
@@ -18164,7 +18199,7 @@ __cold static int env_close(MDBX_env *env) {
|
|
18164
18199
|
dpl_free(env->me_txn0);
|
18165
18200
|
txl_free(env->me_txn0->tw.lifo_reclaimed);
|
18166
18201
|
pnl_free(env->me_txn0->tw.retired_pages);
|
18167
|
-
pnl_free(env->me_txn0->tw.
|
18202
|
+
pnl_free(env->me_txn0->tw.spilled.list);
|
18168
18203
|
pnl_free(env->me_txn0->tw.relist);
|
18169
18204
|
osal_free(env->me_txn0);
|
18170
18205
|
env->me_txn0 = nullptr;
|
@@ -18907,7 +18942,8 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
|
|
18907
18942
|
if (!MDBX_DISABLE_VALIDATION) {
|
18908
18943
|
const MDBX_env *env = mc->mc_txn->mt_env;
|
18909
18944
|
const size_t dsize = data->iov_len;
|
18910
|
-
if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax)
|
18945
|
+
if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax) &&
|
18946
|
+
mc->mc_dbi != FREE_DBI)
|
18911
18947
|
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
18912
18948
|
const unsigned npages = number_of_ovpages(env, dsize);
|
18913
18949
|
if (unlikely(lp.page->mp_pages != npages)) {
|
@@ -18915,7 +18951,7 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
|
|
18915
18951
|
return bad_page(lp.page,
|
18916
18952
|
"too less n-pages %u for bigdata-node (%zu bytes)",
|
18917
18953
|
lp.page->mp_pages, dsize);
|
18918
|
-
else
|
18954
|
+
else if (mc->mc_dbi != FREE_DBI)
|
18919
18955
|
poor_page(lp.page, "extra n-pages %u for bigdata-node (%zu bytes)",
|
18920
18956
|
lp.page->mp_pages, dsize);
|
18921
18957
|
}
|
@@ -20011,7 +20047,6 @@ static int touch_dbi(MDBX_cursor *mc) {
|
|
20011
20047
|
*mc->mc_dbistate |= DBI_DIRTY;
|
20012
20048
|
mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
|
20013
20049
|
if (mc->mc_dbi >= CORE_DBS) {
|
20014
|
-
cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_UPDATE_GC) == 0);
|
20015
20050
|
/* Touch DB record of named DB */
|
20016
20051
|
MDBX_cursor_couple cx;
|
20017
20052
|
int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI);
|
@@ -20424,9 +20459,9 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|
20424
20459
|
|
20425
20460
|
/* Large/Overflow page overwrites need special handling */
|
20426
20461
|
if (unlikely(node_flags(node) & F_BIGDATA)) {
|
20427
|
-
|
20428
|
-
|
20429
|
-
|
20462
|
+
const size_t dpages = (node_size(key, data) > env->me_leaf_nodemax)
|
20463
|
+
? number_of_ovpages(env, data->iov_len)
|
20464
|
+
: 0;
|
20430
20465
|
|
20431
20466
|
const pgno_t pgno = node_largedata_pgno(node);
|
20432
20467
|
pgr_t lp = page_get_large(mc, pgno, mc->mc_pg[mc->mc_top]->mp_txnid);
|
@@ -20435,13 +20470,13 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|
20435
20470
|
cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW);
|
20436
20471
|
|
20437
20472
|
/* Is the ov page from this txn (or a parent) and big enough? */
|
20438
|
-
|
20439
|
-
|
20440
|
-
(
|
20441
|
-
|
20442
|
-
|
20443
|
-
|
20444
|
-
|
20473
|
+
const size_t ovpages = lp.page->mp_pages;
|
20474
|
+
const size_t extra_threshold =
|
20475
|
+
(mc->mc_dbi == FREE_DBI)
|
20476
|
+
? 1
|
20477
|
+
: /* LY: add configurable threshold to keep reserve space */ 0;
|
20478
|
+
if (!IS_FROZEN(mc->mc_txn, lp.page) && ovpages >= dpages &&
|
20479
|
+
ovpages <= dpages + extra_threshold) {
|
20445
20480
|
/* yes, overwrite it. */
|
20446
20481
|
if (!IS_MODIFIABLE(mc->mc_txn, lp.page)) {
|
20447
20482
|
if (IS_SPILLED(mc->mc_txn, lp.page)) {
|
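Note on the hunk above: the rewritten check reuses an existing large/overflow run only when its length lands in [dpages, dpages + extra_threshold], where the GC table gets one page of slack and every other table gets none. The decision, restated as a hypothetical standalone predicate:

    #include <stdbool.h>
    #include <stddef.h>

    /* Overwrite an existing overflow run in place only if it is big enough
     * and not wastefully oversized; the GC table (FREE_DBI) gets one spare
     * page of slack so its records can shrink without reallocating. */
    static bool can_overwrite_ovpages(size_t ovpages, size_t dpages,
                                      bool is_free_dbi, bool frozen) {
      const size_t extra_threshold = is_free_dbi ? 1 : 0;
      return !frozen && ovpages >= dpages &&
             ovpages <= dpages + extra_threshold;
    }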
@@ -20972,7 +21007,6 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
|
|
20972
21007
|
|
20973
21008
|
DEBUG("db %u allocated new page %" PRIaPGNO, mc->mc_dbi, ret.page->mp_pgno);
|
20974
21009
|
ret.page->mp_flags = (uint16_t)flags;
|
20975
|
-
ret.page->mp_txnid = mc->mc_txn->mt_front;
|
20976
21010
|
cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
|
20977
21011
|
cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
|
20978
21012
|
#if MDBX_ENABLE_PGOP_STAT
|
@@ -20994,25 +21028,24 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
|
|
20994
21028
|
return ret;
|
20995
21029
|
}
|
20996
21030
|
|
20997
|
-
static pgr_t page_new_large(MDBX_cursor *mc, const
|
21031
|
+
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) {
|
20998
21032
|
pgr_t ret = likely(npages == 1)
|
20999
21033
|
? page_alloc(mc)
|
21000
|
-
: page_alloc_slowpath(mc, npages,
|
21034
|
+
: page_alloc_slowpath(mc, npages, MDBX_ALLOC_DEFAULT);
|
21001
21035
|
if (unlikely(ret.err != MDBX_SUCCESS))
|
21002
21036
|
return ret;
|
21003
21037
|
|
21004
|
-
DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %
|
21038
|
+
DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %zu", mc->mc_dbi,
|
21005
21039
|
ret.page->mp_pgno, npages);
|
21006
21040
|
ret.page->mp_flags = P_OVERFLOW;
|
21007
|
-
ret.page->mp_txnid = mc->mc_txn->mt_front;
|
21008
21041
|
cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
|
21009
21042
|
cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
|
21010
21043
|
#if MDBX_ENABLE_PGOP_STAT
|
21011
21044
|
mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += npages;
|
21012
21045
|
#endif /* MDBX_ENABLE_PGOP_STAT */
|
21013
21046
|
|
21014
|
-
mc->mc_db->md_overflow_pages += npages;
|
21015
|
-
ret.page->mp_pages = npages;
|
21047
|
+
mc->mc_db->md_overflow_pages += (pgno_t)npages;
|
21048
|
+
ret.page->mp_pages = (pgno_t)npages;
|
21016
21049
|
cASSERT(mc, !(mc->mc_flags & C_SUB));
|
21017
21050
|
return ret;
|
21018
21051
|
}
|
@@ -21109,7 +21142,6 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21109
21142
|
key ? key->iov_len : 0, DKEY_DEBUG(key));
|
21110
21143
|
cASSERT(mc, key != NULL && data != NULL);
|
21111
21144
|
cASSERT(mc, PAGETYPE_COMPAT(mp) == P_LEAF);
|
21112
|
-
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
21113
21145
|
MDBX_page *largepage = NULL;
|
21114
21146
|
|
21115
21147
|
size_t node_bytes;
|
@@ -21118,6 +21150,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21118
21150
|
STATIC_ASSERT(sizeof(pgno_t) % 2 == 0);
|
21119
21151
|
node_bytes =
|
21120
21152
|
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
21153
|
+
cASSERT(mc, page_room(mp) >= node_bytes);
|
21121
21154
|
} else if (unlikely(node_size(key, data) >
|
21122
21155
|
mc->mc_txn->mt_env->me_leaf_nodemax)) {
|
21123
21156
|
/* Put data on large/overflow page. */
|
@@ -21131,6 +21164,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21131
21164
|
flags);
|
21132
21165
|
return MDBX_PROBLEM;
|
21133
21166
|
}
|
21167
|
+
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
21134
21168
|
const pgno_t ovpages = number_of_ovpages(mc->mc_txn->mt_env, data->iov_len);
|
21135
21169
|
const pgr_t npr = page_new_large(mc, ovpages);
|
21136
21170
|
if (unlikely(npr.err != MDBX_SUCCESS))
|
@@ -21142,10 +21176,12 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21142
21176
|
flags |= F_BIGDATA;
|
21143
21177
|
node_bytes =
|
21144
21178
|
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
21179
|
+
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
21145
21180
|
} else {
|
21181
|
+
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
21146
21182
|
node_bytes = node_size(key, data) + sizeof(indx_t);
|
21183
|
+
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
21147
21184
|
}
|
21148
|
-
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
21149
21185
|
|
21150
21186
|
/* Move higher pointers up one slot. */
|
21151
21187
|
const size_t nkeys = page_numkeys(mp);
|
@@ -22886,7 +22922,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
|
|
22886
22922
|
"big-node data size (%zu) <> min/max value-length (%zu/%zu)\n",
|
22887
22923
|
dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
|
22888
22924
|
if (unlikely(node_size_len(node_ks(node), dsize) <=
|
22889
|
-
mc->mc_txn->mt_env->me_leaf_nodemax)
|
22925
|
+
mc->mc_txn->mt_env->me_leaf_nodemax) &&
|
22926
|
+
mc->mc_dbi != FREE_DBI)
|
22890
22927
|
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
22891
22928
|
|
22892
22929
|
if ((mc->mc_checking & CC_RETIRING) == 0) {
|
@@ -22901,7 +22938,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
|
|
22901
22938
|
rc = bad_page(lp.page,
|
22902
22939
|
"too less n-pages %u for bigdata-node (%zu bytes)",
|
22903
22940
|
lp.page->mp_pages, dsize);
|
22904
|
-
else
|
22941
|
+
else if (mc->mc_dbi != FREE_DBI)
|
22905
22942
|
poor_page(lp.page,
|
22906
22943
|
"extra n-pages %u for bigdata-node (%zu bytes)",
|
22907
22944
|
lp.page->mp_pages, dsize);
|
@@ -23327,7 +23364,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
|
|
23327
23364
|
int rc = MDBX_SUCCESS, foliage = 0;
|
23328
23365
|
size_t i, ptop;
|
23329
23366
|
MDBX_env *const env = mc->mc_txn->mt_env;
|
23330
|
-
MDBX_val
|
23367
|
+
MDBX_val rkey, xdata;
|
23331
23368
|
MDBX_page *tmp_ki_copy = NULL;
|
23332
23369
|
DKBUF;
|
23333
23370
|
|
@@ -23419,6 +23456,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
|
|
23419
23456
|
eASSERT(env, split_indx >= minkeys && split_indx <= nkeys - minkeys + 1);
|
23420
23457
|
|
23421
23458
|
cASSERT(mc, !IS_BRANCH(mp) || newindx > 0);
|
23459
|
+
MDBX_val sepkey = {nullptr, 0};
|
23422
23460
|
/* It is reasonable and possible to split the page at the begin */
|
23423
23461
|
if (unlikely(newindx < minkeys)) {
|
23424
23462
|
split_indx = minkeys;
|
@@ -23751,7 +23789,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
|
|
23751
23789
|
break;
|
23752
23790
|
}
|
23753
23791
|
}
|
23754
|
-
} else if (!IS_LEAF2(mp)) {
|
23792
|
+
} else if (tmp_ki_copy /* !IS_LEAF2(mp) */) {
|
23755
23793
|
/* Move nodes */
|
23756
23794
|
mc->mc_pg[mc->mc_top] = sister;
|
23757
23795
|
i = split_indx;
|
@@ -25053,7 +25091,7 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn,
|
|
25053
25091
|
const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
|
25054
25092
|
|
25055
25093
|
/* is the environment open?
|
25056
|
-
* (https://
|
25094
|
+
* (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */
|
25057
25095
|
if (unlikely(!env->me_map)) {
|
25058
25096
|
/* environment not yet opened */
|
25059
25097
|
#if 1
|
@@ -27864,7 +27902,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn,
|
|
27864
27902
|
if (getrlimit(RLIMIT_RSS, &rss) == 0 && rss.rlim_cur < estimated_rss) {
|
27865
27903
|
rss.rlim_cur = estimated_rss;
|
27866
27904
|
if (rss.rlim_max < estimated_rss)
|
27867
|
-
rss.rlim_max =
|
27905
|
+
rss.rlim_max = estimated_rss;
|
27868
27906
|
if (setrlimit(RLIMIT_RSS, &rss)) {
|
27869
27907
|
rc = errno;
|
27870
27908
|
WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS",
|
@@ -29696,7 +29734,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
|
29696
29734
|
flags |= O_CLOEXEC;
|
29697
29735
|
#endif /* O_CLOEXEC */
|
29698
29736
|
|
29699
|
-
/* Safeguard for https://
|
29737
|
+
/* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
|
29700
29738
|
#if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
|
29701
29739
|
int stub_fd0 = -1, stub_fd1 = -1, stub_fd2 = -1;
|
29702
29740
|
static const char dev_null[] = "/dev/null";
|
@@ -29734,7 +29772,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
|
29734
29772
|
errno = EACCES /* restore errno if file exists */;
|
29735
29773
|
}
|
29736
29774
|
|
29737
|
-
/* Safeguard for https://
|
29775
|
+
/* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
|
29738
29776
|
#if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
|
29739
29777
|
if (*fd == STDIN_FILENO) {
|
29740
29778
|
WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "IN",
|
@@ -30091,10 +30129,15 @@ MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
|
30091
30129
|
return (int)GetLastError();
|
30092
30130
|
#else
|
30093
30131
|
#if defined(__linux__) || defined(__gnu_linux__)
|
30094
|
-
assert(linux_kernel_version > 0x02061300);
|
30095
30132
|
/* Since Linux 2.6.19, MS_ASYNC is in fact a no-op. The kernel properly
|
30096
|
-
* tracks dirty pages and flushes
|
30097
|
-
|
30133
|
+
* tracks dirty pages and flushes ones as necessary. */
|
30134
|
+
//
|
30135
|
+
// However, this behavior may be changed in custom kernels,
|
30136
|
+
// so just leave such optimization to the libc discretion.
|
30137
|
+
//
|
30138
|
+
// assert(linux_kernel_version > 0x02061300);
|
30139
|
+
// if (mode_bits == MDBX_SYNC_NONE)
|
30140
|
+
// return MDBX_SUCCESS;
|
30098
30141
|
#endif /* Linux */
|
30099
30142
|
if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
|
30100
30143
|
return errno;
|
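Note on the hunk above: the change drops the kernel-version assert and leaves the MS_ASYNC no-op optimization to the kernel/libc, as the new comment explains; the flag is still chosen from the requested durability. A minimal sketch of the remaining call, assuming POSIX msync():

    #include <errno.h>
    #include <stddef.h>
    #include <sys/mman.h>

    /* Flush an mmap'ed range: MS_SYNC when a durable data sync is
     * requested, MS_ASYNC otherwise (a no-op on Linux >= 2.6.19, but
     * that shortcut is now left to the kernel/libc). */
    static int flush_map_range(void *ptr, size_t length, int want_data_sync) {
      if (msync(ptr, length, want_data_sync ? MS_SYNC : MS_ASYNC))
        return errno;
      return 0;
    }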
@@ -30577,7 +30620,7 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
|
|
30577
30620
|
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
|
30578
30621
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30579
30622
|
* when this memory will re-used by malloc or another mmapping.
|
30580
|
-
* See https://
|
30623
|
+
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30581
30624
|
*/
|
30582
30625
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address,
|
30583
30626
|
(map->filesize && map->filesize < map->limit)
|
@@ -30656,7 +30699,7 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
|
|
30656
30699
|
|
30657
30700
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30658
30701
|
* when this memory will re-used by malloc or another mmapping.
|
30659
|
-
* See https://
|
30702
|
+
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30660
30703
|
*/
|
30661
30704
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
|
30662
30705
|
status = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
|
@@ -30937,7 +30980,7 @@ retry_mapview:;
|
|
30937
30980
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30938
30981
|
* when this memory will re-used by malloc or another mmapping.
|
30939
30982
|
* See
|
30940
|
-
* https://
|
30983
|
+
* https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30941
30984
|
*/
|
30942
30985
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(
|
30943
30986
|
map->address,
|
@@ -30959,7 +31002,7 @@ retry_mapview:;
|
|
30959
31002
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30960
31003
|
* when this memory will re-used by malloc or another mmapping.
|
30961
31004
|
* See
|
30962
|
-
* https://
|
31005
|
+
* https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30963
31006
|
*/
|
30964
31007
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(
|
30965
31008
|
map->address, (map->current < map->limit) ? map->current : map->limit);
|
@@ -31782,9 +31825,9 @@ __dll_export
|
|
31782
31825
|
0,
|
31783
31826
|
12,
|
31784
31827
|
2,
|
31785
|
-
|
31786
|
-
{"2022-11-
|
31787
|
-
"v0.12.2-
|
31828
|
+
18,
|
31829
|
+
{"2022-11-28T15:45:29+03:00", "9558651eb24ab172a73a7bc6149cadad4c4df990", "b3248442962cfdda728656d6d9085147a7d42b63",
|
31830
|
+
"v0.12.2-18-gb3248442"},
|
31788
31831
|
sourcery};
|
31789
31832
|
|
31790
31833
|
__dll_export
|