isomorfeus-ferret 0.15.0 → 0.16.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +170 -48
- data/ext/isomorfeus_ferret_ext/frb_search.c +1 -1
- data/ext/isomorfeus_ferret_ext/frb_store.c +231 -108
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +6 -12
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +114 -56
- data/ext/isomorfeus_ferret_ext/frt_store.h +0 -9
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +2 -2
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +656 -613
- data/ext/isomorfeus_ferret_ext/test.c +26 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_segments.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_sort.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +2 -2
- data/ext/isomorfeus_ferret_ext/tests_all.h +0 -3
- data/lib/isomorfeus/ferret/index/index.rb +8 -9
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +4 -6
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +0 -479
- data/ext/isomorfeus_ferret_ext/test_fs_store.c +0 -25
@@ -12,7 +12,7 @@
|
|
12
12
|
* <http://www.OpenLDAP.org/license.html>. */
|
13
13
|
|
14
14
|
#define xMDBX_ALLOY 1
|
15
|
-
#define MDBX_BUILD_SOURCERY
|
15
|
+
#define MDBX_BUILD_SOURCERY 56f8a04f0668bb80d0d3f24fd2c9958d9aeb83004b65badfd5ccfa80647a2218_v0_12_2_18_gb3248442
|
16
16
|
#ifdef MDBX_CONFIG_H
|
17
17
|
#include MDBX_CONFIG_H
|
18
18
|
#endif
|
@@ -428,14 +428,13 @@ __extern_C key_t ftok(const char *, int);
|
|
428
428
|
/* Byteorder */
|
429
429
|
|
430
430
|
#if defined(i386) || defined(__386) || defined(__i386) || defined(__i386__) || \
|
431
|
-
defined(i486) || defined(__i486) || defined(__i486__) ||
|
432
|
-
defined(
|
433
|
-
defined(
|
434
|
-
defined(
|
435
|
-
defined(
|
436
|
-
defined(__x86_64) || defined(__x86_64__) || \
|
431
|
+
defined(i486) || defined(__i486) || defined(__i486__) || defined(i586) || \
|
432
|
+
defined(__i586) || defined(__i586__) || defined(i686) || \
|
433
|
+
defined(__i686) || defined(__i686__) || defined(_M_IX86) || \
|
434
|
+
defined(_X86_) || defined(__THW_INTEL__) || defined(__I86__) || \
|
435
|
+
defined(__INTEL__) || defined(__x86_64) || defined(__x86_64__) || \
|
437
436
|
defined(__amd64__) || defined(__amd64) || defined(_M_X64) || \
|
438
|
-
defined(_M_AMD64) || defined(__IA32__)
|
437
|
+
defined(_M_AMD64) || defined(__IA32__) || defined(__INTEL__)
|
439
438
|
#ifndef __ia32__
|
440
439
|
/* LY: define neutral __ia32__ for x86 and x86-64 */
|
441
440
|
#define __ia32__ 1
|
@@ -3138,13 +3137,9 @@ struct MDBX_txn {
|
|
3138
3137
|
/* Additional flag for sync_locked() */
|
3139
3138
|
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
3140
3139
|
|
3141
|
-
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
3142
|
-
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not altered */
|
3143
|
-
|
3144
3140
|
#define TXN_FLAGS \
|
3145
3141
|
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
3146
|
-
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID
|
3147
|
-
MDBX_TXN_FROZEN_RE)
|
3142
|
+
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
3148
3143
|
|
3149
3144
|
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
3150
3145
|
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
@@ -3226,11 +3221,16 @@ struct MDBX_txn {
|
|
3226
3221
|
MDBX_page *loose_pages;
|
3227
3222
|
/* Number of loose pages (tw.loose_pages) */
|
3228
3223
|
size_t loose_count;
|
3229
|
-
|
3230
|
-
|
3231
|
-
|
3232
|
-
|
3233
|
-
|
3224
|
+
union {
|
3225
|
+
struct {
|
3226
|
+
size_t least_removed;
|
3227
|
+
/* The sorted list of dirty pages we temporarily wrote to disk
|
3228
|
+
* because the dirty list was full. page numbers in here are
|
3229
|
+
* shifted left by 1, deleted slots have the LSB set. */
|
3230
|
+
MDBX_PNL list;
|
3231
|
+
} spilled;
|
3232
|
+
size_t writemap_dirty_npages;
|
3233
|
+
};
|
3234
3234
|
} tw;
|
3235
3235
|
};
|
3236
3236
|
};
|
@@ -3280,6 +3280,9 @@ struct MDBX_cursor {
|
|
3280
3280
|
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
3281
3281
|
#define C_DEL 0x08 /* last op was a cursor_del */
|
3282
3282
|
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
3283
|
+
#define C_GCU \
|
3284
|
+
0x20 /* Происходит подготовка к обновлению GC, поэтому \
|
3285
|
+
* можно брать страницы из GC даже для FREE_DBI */
|
3283
3286
|
uint8_t mc_flags;
|
3284
3287
|
|
3285
3288
|
/* Cursor checking flags. */
|
@@ -4643,7 +4646,7 @@ __cold static const char *pagetype_caption(const uint8_t type,
|
|
4643
4646
|
}
|
4644
4647
|
}
|
4645
4648
|
|
4646
|
-
__cold static
|
4649
|
+
__cold static int MDBX_PRINTF_ARGS(2, 3)
|
4647
4650
|
bad_page(const MDBX_page *mp, const char *fmt, ...) {
|
4648
4651
|
if (LOG_ENABLED(MDBX_LOG_ERROR)) {
|
4649
4652
|
static const MDBX_page *prev;
|
@@ -5257,7 +5260,7 @@ __cold void thread_dtor(void *rthc) {
|
|
5257
5260
|
if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) {
|
5258
5261
|
TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(),
|
5259
5262
|
__Wpedantic_format_voidptr(reader));
|
5260
|
-
atomic_cas32(&reader->mr_pid, self_pid, 0);
|
5263
|
+
(void)atomic_cas32(&reader->mr_pid, self_pid, 0);
|
5261
5264
|
}
|
5262
5265
|
}
|
5263
5266
|
|
@@ -6346,50 +6349,51 @@ __hot static size_t pnl_merge(MDBX_PNL dst, const MDBX_PNL src) {
|
|
6346
6349
|
return total;
|
6347
6350
|
}
|
6348
6351
|
|
6349
|
-
static void spill_remove(MDBX_txn *txn, size_t idx,
|
6350
|
-
tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.
|
6351
|
-
txn->tw.
|
6352
|
-
txn->tw.
|
6353
|
-
|
6354
|
-
|
6355
|
-
|
6356
|
-
|
6357
|
-
|
6352
|
+
static void spill_remove(MDBX_txn *txn, size_t idx, size_t npages) {
|
6353
|
+
tASSERT(txn, idx > 0 && idx <= MDBX_PNL_GETSIZE(txn->tw.spilled.list) &&
|
6354
|
+
txn->tw.spilled.least_removed > 0);
|
6355
|
+
txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
|
6356
|
+
? idx
|
6357
|
+
: txn->tw.spilled.least_removed;
|
6358
|
+
txn->tw.spilled.list[idx] |= 1;
|
6359
|
+
MDBX_PNL_SETSIZE(txn->tw.spilled.list,
|
6360
|
+
MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
|
6361
|
+
(idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
|
6358
6362
|
|
6359
6363
|
while (unlikely(npages > 1)) {
|
6360
|
-
const pgno_t pgno = (txn->tw.
|
6364
|
+
const pgno_t pgno = (txn->tw.spilled.list[idx] >> 1) + 1;
|
6361
6365
|
if (MDBX_PNL_ASCENDING) {
|
6362
|
-
if (++idx > MDBX_PNL_GETSIZE(txn->tw.
|
6363
|
-
(txn->tw.
|
6366
|
+
if (++idx > MDBX_PNL_GETSIZE(txn->tw.spilled.list) ||
|
6367
|
+
(txn->tw.spilled.list[idx] >> 1) != pgno)
|
6364
6368
|
return;
|
6365
6369
|
} else {
|
6366
|
-
if (--idx < 1 || (txn->tw.
|
6370
|
+
if (--idx < 1 || (txn->tw.spilled.list[idx] >> 1) != pgno)
|
6367
6371
|
return;
|
6368
|
-
txn->tw.
|
6369
|
-
|
6370
|
-
|
6371
|
-
}
|
6372
|
-
txn->tw.
|
6373
|
-
MDBX_PNL_SETSIZE(txn->tw.
|
6374
|
-
MDBX_PNL_GETSIZE(txn->tw.
|
6375
|
-
(idx == MDBX_PNL_GETSIZE(txn->tw.
|
6372
|
+
txn->tw.spilled.least_removed = (idx < txn->tw.spilled.least_removed)
|
6373
|
+
? idx
|
6374
|
+
: txn->tw.spilled.least_removed;
|
6375
|
+
}
|
6376
|
+
txn->tw.spilled.list[idx] |= 1;
|
6377
|
+
MDBX_PNL_SETSIZE(txn->tw.spilled.list,
|
6378
|
+
MDBX_PNL_GETSIZE(txn->tw.spilled.list) -
|
6379
|
+
(idx == MDBX_PNL_GETSIZE(txn->tw.spilled.list)));
|
6376
6380
|
--npages;
|
6377
6381
|
}
|
6378
6382
|
}
|
6379
6383
|
|
6380
6384
|
static MDBX_PNL spill_purge(MDBX_txn *txn) {
|
6381
|
-
tASSERT(txn, txn->tw.
|
6382
|
-
const MDBX_PNL sl = txn->tw.
|
6383
|
-
if (txn->tw.
|
6385
|
+
tASSERT(txn, txn->tw.spilled.least_removed > 0);
|
6386
|
+
const MDBX_PNL sl = txn->tw.spilled.list;
|
6387
|
+
if (txn->tw.spilled.least_removed != INT_MAX) {
|
6384
6388
|
size_t len = MDBX_PNL_GETSIZE(sl), r, w;
|
6385
|
-
for (w = r = txn->tw.
|
6389
|
+
for (w = r = txn->tw.spilled.least_removed; r <= len; ++r) {
|
6386
6390
|
sl[w] = sl[r];
|
6387
6391
|
w += 1 - (sl[r] & 1);
|
6388
6392
|
}
|
6389
6393
|
for (size_t i = 1; i < w; ++i)
|
6390
6394
|
tASSERT(txn, (sl[i] & 1) == 0);
|
6391
6395
|
MDBX_PNL_SETSIZE(sl, w - 1);
|
6392
|
-
txn->tw.
|
6396
|
+
txn->tw.spilled.least_removed = INT_MAX;
|
6393
6397
|
} else {
|
6394
6398
|
for (size_t i = 1; i <= MDBX_PNL_GETSIZE(sl); ++i)
|
6395
6399
|
tASSERT(txn, (sl[i] & 1) == 0);
|
@@ -6445,7 +6449,8 @@ static __inline size_t pnl_search(const MDBX_PNL pnl, pgno_t pgno,
|
|
6445
6449
|
}
|
6446
6450
|
|
6447
6451
|
static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
|
6448
|
-
|
6452
|
+
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
6453
|
+
const MDBX_PNL pnl = txn->tw.spilled.list;
|
6449
6454
|
if (likely(!pnl))
|
6450
6455
|
return 0;
|
6451
6456
|
pgno <<= 1;
|
@@ -6454,8 +6459,8 @@ static __inline size_t search_spilled(const MDBX_txn *txn, pgno_t pgno) {
|
|
6454
6459
|
}
|
6455
6460
|
|
6456
6461
|
static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
|
6457
|
-
|
6458
|
-
const MDBX_PNL pnl = txn->tw.
|
6462
|
+
size_t npages) {
|
6463
|
+
const MDBX_PNL pnl = txn->tw.spilled.list;
|
6459
6464
|
if (likely(!pnl))
|
6460
6465
|
return false;
|
6461
6466
|
const size_t len = MDBX_PNL_GETSIZE(pnl);
|
@@ -6467,7 +6472,7 @@ static __inline bool intersect_spilled(const MDBX_txn *txn, pgno_t pgno,
|
|
6467
6472
|
DEBUG_EXTRA_PRINT("%s\n", "]");
|
6468
6473
|
}
|
6469
6474
|
const pgno_t spilled_range_begin = pgno << 1;
|
6470
|
-
const pgno_t spilled_range_last = ((pgno + npages) << 1) - 1;
|
6475
|
+
const pgno_t spilled_range_last = ((pgno + (pgno_t)npages) << 1) - 1;
|
6471
6476
|
#if MDBX_PNL_ASCENDING
|
6472
6477
|
const size_t n =
|
6473
6478
|
pnl_search(pnl, spilled_range_begin, (size_t)(MAX_PAGENO + 1) << 1);
|
@@ -6831,7 +6836,7 @@ dpl_endpgno(const MDBX_dpl *dl, size_t i) {
|
|
6831
6836
|
}
|
6832
6837
|
|
6833
6838
|
static __inline bool dpl_intersect(const MDBX_txn *txn, pgno_t pgno,
|
6834
|
-
|
6839
|
+
size_t npages) {
|
6835
6840
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
6836
6841
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
6837
6842
|
|
@@ -6889,7 +6894,7 @@ MDBX_MAYBE_UNUSED static const MDBX_page *debug_dpl_find(const MDBX_txn *txn,
|
|
6889
6894
|
return nullptr;
|
6890
6895
|
}
|
6891
6896
|
|
6892
|
-
static void dpl_remove_ex(const MDBX_txn *txn, size_t i,
|
6897
|
+
static void dpl_remove_ex(const MDBX_txn *txn, size_t i, size_t npages) {
|
6893
6898
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
6894
6899
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
6895
6900
|
|
@@ -6911,7 +6916,7 @@ static void dpl_remove(const MDBX_txn *txn, size_t i) {
|
|
6911
6916
|
static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
|
6912
6917
|
pgno_t pgno,
|
6913
6918
|
MDBX_page *page,
|
6914
|
-
|
6919
|
+
size_t npages) {
|
6915
6920
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
6916
6921
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
6917
6922
|
MDBX_dpl *dl = txn->tw.dirtylist;
|
@@ -6980,7 +6985,7 @@ static __must_check_result __inline int page_retire(MDBX_cursor *mc,
|
|
6980
6985
|
MDBX_page *mp);
|
6981
6986
|
|
6982
6987
|
static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
|
6983
|
-
|
6988
|
+
size_t npages);
|
6984
6989
|
typedef struct page_result {
|
6985
6990
|
MDBX_page *page;
|
6986
6991
|
int err;
|
@@ -6989,7 +6994,7 @@ typedef struct page_result {
|
|
6989
6994
|
static txnid_t kick_longlived_readers(MDBX_env *env, const txnid_t laggard);
|
6990
6995
|
|
6991
6996
|
static pgr_t page_new(MDBX_cursor *mc, const unsigned flags);
|
6992
|
-
static pgr_t page_new_large(MDBX_cursor *mc, const
|
6997
|
+
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages);
|
6993
6998
|
static int page_touch(MDBX_cursor *mc);
|
6994
6999
|
static int cursor_touch(MDBX_cursor *mc);
|
6995
7000
|
static int touch_dbi(MDBX_cursor *mc);
|
@@ -7588,7 +7593,7 @@ static MDBX_page *page_malloc(MDBX_txn *txn, size_t num) {
|
|
7588
7593
|
}
|
7589
7594
|
|
7590
7595
|
/* Free a shadow dirty page */
|
7591
|
-
static void dpage_free(MDBX_env *env, MDBX_page *dp,
|
7596
|
+
static void dpage_free(MDBX_env *env, MDBX_page *dp, size_t npages) {
|
7592
7597
|
VALGRIND_MAKE_MEM_UNDEFINED(dp, pgno2bytes(env, npages));
|
7593
7598
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(dp, pgno2bytes(env, npages));
|
7594
7599
|
if (unlikely(env->me_flags & MDBX_PAGEPERTURB))
|
@@ -7910,7 +7915,7 @@ static bool txn_refund(MDBX_txn *txn) {
|
|
7910
7915
|
if (before == txn->mt_next_pgno)
|
7911
7916
|
return false;
|
7912
7917
|
|
7913
|
-
if (txn->tw.
|
7918
|
+
if (txn->tw.spilled.list)
|
7914
7919
|
/* Squash deleted pagenums if we refunded any */
|
7915
7920
|
spill_purge(txn);
|
7916
7921
|
|
@@ -7925,9 +7930,9 @@ static __inline bool txn_refund(MDBX_txn *txn) {
|
|
7925
7930
|
#endif /* MDBX_ENABLE_REFUND */
|
7926
7931
|
|
7927
7932
|
__cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
|
7928
|
-
|
7933
|
+
size_t npages) {
|
7929
7934
|
MDBX_env *const env = txn->mt_env;
|
7930
|
-
DEBUG("kill %
|
7935
|
+
DEBUG("kill %zu page(s) %" PRIaPGNO, npages, pgno);
|
7931
7936
|
eASSERT(env, pgno >= NUM_METAS && npages);
|
7932
7937
|
if (!IS_FROZEN(txn, mp)) {
|
7933
7938
|
const size_t bytes = pgno2bytes(env, npages);
|
@@ -7954,7 +7959,7 @@ __cold static void kill_page(MDBX_txn *txn, MDBX_page *mp, pgno_t pgno,
|
|
7954
7959
|
|
7955
7960
|
/* Remove page from dirty list */
|
7956
7961
|
static __inline void page_wash(MDBX_txn *txn, const size_t di,
|
7957
|
-
MDBX_page *const mp, const
|
7962
|
+
MDBX_page *const mp, const size_t npages) {
|
7958
7963
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
7959
7964
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
7960
7965
|
tASSERT(txn, di && di <= txn->tw.dirtylist->length &&
|
@@ -8003,7 +8008,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
|
|
8003
8008
|
* So for flexibility and avoid extra internal dependencies we just
|
8004
8009
|
* fallback to reading if dirty list was not allocated yet. */
|
8005
8010
|
size_t di = 0, si = 0;
|
8006
|
-
|
8011
|
+
size_t npages = 1;
|
8007
8012
|
bool is_frozen = false, is_spilled = false, is_shadowed = false;
|
8008
8013
|
if (unlikely(!mp)) {
|
8009
8014
|
if (ASSERT_ENABLED() && pageflags) {
|
@@ -8019,7 +8024,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
|
|
8019
8024
|
is_frozen = true;
|
8020
8025
|
if (ASSERT_ENABLED()) {
|
8021
8026
|
for (MDBX_txn *scan = txn; scan; scan = scan->mt_parent) {
|
8022
|
-
tASSERT(txn, !search_spilled(scan, pgno));
|
8027
|
+
tASSERT(txn, !txn->tw.spilled.list || !search_spilled(scan, pgno));
|
8023
8028
|
tASSERT(txn, !scan->tw.dirtylist || !debug_dpl_find(scan, pgno));
|
8024
8029
|
}
|
8025
8030
|
}
|
@@ -8064,7 +8069,7 @@ static int page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
|
|
8064
8069
|
is_shadowed = IS_SHADOWED(txn, mp);
|
8065
8070
|
if (is_dirty) {
|
8066
8071
|
tASSERT(txn, !is_spilled);
|
8067
|
-
tASSERT(txn, !search_spilled(txn, pgno));
|
8072
|
+
tASSERT(txn, !txn->tw.spilled.list || !search_spilled(txn, pgno));
|
8068
8073
|
tASSERT(txn, debug_dpl_find(txn, pgno) == mp || txn->mt_parent ||
|
8069
8074
|
(txn->mt_flags & MDBX_WRITEMAP));
|
8070
8075
|
} else {
|
@@ -8098,12 +8103,12 @@ status_done:
|
|
8098
8103
|
} else {
|
8099
8104
|
npages = mp->mp_pages;
|
8100
8105
|
cASSERT(mc, mc->mc_db->md_overflow_pages >= npages);
|
8101
|
-
mc->mc_db->md_overflow_pages -= npages;
|
8106
|
+
mc->mc_db->md_overflow_pages -= (pgno_t)npages;
|
8102
8107
|
}
|
8103
8108
|
|
8104
8109
|
if (is_frozen) {
|
8105
8110
|
retire:
|
8106
|
-
DEBUG("retire %
|
8111
|
+
DEBUG("retire %zu page %" PRIaPGNO, npages, pgno);
|
8107
8112
|
rc = pnl_append_range(false, &txn->tw.retired_pages, pgno, npages);
|
8108
8113
|
tASSERT(txn, dirtylist_check(txn));
|
8109
8114
|
return rc;
|
@@ -8154,7 +8159,7 @@ status_done:
|
|
8154
8159
|
}
|
8155
8160
|
tASSERT(txn, is_spilled || is_shadowed || (mp && IS_SHADOWED(txn, mp)));
|
8156
8161
|
}
|
8157
|
-
DEBUG("refunded %
|
8162
|
+
DEBUG("refunded %zu %s page %" PRIaPGNO, npages, kind, pgno);
|
8158
8163
|
txn->mt_next_pgno = pgno;
|
8159
8164
|
txn_refund(txn);
|
8160
8165
|
return MDBX_SUCCESS;
|
@@ -8223,7 +8228,7 @@ status_done:
|
|
8223
8228
|
page_wash(txn, di, mp, npages);
|
8224
8229
|
|
8225
8230
|
reclaim:
|
8226
|
-
DEBUG("reclaim %
|
8231
|
+
DEBUG("reclaim %zu %s page %" PRIaPGNO, npages, "dirty", pgno);
|
8227
8232
|
rc = pnl_insert_range(&txn->tw.relist, pgno, npages);
|
8228
8233
|
tASSERT(txn, pnl_check_allocated(txn->tw.relist,
|
8229
8234
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
@@ -8330,7 +8335,7 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
|
|
8330
8335
|
osal_flush_incoherent_mmap(env->me_map + offset, bytes, env->me_os_psize);
|
8331
8336
|
const MDBX_page *const rp = (const MDBX_page *)(env->me_map + offset);
|
8332
8337
|
/* check with timeout as the workaround
|
8333
|
-
* for https://
|
8338
|
+
* for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
8334
8339
|
if (unlikely(memcmp(wp, rp, bytes))) {
|
8335
8340
|
ctx->coherency_timestamp = 0;
|
8336
8341
|
WARNING("catch delayed/non-arrived page %" PRIaPGNO " %s", wp->mp_pgno,
|
@@ -8351,11 +8356,12 @@ static void iov_callback4dirtypages(iov_ctx_t *ctx, size_t offset, void *data,
|
|
8351
8356
|
do {
|
8352
8357
|
eASSERT(env, wp->mp_pgno == bytes2pgno(env, offset));
|
8353
8358
|
eASSERT(env, (wp->mp_flags & P_ILL_BITS) == 0);
|
8354
|
-
|
8359
|
+
size_t npages = IS_OVERFLOW(wp) ? wp->mp_pages : 1u;
|
8355
8360
|
size_t chunk = pgno2bytes(env, npages);
|
8356
8361
|
eASSERT(env, bytes >= chunk);
|
8362
|
+
MDBX_page *next = (MDBX_page *)((char *)wp + chunk);
|
8357
8363
|
dpage_free(env, wp, npages);
|
8358
|
-
wp =
|
8364
|
+
wp = next;
|
8359
8365
|
offset += chunk;
|
8360
8366
|
bytes -= chunk;
|
8361
8367
|
} while (bytes);
|
@@ -8384,7 +8390,7 @@ __must_check_result static int iov_write(iov_ctx_t *ctx) {
|
|
8384
8390
|
}
|
8385
8391
|
|
8386
8392
|
__must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
|
8387
|
-
MDBX_page *dp,
|
8393
|
+
MDBX_page *dp, size_t npages) {
|
8388
8394
|
MDBX_env *const env = txn->mt_env;
|
8389
8395
|
tASSERT(txn, ctx->err == MDBX_SUCCESS);
|
8390
8396
|
tASSERT(txn, dp->mp_pgno >= MIN_PAGENO && dp->mp_pgno < txn->mt_next_pgno);
|
@@ -8428,16 +8434,16 @@ __must_check_result static int iov_page(MDBX_txn *txn, iov_ctx_t *ctx,
|
|
8428
8434
|
#if MDBX_NEED_WRITTEN_RANGE
|
8429
8435
|
ctx->flush_begin =
|
8430
8436
|
(ctx->flush_begin < dp->mp_pgno) ? ctx->flush_begin : dp->mp_pgno;
|
8431
|
-
ctx->flush_end = (ctx->flush_end > dp->mp_pgno + npages)
|
8437
|
+
ctx->flush_end = (ctx->flush_end > dp->mp_pgno + (pgno_t)npages)
|
8432
8438
|
? ctx->flush_end
|
8433
|
-
: dp->mp_pgno + npages;
|
8439
|
+
: dp->mp_pgno + (pgno_t)npages;
|
8434
8440
|
#endif /* MDBX_NEED_WRITTEN_RANGE */
|
8435
8441
|
env->me_lck->mti_unsynced_pages.weak += npages;
|
8436
8442
|
return MDBX_SUCCESS;
|
8437
8443
|
}
|
8438
8444
|
|
8439
8445
|
static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
|
8440
|
-
const
|
8446
|
+
const size_t npages) {
|
8441
8447
|
tASSERT(txn, !(txn->mt_flags & MDBX_WRITEMAP) || MDBX_AVOID_MSYNC);
|
8442
8448
|
#if MDBX_ENABLE_PGOP_STAT
|
8443
8449
|
txn->mt_env->me_lck->mti_pgop_stat.spill.weak += npages;
|
@@ -8446,7 +8452,7 @@ static int spill_page(MDBX_txn *txn, iov_ctx_t *ctx, MDBX_page *dp,
|
|
8446
8452
|
int err = iov_page(txn, ctx, dp, npages);
|
8447
8453
|
if (likely(err == MDBX_SUCCESS) &&
|
8448
8454
|
(!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)))
|
8449
|
-
err = pnl_append_range(true, &txn->tw.
|
8455
|
+
err = pnl_append_range(true, &txn->tw.spilled.list, pgno << 1, npages);
|
8450
8456
|
return err;
|
8451
8457
|
}
|
8452
8458
|
|
@@ -8496,16 +8502,16 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
|
|
8496
8502
|
const uint32_t reciprocal) {
|
8497
8503
|
MDBX_dpl *const dl = txn->tw.dirtylist;
|
8498
8504
|
const uint32_t age = dpl_age(txn, i);
|
8499
|
-
const
|
8505
|
+
const size_t npages = dpl_npages(dl, i);
|
8500
8506
|
const pgno_t pgno = dl->items[i].pgno;
|
8501
8507
|
if (age == 0) {
|
8502
|
-
DEBUG("skip %s %
|
8508
|
+
DEBUG("skip %s %zu page %" PRIaPGNO, "keep", npages, pgno);
|
8503
8509
|
return 256;
|
8504
8510
|
}
|
8505
8511
|
|
8506
8512
|
MDBX_page *const dp = dl->items[i].ptr;
|
8507
8513
|
if (dp->mp_flags & (P_LOOSE | P_SPILLED)) {
|
8508
|
-
DEBUG("skip %s %
|
8514
|
+
DEBUG("skip %s %zu page %" PRIaPGNO,
|
8509
8515
|
(dp->mp_flags & P_LOOSE) ? "loose"
|
8510
8516
|
: (dp->mp_flags & P_LOOSE) ? "loose"
|
8511
8517
|
: "parent-spilled",
|
@@ -8519,7 +8525,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
|
|
8519
8525
|
if (parent && (parent->mt_flags & MDBX_TXN_SPILLS)) {
|
8520
8526
|
do
|
8521
8527
|
if (intersect_spilled(parent, pgno, npages)) {
|
8522
|
-
DEBUG("skip-2 parent-spilled %
|
8528
|
+
DEBUG("skip-2 parent-spilled %zu page %" PRIaPGNO, npages, pgno);
|
8523
8529
|
dp->mp_flags |= P_SPILLED;
|
8524
8530
|
return 256;
|
8525
8531
|
}
|
@@ -8533,7 +8539,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
|
|
8533
8539
|
return prio = 256 - prio;
|
8534
8540
|
|
8535
8541
|
/* make a large/overflow pages be likely to spill */
|
8536
|
-
|
8542
|
+
size_t factor = npages | npages >> 1;
|
8537
8543
|
factor |= factor >> 2;
|
8538
8544
|
factor |= factor >> 4;
|
8539
8545
|
factor |= factor >> 8;
|
@@ -8541,7 +8547,7 @@ static unsigned spill_prio(const MDBX_txn *txn, const size_t i,
|
|
8541
8547
|
factor = prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157;
|
8542
8548
|
factor = (factor < 256) ? 255 - factor : 0;
|
8543
8549
|
tASSERT(txn, factor < 256 && factor < (256 - prio));
|
8544
|
-
return prio = factor;
|
8550
|
+
return prio = (unsigned)factor;
|
8545
8551
|
}
|
8546
8552
|
|
8547
8553
|
/* Spill pages from the dirty list back to disk.
|
@@ -8645,7 +8651,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|
8645
8651
|
if (txn->mt_flags & MDBX_WRITEMAP) {
|
8646
8652
|
NOTICE("%s-spilling %zu dirty-entries, %zu dirty-npages", "msync",
|
8647
8653
|
dirty_entries, dirty_npages);
|
8648
|
-
tASSERT(txn, txn->tw.
|
8654
|
+
tASSERT(txn, txn->tw.spilled.list == nullptr);
|
8649
8655
|
const MDBX_env *env = txn->mt_env;
|
8650
8656
|
rc =
|
8651
8657
|
osal_msync(&txn->mt_env->me_dxb_mmap, 0,
|
@@ -8669,10 +8675,10 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|
8669
8675
|
tASSERT(txn, txn->tw.dirtylist->pages_including_loose - txn->tw.loose_count >=
|
8670
8676
|
need_spill_npages);
|
8671
8677
|
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
|
8672
|
-
if (!txn->tw.
|
8673
|
-
txn->tw.
|
8674
|
-
txn->tw.
|
8675
|
-
if (unlikely(!txn->tw.
|
8678
|
+
if (!txn->tw.spilled.list) {
|
8679
|
+
txn->tw.spilled.least_removed = INT_MAX;
|
8680
|
+
txn->tw.spilled.list = pnl_alloc(need_spill);
|
8681
|
+
if (unlikely(!txn->tw.spilled.list)) {
|
8676
8682
|
rc = MDBX_ENOMEM;
|
8677
8683
|
bailout:
|
8678
8684
|
txn->mt_flags |= MDBX_TXN_ERROR;
|
@@ -8681,7 +8687,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|
8681
8687
|
} else {
|
8682
8688
|
/* purge deleted slots */
|
8683
8689
|
spill_purge(txn);
|
8684
|
-
rc = pnl_reserve(&txn->tw.
|
8690
|
+
rc = pnl_reserve(&txn->tw.spilled.list, need_spill);
|
8685
8691
|
(void)rc /* ignore since the resulting list may be shorter
|
8686
8692
|
and pnl_append() will increase pnl on demand */
|
8687
8693
|
;
|
@@ -8865,7 +8871,7 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|
8865
8871
|
goto bailout;
|
8866
8872
|
|
8867
8873
|
if (!MDBX_AVOID_MSYNC || !(txn->mt_flags & MDBX_WRITEMAP)) {
|
8868
|
-
pnl_sort(txn->tw.
|
8874
|
+
pnl_sort(txn->tw.spilled.list, (size_t)txn->mt_next_pgno << 1);
|
8869
8875
|
txn->mt_flags |= MDBX_TXN_SPILLS;
|
8870
8876
|
}
|
8871
8877
|
NOTICE("spilled %u dirty-entries, %u dirty-npages, now have %zu dirty-room",
|
@@ -9279,6 +9285,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) {
|
|
9279
9285
|
MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
|
9280
9286
|
if (unlikely(lck == NULL /* exclusive without-lck mode */)) {
|
9281
9287
|
eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub);
|
9288
|
+
env->me_lck->mti_readers_refresh_flag.weak = nothing_changed;
|
9282
9289
|
return env->me_lck->mti_oldest_reader.weak = steady;
|
9283
9290
|
}
|
9284
9291
|
|
@@ -9367,10 +9374,13 @@ __cold static pgno_t find_largest_snapshot(const MDBX_env *env,
|
|
9367
9374
|
|
9368
9375
|
/* Add a page to the txn's dirty list */
|
9369
9376
|
__hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
|
9370
|
-
|
9377
|
+
size_t npages) {
|
9371
9378
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
9379
|
+
mp->mp_txnid = txn->mt_front;
|
9372
9380
|
if (!txn->tw.dirtylist) {
|
9373
9381
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
|
9382
|
+
txn->tw.writemap_dirty_npages += npages;
|
9383
|
+
tASSERT(txn, txn->tw.spilled.list == nullptr);
|
9374
9384
|
return MDBX_SUCCESS;
|
9375
9385
|
}
|
9376
9386
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
@@ -9383,7 +9393,6 @@ __hot static int __must_check_result page_dirty(MDBX_txn *txn, MDBX_page *mp,
|
|
9383
9393
|
#endif /* xMDBX_DEBUG_SPILLING == 2 */
|
9384
9394
|
|
9385
9395
|
int rc;
|
9386
|
-
mp->mp_txnid = txn->mt_front;
|
9387
9396
|
if (unlikely(txn->tw.dirtyroom == 0)) {
|
9388
9397
|
if (txn->tw.loose_count) {
|
9389
9398
|
MDBX_page *loose = txn->tw.loose_pages;
|
@@ -10093,6 +10102,8 @@ MDBX_MAYBE_UNUSED static __always_inline size_t __builtin_clzl(size_t value) {
|
|
10093
10102
|
}
|
10094
10103
|
#endif /* _MSC_VER */
|
10095
10104
|
|
10105
|
+
#if !MDBX_PNL_ASCENDING
|
10106
|
+
|
10096
10107
|
#if !defined(MDBX_ATTRIBUTE_TARGET) && \
|
10097
10108
|
(__has_attribute(__target__) || __GNUC_PREREQ(5, 0))
|
10098
10109
|
#define MDBX_ATTRIBUTE_TARGET(target) __attribute__((__target__(target)))
|
@@ -10406,6 +10417,8 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len,
|
|
10406
10417
|
/* Choosing of another variants should be added here. */
|
10407
10418
|
#endif /* scan4seq_default */
|
10408
10419
|
|
10420
|
+
#endif /* MDBX_PNL_ASCENDING */
|
10421
|
+
|
10409
10422
|
#ifndef scan4seq_default
|
10410
10423
|
#define scan4seq_default scan4seq_fallback
|
10411
10424
|
#endif /* scan4seq_default */
|
@@ -10469,45 +10482,39 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len,
|
|
10469
10482
|
*
|
10470
10483
|
* Returns 0 on success, non-zero on failure.*/
|
10471
10484
|
|
10472
|
-
#define
|
10473
|
-
#define
|
10474
|
-
#define
|
10475
|
-
#define
|
10476
|
-
#define
|
10477
|
-
#define
|
10478
|
-
#define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
|
10479
|
-
#define MDBX_ALLOC_LIFO 128
|
10485
|
+
#define MDBX_ALLOC_DEFAULT 0
|
10486
|
+
#define MDBX_ALLOC_RESERVE 1
|
10487
|
+
#define MDBX_ALLOC_UNIMPORTANT 2
|
10488
|
+
#define MDBX_ALLOC_COALESCE 4 /* внутреннее состояние */
|
10489
|
+
#define MDBX_ALLOC_SHOULD_SCAN 8 /* внутреннее состояние */
|
10490
|
+
#define MDBX_ALLOC_LIFO 16 /* внутреннее состояние */
|
10480
10491
|
|
10481
|
-
static __inline bool is_gc_usable(
|
10492
|
+
static __inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc,
|
10493
|
+
const uint8_t flags) {
|
10482
10494
|
/* If txn is updating the GC, then the retired-list cannot play catch-up with
|
10483
10495
|
* itself by growing while trying to save it. */
|
10484
|
-
if (
|
10496
|
+
if (mc->mc_dbi == FREE_DBI && !(flags & MDBX_ALLOC_RESERVE) &&
|
10497
|
+
!(mc->mc_flags & C_GCU))
|
10485
10498
|
return false;
|
10486
10499
|
|
10487
10500
|
/* avoid (recursive) search inside empty tree and while tree is
|
10488
|
-
updating, https://
|
10501
|
+
updating, https://libmdbx.dqdkfa.ru/dead-github/issues/31 */
|
10489
10502
|
if (txn->mt_dbs[FREE_DBI].md_entries == 0)
|
10490
10503
|
return false;
|
10491
10504
|
|
10492
|
-
/* If our dirty list is already full, we can't touch GC */
|
10493
|
-
if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
|
10494
|
-
!(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY))
|
10495
|
-
return false;
|
10496
|
-
|
10497
10505
|
return true;
|
10498
10506
|
}
|
10499
10507
|
|
10500
|
-
static
|
10501
|
-
|
10502
|
-
|
10503
|
-
|
10504
|
-
|
10505
|
-
|
10506
|
-
return cursor_init(mc, txn, FREE_DBI);
|
10508
|
+
__hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) {
|
10509
|
+
const size_t len = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed);
|
10510
|
+
for (size_t i = 1; i <= len; ++i)
|
10511
|
+
if (txn->tw.lifo_reclaimed[i] == id)
|
10512
|
+
return true;
|
10513
|
+
return false;
|
10507
10514
|
}
|
10508
10515
|
|
10509
10516
|
static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
|
10510
|
-
|
10517
|
+
uint8_t flags) {
|
10511
10518
|
#if MDBX_ENABLE_PROFGC
|
10512
10519
|
const uint64_t monotime_before = osal_monotime();
|
10513
10520
|
size_t majflt_before;
|
@@ -10525,21 +10532,13 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
|
|
10525
10532
|
prof->spe_counter += 1;
|
10526
10533
|
#endif /* MDBX_ENABLE_PROFGC */
|
10527
10534
|
|
10528
|
-
eASSERT(env, num
|
10529
|
-
eASSERT(env, num > 0 || !(flags & MDBX_ALLOC_NEW));
|
10530
|
-
eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
|
10531
|
-
MDBX_ALLOC_BACKLOG)) == 0 ||
|
10532
|
-
(flags & MDBX_ALLOC_GC));
|
10533
|
-
eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
|
10534
|
-
MDBX_ALLOC_BACKLOG)) == 0 ||
|
10535
|
-
(flags & MDBX_ALLOC_NEW) == 0);
|
10535
|
+
eASSERT(env, num > 0 || (flags & MDBX_ALLOC_RESERVE));
|
10536
10536
|
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
10537
10537
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
10538
10538
|
|
10539
10539
|
pgno_t pgno = 0, *range = nullptr;
|
10540
|
-
size_t re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
|
10540
|
+
size_t newnext, re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
|
10541
10541
|
if (num > 1) {
|
10542
|
-
eASSERT(env, !(flags & MDBX_ALLOC_SLOT));
|
10543
10542
|
#if MDBX_ENABLE_PROFGC
|
10544
10543
|
prof->xpages += 1;
|
10545
10544
|
#endif /* MDBX_ENABLE_PROFGC */
|
@@ -10555,347 +10554,363 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
|
|
10555
10554
|
}
|
10556
10555
|
}
|
10557
10556
|
} else {
|
10558
|
-
eASSERT(env,
|
10559
|
-
MDBX_PNL_GETSIZE(txn->tw.relist) == 0);
|
10557
|
+
eASSERT(env, num == 0 || re_len == 0);
|
10560
10558
|
}
|
10561
10559
|
|
10562
10560
|
//---------------------------------------------------------------------------
|
10563
10561
|
|
10564
|
-
if (
|
10565
|
-
|
10566
|
-
goto no_gc;
|
10562
|
+
if (unlikely(!is_gc_usable(txn, mc, flags)))
|
10563
|
+
goto no_gc;
|
10567
10564
|
|
10568
|
-
|
10569
|
-
|
10565
|
+
eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO |
|
10566
|
+
MDBX_ALLOC_SHOULD_SCAN)) == 0);
|
10567
|
+
flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
|
10570
10568
|
|
10571
|
-
|
10569
|
+
if (/* Не коагулируем записи при подготовке резерва для обновления GC.
|
10570
|
+
* Иначе попытка увеличить резерв может приводить к необходимости ещё
|
10571
|
+
* большего резерва из-за увеличения списка переработанных страниц. */
|
10572
|
+
(flags & MDBX_ALLOC_RESERVE) == 0) {
|
10572
10573
|
if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
|
10573
|
-
|
10574
|
+
re_len < env->me_maxgc_ov1page / 2)
|
10574
10575
|
flags += MDBX_ALLOC_COALESCE;
|
10576
|
+
}
|
10575
10577
|
|
10576
|
-
|
10577
|
-
|
10578
|
-
|
10579
|
-
|
10578
|
+
MDBX_cursor *const gc =
|
10579
|
+
(MDBX_cursor *)((char *)env->me_txn0 + sizeof(MDBX_txn));
|
10580
|
+
gc->mc_txn = txn;
|
10581
|
+
gc->mc_flags = 0;
|
10580
10582
|
|
10581
|
-
|
10582
|
-
|
10583
|
-
|
10584
|
-
|
10585
|
-
|
10586
|
-
|
10587
|
-
|
10588
|
-
|
10589
|
-
|
10590
|
-
|
10591
|
-
|
10592
|
-
goto fail;
|
10593
|
-
}
|
10594
|
-
const txnid_t detent = oldest + 1;
|
10583
|
+
retry_gc_refresh_oldest:;
|
10584
|
+
txnid_t oldest = txn_oldest_reader(txn);
|
10585
|
+
retry_gc_have_oldest:
|
10586
|
+
if (unlikely(oldest >= txn->mt_txnid)) {
|
10587
|
+
ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN
|
10588
|
+
" for current-txnid %" PRIaTXN,
|
10589
|
+
oldest, txn->mt_txnid);
|
10590
|
+
ret.err = MDBX_PROBLEM;
|
10591
|
+
goto fail;
|
10592
|
+
}
|
10593
|
+
const txnid_t detent = oldest + 1;
|
10595
10594
|
|
10596
|
-
|
10597
|
-
|
10598
|
-
|
10599
|
-
if (
|
10600
|
-
|
10601
|
-
|
10602
|
-
|
10603
|
-
|
10604
|
-
goto fail;
|
10605
|
-
}
|
10595
|
+
txnid_t id = 0;
|
10596
|
+
MDBX_cursor_op op = MDBX_FIRST;
|
10597
|
+
if (flags & MDBX_ALLOC_LIFO) {
|
10598
|
+
if (!txn->tw.lifo_reclaimed) {
|
10599
|
+
txn->tw.lifo_reclaimed = txl_alloc();
|
10600
|
+
if (unlikely(!txn->tw.lifo_reclaimed)) {
|
10601
|
+
ret.err = MDBX_ENOMEM;
|
10602
|
+
goto fail;
|
10606
10603
|
}
|
10607
|
-
/* Begin lookup backward from oldest reader */
|
10608
|
-
last = detent - 1;
|
10609
|
-
op = MDBX_SET_RANGE;
|
10610
|
-
} else if (txn->tw.last_reclaimed) {
|
10611
|
-
/* Continue lookup forward from last-reclaimed */
|
10612
|
-
last = txn->tw.last_reclaimed + 1;
|
10613
|
-
if (last >= detent)
|
10614
|
-
goto no_gc;
|
10615
|
-
op = MDBX_SET_RANGE;
|
10616
10604
|
}
|
10605
|
+
/* Begin lookup backward from oldest reader */
|
10606
|
+
id = detent - 1;
|
10607
|
+
op = MDBX_SET_RANGE;
|
10608
|
+
} else if (txn->tw.last_reclaimed) {
|
10609
|
+
/* Continue lookup forward from last-reclaimed */
|
10610
|
+
id = txn->tw.last_reclaimed + 1;
|
10611
|
+
if (id >= detent)
|
10612
|
+
goto depleted_gc;
|
10613
|
+
op = MDBX_SET_RANGE;
|
10614
|
+
}
|
10617
10615
|
|
10618
|
-
|
10619
|
-
|
10620
|
-
|
10621
|
-
|
10616
|
+
next_gc:;
|
10617
|
+
MDBX_val key;
|
10618
|
+
key.iov_base = &id;
|
10619
|
+
key.iov_len = sizeof(id);
|
10622
10620
|
|
10623
10621
|
#if MDBX_ENABLE_PROFGC
|
10624
|
-
|
10622
|
+
prof->rsteps += 1;
|
10625
10623
|
#endif /* MDBX_ENABLE_PROFGC */
|
10626
10624
|
|
10627
|
-
|
10628
|
-
|
10629
|
-
|
10630
|
-
|
10631
|
-
goto fail;
|
10632
|
-
if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
|
10633
|
-
op = MDBX_PREV;
|
10634
|
-
goto next_gc;
|
10635
|
-
}
|
10636
|
-
goto depleted_gc;
|
10637
|
-
}
|
10638
|
-
if (unlikely(key.iov_len != sizeof(txnid_t))) {
|
10639
|
-
ret.err = MDBX_CORRUPTED;
|
10625
|
+
/* Seek first/next GC record */
|
10626
|
+
ret.err = mdbx_cursor_get(gc, &key, NULL, op);
|
10627
|
+
if (unlikely(ret.err != MDBX_SUCCESS)) {
|
10628
|
+
if (unlikely(ret.err != MDBX_NOTFOUND))
|
10640
10629
|
goto fail;
|
10641
|
-
|
10642
|
-
last = unaligned_peek_u64(4, key.iov_base);
|
10643
|
-
if (flags & MDBX_ALLOC_LIFO) {
|
10630
|
+
if ((flags & MDBX_ALLOC_LIFO) && op == MDBX_SET_RANGE) {
|
10644
10631
|
op = MDBX_PREV;
|
10645
|
-
|
10646
|
-
goto next_gc;
|
10647
|
-
/* skip IDs of records that already reclaimed */
|
10648
|
-
for (size_t i = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); i > 0; --i)
|
10649
|
-
if (txn->tw.lifo_reclaimed[i] == last)
|
10650
|
-
goto next_gc;
|
10651
|
-
} else {
|
10652
|
-
op = MDBX_NEXT;
|
10653
|
-
if (unlikely(last >= detent))
|
10654
|
-
goto depleted_gc;
|
10632
|
+
goto next_gc;
|
10655
10633
|
}
|
10634
|
+
goto depleted_gc;
|
10635
|
+
}
|
10636
|
+
if (unlikely(key.iov_len != sizeof(txnid_t))) {
|
10637
|
+
ret.err = MDBX_CORRUPTED;
|
10638
|
+
goto fail;
|
10639
|
+
}
|
10640
|
+
id = unaligned_peek_u64(4, key.iov_base);
|
10641
|
+
if (flags & MDBX_ALLOC_LIFO) {
|
10642
|
+
op = MDBX_PREV;
|
10643
|
+
if (id >= detent || is_already_reclaimed(txn, id))
|
10644
|
+
goto next_gc;
|
10645
|
+
} else {
|
10646
|
+
op = MDBX_NEXT;
|
10647
|
+
if (unlikely(id >= detent))
|
10648
|
+
goto depleted_gc;
|
10649
|
+
}
|
10656
10650
|
|
10657
|
-
|
10658
|
-
|
10659
|
-
|
10660
|
-
|
10661
|
-
|
10662
|
-
|
10663
|
-
goto fail;
|
10651
|
+
/* Reading next GC record */
|
10652
|
+
MDBX_val data;
|
10653
|
+
MDBX_page *const mp = gc->mc_pg[gc->mc_top];
|
10654
|
+
if (unlikely((ret.err = node_read(gc, page_node(mp, gc->mc_ki[gc->mc_top]),
|
10655
|
+
&data, mp)) != MDBX_SUCCESS))
|
10656
|
+
goto fail;
|
10664
10657
|
|
10665
|
-
|
10666
|
-
|
10667
|
-
|
10668
|
-
|
10669
|
-
|
10670
|
-
|
10671
|
-
|
10658
|
+
pgno_t *gc_pnl = (pgno_t *)data.iov_base;
|
10659
|
+
if (unlikely(data.iov_len % sizeof(pgno_t) ||
|
10660
|
+
data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
|
10661
|
+
!pnl_check(gc_pnl, txn->mt_next_pgno))) {
|
10662
|
+
ret.err = MDBX_CORRUPTED;
|
10663
|
+
goto fail;
|
10664
|
+
}
|
10665
|
+
|
10666
|
+
const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
|
10667
|
+
TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len,
|
10668
|
+
gc_len + re_len);
|
10669
|
+
|
10670
|
+
eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
|
10671
|
+
if (unlikely(gc_len + re_len >= env->me_maxgc_ov1page)) {
|
10672
|
+
/* Don't try to coalesce too much. */
|
10673
|
+
if (flags & MDBX_ALLOC_SHOULD_SCAN) {
|
10674
|
+
eASSERT(env, flags & MDBX_ALLOC_COALESCE);
|
10675
|
+
eASSERT(env, num > 0);
|
10676
|
+
#if MDBX_ENABLE_PROFGC
|
10677
|
+
env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
|
10678
|
+
#endif /* MDBX_ENABLE_PROFGC */
|
10679
|
+
TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
|
10680
|
+
if (re_len >= num) {
|
10681
|
+
eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
|
10682
|
+
MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
|
10683
|
+
range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
|
10684
|
+
pgno = *range;
|
10685
|
+
if (num == 1)
|
10686
|
+
goto done;
|
10687
|
+
range = scan4seq(range, re_len, num - 1);
|
10688
|
+
eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
|
10689
|
+
if (likely(range)) {
|
10690
|
+
pgno = *range;
|
10691
|
+
goto done;
|
10692
|
+
}
|
10693
|
+
}
|
10694
|
+
flags -= MDBX_ALLOC_COALESCE | MDBX_ALLOC_SHOULD_SCAN;
|
10672
10695
|
}
|
10673
|
-
|
10674
|
-
|
10675
|
-
|
10676
|
-
((/* not a slot-request from gc-update */
|
10677
|
-
(flags & MDBX_ALLOC_SLOT) == 0 &&
|
10696
|
+
if (unlikely(/* list is too long already */ re_len >=
|
10697
|
+
env->me_options.rp_augment_limit) &&
|
10698
|
+
((/* not a slot-request from gc-update */ num &&
|
10678
10699
|
/* have enough unallocated space */ txn->mt_geo.upper >=
|
10679
10700
|
txn->mt_next_pgno + num) ||
|
10680
|
-
gc_len +
|
10701
|
+
gc_len + re_len >= MDBX_PGL_LIMIT)) {
|
10681
10702
|
/* Stop reclaiming to avoid large/overflow the page list.
|
10682
10703
|
* This is a rare case while search for a continuously multi-page region
|
10683
10704
|
* in a large database.
|
10684
|
-
* https://
|
10705
|
+
* https://libmdbx.dqdkfa.ru/dead-github/issues/123
|
10706
|
+
*/
|
10685
10707
|
NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu "
|
10686
10708
|
"(chunk) -> %zu",
|
10687
|
-
|
10688
|
-
gc_len + MDBX_PNL_GETSIZE(txn->tw.relist));
|
10709
|
+
re_len, gc_len, gc_len + re_len);
|
10689
10710
|
goto depleted_gc;
|
10690
10711
|
}
|
10712
|
+
}
|
10691
10713
|
|
10692
|
-
|
10693
|
-
|
10694
|
-
|
10695
|
-
|
10696
|
-
if (unlikely(ret.err != MDBX_SUCCESS))
|
10697
|
-
goto fail;
|
10698
|
-
}
|
10699
|
-
|
10700
|
-
/* Append PNL from GC record to tw.relist */
|
10701
|
-
ret.err = pnl_need(&txn->tw.relist, gc_len);
|
10714
|
+
/* Remember ID of readed GC record */
|
10715
|
+
txn->tw.last_reclaimed = id;
|
10716
|
+
if (flags & MDBX_ALLOC_LIFO) {
|
10717
|
+
ret.err = txl_append(&txn->tw.lifo_reclaimed, id);
|
10702
10718
|
if (unlikely(ret.err != MDBX_SUCCESS))
|
10703
10719
|
goto fail;
|
10704
|
-
|
10720
|
+
}
|
10705
10721
|
|
10706
|
-
|
10707
|
-
|
10708
|
-
|
10709
|
-
|
10710
|
-
for (size_t i = gc_len; i; i--)
|
10711
|
-
DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
|
10712
|
-
DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
|
10713
|
-
}
|
10722
|
+
/* Append PNL from GC record to tw.relist */
|
10723
|
+
ret.err = pnl_need(&txn->tw.relist, gc_len);
|
10724
|
+
if (unlikely(ret.err != MDBX_SUCCESS))
|
10725
|
+
goto fail;
|
10714
10726
|
|
10715
|
-
|
10716
|
-
|
10717
|
-
|
10718
|
-
|
10719
|
-
|
10720
|
-
|
10721
|
-
|
10722
|
-
|
10723
|
-
|
10724
|
-
|
10727
|
+
if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
|
10728
|
+
DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO
|
10729
|
+
" len %zu, PNL",
|
10730
|
+
id, txn->mt_dbs[FREE_DBI].md_root, gc_len);
|
10731
|
+
for (size_t i = gc_len; i; i--)
|
10732
|
+
DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
|
10733
|
+
DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
|
10734
|
+
}
|
10735
|
+
|
10736
|
+
/* Merge in descending sorted order */
|
10737
|
+
re_len = pnl_merge(txn->tw.relist, gc_pnl);
|
10738
|
+
flags |= MDBX_ALLOC_SHOULD_SCAN;
|
10739
|
+
if (AUDIT_ENABLED()) {
|
10740
|
+
if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
|
10741
|
+
ret.err = MDBX_CORRUPTED;
|
10742
|
+
goto fail;
|
10725
10743
|
}
|
10726
|
-
|
10744
|
+
} else {
|
10745
|
+
eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno));
|
10746
|
+
}
|
10747
|
+
eASSERT(env, dirtylist_check(txn));
|
10727
10748
|
|
10728
|
-
|
10729
|
-
|
10730
|
-
|
10731
|
-
|
10732
|
-
|
10733
|
-
|
10734
|
-
|
10735
|
-
|
10736
|
-
|
10737
|
-
|
10738
|
-
|
10749
|
+
eASSERT(env,
|
10750
|
+
re_len == 0 || MDBX_PNL_MOST(txn->tw.relist) < txn->mt_next_pgno);
|
10751
|
+
if (MDBX_ENABLE_REFUND && re_len &&
|
10752
|
+
unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) {
|
10753
|
+
/* Refund suitable pages into "unallocated" space */
|
10754
|
+
txn_refund(txn);
|
10755
|
+
re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
|
10756
|
+
}
|
10757
|
+
eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
|
10758
|
+
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
10759
|
+
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
10739
10760
|
|
10740
|
-
|
10741
|
-
|
10742
|
-
|
10743
|
-
|
10744
|
-
|
10761
|
+
/* Done for a kick-reclaim mode, actually no page needed */
|
10762
|
+
if (unlikely(num == 0)) {
|
10763
|
+
eASSERT(env, ret.err == MDBX_SUCCESS);
|
10764
|
+
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id,
|
10765
|
+
re_len);
|
10766
|
+
goto early_exit;
|
10767
|
+
}
|
10745
10768
|
|
10746
|
-
|
10769
|
+
/* TODO: delete reclaimed records */
|
10747
10770
|
|
10748
|
-
|
10749
|
-
|
10750
|
-
|
10751
|
-
|
10752
|
-
|
10753
|
-
|
10754
|
-
#endif /* MDBX_ENABLE_PROFGC */
|
10755
|
-
goto next_gc;
|
10756
|
-
}
|
10757
|
-
TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
|
10758
|
-
flags &= ~MDBX_ALLOC_COALESCE;
|
10759
|
-
}
|
10771
|
+
eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
|
10772
|
+
if (flags & MDBX_ALLOC_COALESCE) {
|
10773
|
+
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id,
|
10774
|
+
re_len);
|
10775
|
+
goto next_gc;
|
10776
|
+
}
|
10760
10777
|
|
10761
|
-
|
10762
|
-
|
10763
|
-
|
10764
|
-
|
10765
|
-
|
10766
|
-
|
10778
|
+
scan:
|
10779
|
+
eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN);
|
10780
|
+
eASSERT(env, num > 0);
|
10781
|
+
if (re_len >= num) {
|
10782
|
+
eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
|
10783
|
+
MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
|
10784
|
+
range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
|
10785
|
+
pgno = *range;
|
10786
|
+
if (num == 1)
|
10787
|
+
goto done;
|
10788
|
+
range = scan4seq(range, re_len, num - 1);
|
10789
|
+
eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
|
10790
|
+
if (likely(range)) {
|
10767
10791
|
pgno = *range;
|
10768
|
-
|
10769
|
-
goto done;
|
10770
|
-
range = scan4seq(range, re_len, num - 1);
|
10771
|
-
eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
|
10772
|
-
if (likely(range)) {
|
10773
|
-
pgno = *range;
|
10774
|
-
goto done;
|
10775
|
-
}
|
10792
|
+
goto done;
|
10776
10793
|
}
|
10777
|
-
|
10778
|
-
|
10779
|
-
|
10794
|
+
}
|
10795
|
+
flags -= MDBX_ALLOC_SHOULD_SCAN;
|
10796
|
+
if (ret.err == MDBX_SUCCESS) {
|
10797
|
+
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id,
|
10798
|
+
re_len);
|
10799
|
+
goto next_gc;
|
10800
|
+
}
|
10780
10801
|
|
10781
|
-
|
10782
|
-
|
10783
|
-
|
10784
|
-
|
10802
|
+
depleted_gc:
|
10803
|
+
ret.err = MDBX_NOTFOUND;
|
10804
|
+
if (flags & MDBX_ALLOC_SHOULD_SCAN)
|
10805
|
+
goto scan;
|
10785
10806
|
|
10786
|
-
|
10807
|
+
//-------------------------------------------------------------------------
|
10808
|
+
|
10809
|
+
/* There is no suitable pages in the GC and to be able to allocate
|
10810
|
+
* we should CHOOSE one of:
|
10811
|
+
* - make a new steady checkpoint if reclaiming was stopped by
|
10812
|
+
* the last steady-sync, or wipe it in the MDBX_UTTERLY_NOSYNC mode;
|
10813
|
+
* - kick lagging reader(s) if reclaiming was stopped by ones of it.
|
10814
|
+
* - extend the database file. */
|
10787
10815
|
|
10788
|
-
|
10789
|
-
|
10790
|
-
|
10791
|
-
|
10792
|
-
|
10793
|
-
|
10794
|
-
|
10795
|
-
|
10796
|
-
|
10797
|
-
|
10798
|
-
|
10799
|
-
|
10800
|
-
const
|
10801
|
-
|
10802
|
-
|
10803
|
-
|
10804
|
-
|
10805
|
-
|
10806
|
-
|
10807
|
-
|
10808
|
-
|
10809
|
-
|
10810
|
-
|
10811
|
-
|
10812
|
-
|
10813
|
-
|
10814
|
-
|
10815
|
-
|
10816
|
-
|
10817
|
-
*
|
10818
|
-
* - upper limit of database size is reached;
|
10819
|
-
* - database is full (with the current file size)
|
10820
|
-
* AND auto-sync threshold is NOT specified */
|
10821
|
-
if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
|
10822
|
-
((autosync_threshold | autosync_period) == 0 ||
|
10823
|
-
newnext >= prefer_steady.ptr_c->mm_geo.now)) {
|
10824
|
-
/* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
|
10825
|
-
* without any auto-sync threshold(s). */
|
10816
|
+
/* Will use new pages from the map if nothing is suitable in the GC. */
|
10817
|
+
newnext = (pgno = txn->mt_next_pgno) + num;
|
10818
|
+
|
10819
|
+
/* Does reclaiming stopped at the last steady point? */
|
10820
|
+
const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
|
10821
|
+
const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
|
10822
|
+
if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady &&
|
10823
|
+
detent == prefer_steady.txnid + 1) {
|
10824
|
+
DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN
|
10825
|
+
"-%s, detent %" PRIaTXN,
|
10826
|
+
recent.txnid, durable_caption(recent.ptr_c), prefer_steady.txnid,
|
10827
|
+
durable_caption(prefer_steady.ptr_c), detent);
|
10828
|
+
const pgno_t autosync_threshold =
|
10829
|
+
atomic_load32(&env->me_lck->mti_autosync_threshold, mo_Relaxed);
|
10830
|
+
const uint64_t autosync_period =
|
10831
|
+
atomic_load64(&env->me_lck->mti_autosync_period, mo_Relaxed);
|
10832
|
+
uint64_t eoos_timestamp;
|
10833
|
+
/* wipe the last steady-point if one of:
|
10834
|
+
* - UTTERLY_NOSYNC mode AND auto-sync threshold is NOT specified
|
10835
|
+
* - UTTERLY_NOSYNC mode AND free space at steady-point is exhausted
|
10836
|
+
* otherwise, make a new steady-point if one of:
|
10837
|
+
* - auto-sync threshold is specified and reached;
|
10838
|
+
* - upper limit of database size is reached;
|
10839
|
+
* - database is full (with the current file size)
|
10840
|
+
* AND auto-sync threshold is NOT specified */
|
10841
|
+
if (F_ISSET(env->me_flags, MDBX_UTTERLY_NOSYNC) &&
|
10842
|
+
((autosync_threshold | autosync_period) == 0 ||
|
10843
|
+
newnext >= prefer_steady.ptr_c->mm_geo.now)) {
|
10844
|
+
/* wipe steady checkpoint in MDBX_UTTERLY_NOSYNC mode
|
10845
|
+
* without any auto-sync threshold(s). */
|
10826
10846
|
#if MDBX_ENABLE_PROFGC
|
10827
|
-
|
10847
|
+
env->me_lck->mti_pgop_stat.gc_prof.wipes += 1;
|
10828
10848
|
#endif /* MDBX_ENABLE_PROFGC */
|
10829
|
-
|
10830
|
-
|
10831
|
-
|
10832
|
-
|
10833
|
-
|
10834
|
-
|
10835
|
-
|
10836
|
-
|
10837
|
-
|
10838
|
-
|
10839
|
-
|
10840
|
-
|
10841
|
-
|
10842
|
-
|
10843
|
-
|
10844
|
-
|
10845
|
-
|
10846
|
-
|
10847
|
-
|
10848
|
-
/* make steady checkpoint. */
|
10849
|
+
ret.err = wipe_steady(txn, detent);
|
10850
|
+
DEBUG("gc-wipe-steady, rc %d", ret.err);
|
10851
|
+
if (unlikely(ret.err != MDBX_SUCCESS))
|
10852
|
+
goto fail;
|
10853
|
+
eASSERT(env, prefer_steady.ptr_c !=
|
10854
|
+
meta_prefer_steady(env, &txn->tw.troika).ptr_c);
|
10855
|
+
goto retry_gc_refresh_oldest;
|
10856
|
+
}
|
10857
|
+
if ((autosync_threshold &&
|
10858
|
+
atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
|
10859
|
+
autosync_threshold) ||
|
10860
|
+
(autosync_period &&
|
10861
|
+
(eoos_timestamp =
|
10862
|
+
atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
|
10863
|
+
osal_monotime() - eoos_timestamp >= autosync_period) ||
|
10864
|
+
newnext >= txn->mt_geo.upper ||
|
10865
|
+
((num == 0 || newnext >= txn->mt_end_pgno) &&
|
10866
|
+
(autosync_threshold | autosync_period) == 0)) {
|
10867
|
+
/* make steady checkpoint. */
|
10849
10868
|
#if MDBX_ENABLE_PROFGC
|
10850
|
-
|
10869
|
+
env->me_lck->mti_pgop_stat.gc_prof.flushes += 1;
|
10851
10870
|
#endif /* MDBX_ENABLE_PROFGC */
|
10852
|
-
|
10853
|
-
|
10854
|
-
|
10855
|
-
|
10856
|
-
|
10857
|
-
|
10858
|
-
|
10859
|
-
|
10860
|
-
|
10861
|
-
|
10862
|
-
}
|
10871
|
+
MDBX_meta meta = *recent.ptr_c;
|
10872
|
+
ret.err = sync_locked(env, env->me_flags & MDBX_WRITEMAP, &meta,
|
10873
|
+
&txn->tw.troika);
|
10874
|
+
DEBUG("gc-make-steady, rc %d", ret.err);
|
10875
|
+
eASSERT(env, ret.err != MDBX_RESULT_TRUE);
|
10876
|
+
if (unlikely(ret.err != MDBX_SUCCESS))
|
10877
|
+
goto fail;
|
10878
|
+
eASSERT(env, prefer_steady.ptr_c !=
|
10879
|
+
meta_prefer_steady(env, &txn->tw.troika).ptr_c);
|
10880
|
+
goto retry_gc_refresh_oldest;
|
10863
10881
|
}
|
10882
|
+
}
|
10864
10883
|
|
10865
|
-
|
10866
|
-
|
10867
|
-
|
10868
|
-
|
10869
|
-
|
10870
|
-
|
10871
|
-
goto retry_gc_have_oldest;
|
10872
|
-
}
|
10884
|
+
if (unlikely(true == atomic_load32(&env->me_lck->mti_readers_refresh_flag,
|
10885
|
+
mo_AcquireRelease))) {
|
10886
|
+
oldest = txn_oldest_reader(txn);
|
10887
|
+
if (oldest >= detent)
|
10888
|
+
goto retry_gc_have_oldest;
|
10889
|
+
}
|
10873
10890
|
|
10874
|
-
|
10875
|
-
|
10876
|
-
|
10877
|
-
|
10878
|
-
|
10879
|
-
|
10891
|
+
/* Avoid kick lagging reader(s) if is enough unallocated space
|
10892
|
+
* at the end of database file. */
|
10893
|
+
if (!(flags & MDBX_ALLOC_RESERVE) && newnext <= txn->mt_end_pgno) {
|
10894
|
+
eASSERT(env, range == nullptr);
|
10895
|
+
goto done;
|
10896
|
+
}
|
10880
10897
|
|
10881
|
-
|
10882
|
-
|
10883
|
-
|
10884
|
-
|
10885
|
-
}
|
10898
|
+
if (oldest < txn->mt_txnid - xMDBX_TXNID_STEP) {
|
10899
|
+
oldest = kick_longlived_readers(env, oldest);
|
10900
|
+
if (oldest >= detent)
|
10901
|
+
goto retry_gc_have_oldest;
|
10886
10902
|
}
|
10887
10903
|
|
10888
10904
|
//---------------------------------------------------------------------------
|
10889
10905
|
|
10890
10906
|
no_gc:
|
10891
|
-
if (
|
10907
|
+
if (flags & MDBX_ALLOC_RESERVE) {
|
10892
10908
|
ret.err = MDBX_NOTFOUND;
|
10893
10909
|
goto fail;
|
10894
10910
|
}
|
10895
10911
|
|
10896
10912
|
/* Will use new pages from the map if nothing is suitable in the GC. */
|
10897
|
-
pgno = txn->mt_next_pgno;
|
10898
|
-
const size_t newnext = num + pgno;
|
10913
|
+
newnext = (pgno = txn->mt_next_pgno) + num;
|
10899
10914
|
if (newnext <= txn->mt_end_pgno)
|
10900
10915
|
goto done;
|
10901
10916
|
|
@@ -10932,12 +10947,12 @@ no_gc:
|
|
10932
10947
|
|
10933
10948
|
done:
|
10934
10949
|
ret.err = MDBX_SUCCESS;
|
10935
|
-
if (likely((flags &
|
10950
|
+
if (likely((flags & MDBX_ALLOC_RESERVE) == 0)) {
|
10936
10951
|
ENSURE(env, pgno >= NUM_METAS);
|
10937
10952
|
if (range) {
|
10938
|
-
eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
|
10939
10953
|
eASSERT(env, pgno == *range);
|
10940
10954
|
eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS);
|
10955
|
+
eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
|
10941
10956
|
/* Cutoff allocated pages from tw.relist */
|
10942
10957
|
#if MDBX_PNL_ASCENDING
|
10943
10958
|
for (const pgno_t *const end = re_list + re_len - num; range <= end;
|
@@ -10951,7 +10966,6 @@ done:
|
|
10951
10966
|
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
10952
10967
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
10953
10968
|
} else {
|
10954
|
-
eASSERT(env, flags & MDBX_ALLOC_NEW);
|
10955
10969
|
pgno = txn->mt_next_pgno;
|
10956
10970
|
txn->mt_next_pgno += (pgno_t)num;
|
10957
10971
|
eASSERT(env, txn->mt_next_pgno <= txn->mt_end_pgno);
|
@@ -10995,8 +11009,9 @@ done:
|
|
10995
11009
|
int level;
|
10996
11010
|
const char *what;
|
10997
11011
|
if (flags & MDBX_ALLOC_RESERVE) {
|
10998
|
-
level =
|
10999
|
-
|
11012
|
+
level =
|
11013
|
+
(flags & MDBX_ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
|
11014
|
+
what = num ? "reserve-pages" : "fetch-slot";
|
11000
11015
|
} else {
|
11001
11016
|
txn->mt_flags |= MDBX_TXN_ERROR;
|
11002
11017
|
level = MDBX_LOG_ERROR;
|
@@ -11011,7 +11026,7 @@ done:
|
|
11011
11026
|
} else {
|
11012
11027
|
early_exit:
|
11013
11028
|
DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num,
|
11014
|
-
|
11029
|
+
num ? "RESERVE" : "SLOT", ret.err);
|
11015
11030
|
ret.page = NULL;
|
11016
11031
|
}
|
11017
11032
|
|
@@ -11057,84 +11072,103 @@ __hot static pgr_t page_alloc(const MDBX_cursor *mc) {
|
|
11057
11072
|
return ret;
|
11058
11073
|
}
|
11059
11074
|
|
11060
|
-
|
11061
|
-
|
11062
|
-
|
11063
|
-
|
11064
|
-
MDBX_env *const env = txn->mt_env;
|
11075
|
+
MDBX_PNL pnl = txn->tw.relist;
|
11076
|
+
const size_t len = MDBX_PNL_GETSIZE(pnl);
|
11077
|
+
if (likely(len > 0)) {
|
11078
|
+
MDBX_env *const env = txn->mt_env;
|
11065
11079
|
|
11066
|
-
|
11080
|
+
MDBX_PNL_SETSIZE(pnl, len - 1);
|
11067
11081
|
#if MDBX_PNL_ASCENDING
|
11068
|
-
|
11069
|
-
|
11070
|
-
|
11082
|
+
const pgno_t pgno = pnl[1];
|
11083
|
+
for (size_t i = 1; i < len; ++i)
|
11084
|
+
pnl[i] = pnl[i + 1];
|
11071
11085
|
#else
|
11072
|
-
|
11086
|
+
const pgno_t pgno = pnl[len];
|
11073
11087
|
#endif
|
11074
11088
|
|
11075
11089
|
#if MDBX_ENABLE_PROFGC
|
11076
|
-
|
11077
|
-
|
11078
|
-
|
11079
|
-
|
11080
|
-
|
11081
|
-
|
11090
|
+
const uint64_t monotime_before = osal_monotime();
|
11091
|
+
size_t majflt_before;
|
11092
|
+
const uint64_t cputime_before = osal_cputime(&majflt_before);
|
11093
|
+
profgc_stat_t *const prof = (mc->mc_dbi == FREE_DBI)
|
11094
|
+
? &env->me_lck->mti_pgop_stat.gc_prof.self
|
11095
|
+
: &env->me_lck->mti_pgop_stat.gc_prof.work;
|
11082
11096
|
#endif /* MDBX_ENABLE_PROFGC */
|
11083
|
-
|
11084
|
-
|
11085
|
-
|
11086
|
-
|
11087
|
-
|
11088
|
-
|
11089
|
-
|
11090
|
-
|
11091
|
-
|
11092
|
-
}
|
11097
|
+
pgr_t ret;
|
11098
|
+
if (env->me_flags & MDBX_WRITEMAP) {
|
11099
|
+
ret.page = pgno2page(env, pgno);
|
11100
|
+
MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
|
11101
|
+
} else {
|
11102
|
+
ret.page = page_malloc(txn, 1);
|
11103
|
+
if (unlikely(!ret.page)) {
|
11104
|
+
ret.err = MDBX_ENOMEM;
|
11105
|
+
goto bailout;
|
11093
11106
|
}
|
11107
|
+
}
|
11094
11108
|
|
11095
|
-
|
11096
|
-
|
11097
|
-
|
11098
|
-
|
11099
|
-
|
11109
|
+
VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
|
11110
|
+
ret.page->mp_pgno = pgno;
|
11111
|
+
ret.page->mp_leaf2_ksize = 0;
|
11112
|
+
ret.page->mp_flags = 0;
|
11113
|
+
tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
|
11100
11114
|
|
11101
|
-
|
11102
|
-
|
11103
|
-
|
11104
|
-
|
11115
|
+
ret.err = page_dirty(txn, ret.page, 1);
|
11116
|
+
bailout:
|
11117
|
+
tASSERT(txn, pnl_check_allocated(txn->tw.relist,
|
11118
|
+
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
11105
11119
|
#if MDBX_ENABLE_PROFGC
|
11106
|
-
|
11107
|
-
|
11108
|
-
|
11109
|
-
|
11120
|
+
size_t majflt_after;
|
11121
|
+
prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
|
11122
|
+
prof->majflt += majflt_after - majflt_before;
|
11123
|
+
prof->xtime_monotonic += osal_monotime() - monotime_before;
|
11110
11124
|
#endif /* MDBX_ENABLE_PROFGC */
|
11111
|
-
|
11112
|
-
}
|
11125
|
+
return ret;
|
11113
11126
|
}
|
11114
11127
|
|
11115
|
-
return page_alloc_slowpath(mc, 1,
|
11128
|
+
return page_alloc_slowpath(mc, 1, MDBX_ALLOC_DEFAULT);
|
11116
11129
|
}
|
11117
11130
|
|
11118
|
-
/* Copy the used portions of a
|
11119
|
-
__hot static void page_copy(MDBX_page *dst, const MDBX_page *src,
|
11120
|
-
size_t
|
11131
|
+
/* Copy the used portions of a page. */
|
11132
|
+
__hot static void page_copy(MDBX_page *const dst, const MDBX_page *const src,
|
11133
|
+
const size_t size) {
|
11121
11134
|
STATIC_ASSERT(UINT16_MAX > MAX_PAGESIZE - PAGEHDRSZ);
|
11122
11135
|
STATIC_ASSERT(MIN_PAGESIZE > PAGEHDRSZ + NODESIZE * 4);
|
11136
|
+
char *copy_dst = (void *)dst;
|
11137
|
+
const char *copy_src = (const void *)src;
|
11138
|
+
size_t copy_len = size;
|
11139
|
+
if (src->mp_flags & P_LEAF2) {
|
11140
|
+
copy_len = PAGEHDRSZ + src->mp_leaf2_ksize * page_numkeys(src);
|
11141
|
+
if (unlikely(copy_len > size))
|
11142
|
+
goto bailout;
|
11143
|
+
}
|
11123
11144
|
if ((src->mp_flags & (P_LEAF2 | P_OVERFLOW)) == 0) {
|
11124
|
-
size_t upper = src->mp_upper, lower = src->mp_lower
|
11125
|
-
|
11145
|
+
size_t upper = src->mp_upper, lower = src->mp_lower;
|
11146
|
+
intptr_t unused = upper - lower;
|
11126
11147
|
/* If page isn't full, just copy the used portion. Adjust
|
11127
11148
|
* alignment so memcpy may copy words instead of bytes. */
|
11128
|
-
if (unused
|
11149
|
+
if (unused > MDBX_CACHELINE_SIZE * 3) {
|
11129
11150
|
lower = ceil_powerof2(lower + PAGEHDRSZ, sizeof(void *));
|
11130
11151
|
upper = floor_powerof2(upper + PAGEHDRSZ, sizeof(void *));
|
11131
|
-
|
11132
|
-
|
11133
|
-
|
11134
|
-
|
11152
|
+
if (unlikely(upper > copy_len))
|
11153
|
+
goto bailout;
|
11154
|
+
memcpy(copy_dst, copy_src, lower);
|
11155
|
+
copy_dst += upper;
|
11156
|
+
copy_src += upper;
|
11157
|
+
copy_len -= upper;
|
11135
11158
|
}
|
11136
11159
|
}
|
11137
|
-
memcpy(
|
11160
|
+
memcpy(copy_dst, copy_src, copy_len);
|
11161
|
+
return;
|
11162
|
+
|
11163
|
+
bailout:
|
11164
|
+
if (src->mp_flags & P_LEAF2)
|
11165
|
+
bad_page(src, "%s addr %p, n-keys %zu, ksize %u",
|
11166
|
+
"invalid/corrupted source page", __Wpedantic_format_voidptr(src),
|
11167
|
+
page_numkeys(src), src->mp_leaf2_ksize);
|
11168
|
+
else
|
11169
|
+
bad_page(src, "%s addr %p, upper %u", "invalid/corrupted source page",
|
11170
|
+
__Wpedantic_format_voidptr(src), src->mp_upper);
|
11171
|
+
memset(dst, -1, size);
|
11138
11172
|
}
|
11139
11173
|
|
11140
11174
|
/* Pull a page off the txn's spill list, if present.
|
@@ -11541,7 +11575,9 @@ __cold int mdbx_env_sync_poll(MDBX_env *env) {
|
|
11541
11575
|
|
11542
11576
|
/* Back up parent txn's cursors, then grab the originals for tracking */
|
11543
11577
|
static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
|
11544
|
-
|
11578
|
+
tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr);
|
11579
|
+
nested->mt_cursors[FREE_DBI] = nullptr;
|
11580
|
+
for (int i = parent->mt_numdbs; --i > FREE_DBI;) {
|
11545
11581
|
nested->mt_cursors[i] = NULL;
|
11546
11582
|
MDBX_cursor *mc = parent->mt_cursors[i];
|
11547
11583
|
if (mc != NULL) {
|
@@ -11586,7 +11622,8 @@ static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) {
|
|
11586
11622
|
*
|
11587
11623
|
* Returns 0 on success, non-zero on failure. */
|
11588
11624
|
static void cursors_eot(MDBX_txn *txn, const bool merge) {
|
11589
|
-
|
11625
|
+
tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr);
|
11626
|
+
for (intptr_t i = txn->mt_numdbs; --i > FREE_DBI;) {
|
11590
11627
|
MDBX_cursor *next, *mc = txn->mt_cursors[i];
|
11591
11628
|
if (!mc)
|
11592
11629
|
continue;
|
@@ -11856,7 +11893,7 @@ __cold int mdbx_thread_unregister(const MDBX_env *env) {
|
|
11856
11893
|
return MDBX_SUCCESS;
|
11857
11894
|
}
|
11858
11895
|
|
11859
|
-
/* check against https://
|
11896
|
+
/* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
11860
11897
|
static bool coherency_check(const MDBX_env *env, const txnid_t txnid,
|
11861
11898
|
const volatile MDBX_db *dbs,
|
11862
11899
|
const volatile MDBX_meta *meta, bool report) {
|
@@ -11957,7 +11994,7 @@ __cold static int coherency_timeout(uint64_t *timestamp, pgno_t pgno) {
|
|
11957
11994
|
}
|
11958
11995
|
|
11959
11996
|
/* check with timeout as the workaround
|
11960
|
-
* for https://
|
11997
|
+
* for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
11961
11998
|
__hot static int coherency_check_readed(const MDBX_env *env,
|
11962
11999
|
const txnid_t txnid,
|
11963
12000
|
const volatile MDBX_db *dbs,
|
@@ -12193,8 +12230,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
|
|
12193
12230
|
txn->tw.troika = meta_tap(env);
|
12194
12231
|
const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
|
12195
12232
|
uint64_t timestamp = 0;
|
12196
|
-
while (
|
12197
|
-
"workaround for https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/269") {
|
12233
|
+
while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
|
12198
12234
|
rc = coherency_check_readed(env, head.txnid, head.ptr_v->mm_dbs,
|
12199
12235
|
head.ptr_v, ×tamp);
|
12200
12236
|
if (likely(rc == MDBX_SUCCESS))
|
@@ -12219,8 +12255,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
|
|
12219
12255
|
txn->tw.loose_refund_wl = 0;
|
12220
12256
|
#endif /* MDBX_ENABLE_REFUND */
|
12221
12257
|
MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0);
|
12222
|
-
txn->tw.
|
12223
|
-
txn->tw.
|
12258
|
+
txn->tw.spilled.list = NULL;
|
12259
|
+
txn->tw.spilled.least_removed = 0;
|
12224
12260
|
txn->tw.last_reclaimed = 0;
|
12225
12261
|
if (txn->tw.lifo_reclaimed)
|
12226
12262
|
MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0);
|
@@ -12297,6 +12333,19 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) {
|
|
12297
12333
|
osal_srwlock_AcquireShared(&env->me_remap_guard);
|
12298
12334
|
}
|
12299
12335
|
#endif /* Windows */
|
12336
|
+
} else {
|
12337
|
+
if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) {
|
12338
|
+
ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB",
|
12339
|
+
txn->mt_dbs[FREE_DBI].md_flags);
|
12340
|
+
rc = MDBX_INCOMPATIBLE;
|
12341
|
+
goto bailout;
|
12342
|
+
}
|
12343
|
+
|
12344
|
+
tASSERT(txn, txn == env->me_txn0);
|
12345
|
+
MDBX_cursor *const gc = (MDBX_cursor *)((char *)txn + sizeof(MDBX_txn));
|
12346
|
+
rc = cursor_init(gc, txn, FREE_DBI);
|
12347
|
+
if (rc != MDBX_SUCCESS)
|
12348
|
+
goto bailout;
|
12300
12349
|
}
|
12301
12350
|
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
|
12302
12351
|
txn_valgrind(env, txn);
|
@@ -12514,7 +12563,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
|
|
12514
12563
|
txn->tw.dirtylru = parent->tw.dirtylru;
|
12515
12564
|
|
12516
12565
|
dpl_sort(parent);
|
12517
|
-
if (parent->tw.
|
12566
|
+
if (parent->tw.spilled.list)
|
12518
12567
|
spill_purge(parent);
|
12519
12568
|
|
12520
12569
|
tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >=
|
@@ -12591,7 +12640,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
|
|
12591
12640
|
eASSERT(env, (txn->mt_flags &
|
12592
12641
|
~(MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED | MDBX_NOMETASYNC |
|
12593
12642
|
MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0);
|
12594
|
-
assert(!txn->tw.
|
12643
|
+
assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed);
|
12595
12644
|
}
|
12596
12645
|
txn->mt_signature = MDBX_MT_SIGNATURE;
|
12597
12646
|
txn->mt_userctx = context;
|
@@ -12696,10 +12745,9 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
|
|
12696
12745
|
env, txn->mt_child ? (size_t)txn->tw.retired_pages
|
12697
12746
|
: MDBX_PNL_GETSIZE(txn->tw.retired_pages));
|
12698
12747
|
info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
|
12699
|
-
info->txn_space_dirty =
|
12700
|
-
txn->tw.dirtylist
|
12701
|
-
|
12702
|
-
: 0;
|
12748
|
+
info->txn_space_dirty = pgno2bytes(
|
12749
|
+
env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
12750
|
+
: txn->tw.writemap_dirty_npages);
|
12703
12751
|
info->txn_reader_lag = INT64_MAX;
|
12704
12752
|
MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
|
12705
12753
|
if (scan_rlt && lck) {
|
@@ -13015,8 +13063,8 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) {
|
|
13015
13063
|
txn->mt_flags = MDBX_TXN_FINISHED;
|
13016
13064
|
txn->mt_owner = 0;
|
13017
13065
|
env->me_txn = txn->mt_parent;
|
13018
|
-
pnl_free(txn->tw.
|
13019
|
-
txn->tw.
|
13066
|
+
pnl_free(txn->tw.spilled.list);
|
13067
|
+
txn->tw.spilled.list = nullptr;
|
13020
13068
|
if (txn == env->me_txn0) {
|
13021
13069
|
eASSERT(env, txn->mt_parent == NULL);
|
13022
13070
|
/* Export or close DBI handles created in this txn */
|
@@ -13283,7 +13331,7 @@ typedef struct gc_update_context {
|
|
13283
13331
|
#if MDBX_ENABLE_BIGFOOT
|
13284
13332
|
txnid_t bigfoot;
|
13285
13333
|
#endif /* MDBX_ENABLE_BIGFOOT */
|
13286
|
-
|
13334
|
+
MDBX_cursor cursor;
|
13287
13335
|
} gcu_context_t;
|
13288
13336
|
|
13289
13337
|
static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
|
@@ -13292,7 +13340,7 @@ static __inline int gcu_context_init(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13292
13340
|
#if MDBX_ENABLE_BIGFOOT
|
13293
13341
|
ctx->bigfoot = txn->mt_txnid;
|
13294
13342
|
#endif /* MDBX_ENABLE_BIGFOOT */
|
13295
|
-
return cursor_init(&ctx->cursor
|
13343
|
+
return cursor_init(&ctx->cursor, txn, FREE_DBI);
|
13296
13344
|
}
|
13297
13345
|
|
13298
13346
|
static __always_inline size_t gcu_backlog_size(MDBX_txn *txn) {
|
@@ -13311,10 +13359,10 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13311
13359
|
#endif /* MDBX_ENABLE_BIGFOOT */
|
13312
13360
|
key.iov_len = sizeof(txnid_t);
|
13313
13361
|
const struct cursor_set_result csr =
|
13314
|
-
cursor_set(&ctx->cursor
|
13362
|
+
cursor_set(&ctx->cursor, &key, &val, MDBX_SET);
|
13315
13363
|
if (csr.err == MDBX_SUCCESS && csr.exact) {
|
13316
13364
|
ctx->retired_stored = 0;
|
13317
|
-
err = mdbx_cursor_del(&ctx->cursor
|
13365
|
+
err = mdbx_cursor_del(&ctx->cursor, 0);
|
13318
13366
|
TRACE("== clear-4linear, backlog %zu, err %d", gcu_backlog_size(txn),
|
13319
13367
|
err);
|
13320
13368
|
}
|
@@ -13327,6 +13375,13 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13327
13375
|
return err;
|
13328
13376
|
}
|
13329
13377
|
|
13378
|
+
static int gcu_touch(gcu_context_t *ctx) {
|
13379
|
+
ctx->cursor.mc_flags |= C_GCU;
|
13380
|
+
int err = cursor_touch(&ctx->cursor);
|
13381
|
+
ctx->cursor.mc_flags -= C_GCU;
|
13382
|
+
return err;
|
13383
|
+
}
|
13384
|
+
|
13330
13385
|
/* Prepare a backlog of pages to modify GC itself, while reclaiming is
|
13331
13386
|
* prohibited. It should be enough to prevent search in page_alloc_slowpath()
|
13332
13387
|
* during a deleting, when GC tree is unbalanced. */
|
@@ -13356,14 +13411,12 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
|
|
13356
13411
|
key.iov_base = val.iov_base = nullptr;
|
13357
13412
|
key.iov_len = sizeof(txnid_t);
|
13358
13413
|
val.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
|
13359
|
-
err = cursor_spill(&ctx->cursor
|
13414
|
+
err = cursor_spill(&ctx->cursor, &key, &val);
|
13360
13415
|
if (unlikely(err != MDBX_SUCCESS))
|
13361
13416
|
return err;
|
13362
13417
|
}
|
13363
13418
|
|
13364
|
-
|
13365
|
-
txn->mt_flags -= MDBX_TXN_UPDATE_GC;
|
13366
|
-
err = cursor_touch(&ctx->cursor.outer);
|
13419
|
+
err = gcu_touch(ctx);
|
13367
13420
|
TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
13368
13421
|
|
13369
13422
|
if (unlikely(pages4retiredlist > 1) &&
|
@@ -13373,22 +13426,20 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
|
|
13373
13426
|
err = gcu_clean_stored_retired(txn, ctx);
|
13374
13427
|
if (unlikely(err != MDBX_SUCCESS))
|
13375
13428
|
return err;
|
13376
|
-
err =
|
13377
|
-
|
13378
|
-
|
13429
|
+
err =
|
13430
|
+
page_alloc_slowpath(&ctx->cursor, pages4retiredlist, MDBX_ALLOC_RESERVE)
|
13431
|
+
.err;
|
13379
13432
|
TRACE("== after-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
13380
|
-
cASSERT(&ctx->cursor
|
13433
|
+
cASSERT(&ctx->cursor,
|
13381
13434
|
gcu_backlog_size(txn) >= pages4retiredlist || err != MDBX_SUCCESS);
|
13382
13435
|
}
|
13383
13436
|
|
13384
13437
|
while (gcu_backlog_size(txn) < backlog4cow + pages4retiredlist &&
|
13385
13438
|
err == MDBX_SUCCESS)
|
13386
|
-
err = page_alloc_slowpath(&ctx->cursor
|
13387
|
-
|
13388
|
-
MDBX_ALLOC_RESERVE | MDBX_ALLOC_BACKLOG)
|
13439
|
+
err = page_alloc_slowpath(&ctx->cursor, 0,
|
13440
|
+
MDBX_ALLOC_RESERVE | MDBX_ALLOC_UNIMPORTANT)
|
13389
13441
|
.err;
|
13390
13442
|
|
13391
|
-
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
13392
13443
|
TRACE("<< backlog %zu, err %d", gcu_backlog_size(txn), err);
|
13393
13444
|
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
|
13394
13445
|
}
|
@@ -13417,9 +13468,8 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) {
|
|
13417
13468
|
MDBX_env *const env = txn->mt_env;
|
13418
13469
|
const char *const dbg_prefix_mode = ctx->lifo ? " lifo" : " fifo";
|
13419
13470
|
(void)dbg_prefix_mode;
|
13420
|
-
|
13421
|
-
|
13422
|
-
txn->mt_cursors[FREE_DBI] = &ctx->cursor.outer;
|
13471
|
+
ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI];
|
13472
|
+
txn->mt_cursors[FREE_DBI] = &ctx->cursor;
|
13423
13473
|
|
13424
13474
|
/* txn->tw.relist[] can grow and shrink during this call.
|
13425
13475
|
* txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow.
|
@@ -13481,7 +13531,7 @@ retry:
|
|
13481
13531
|
ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
|
13482
13532
|
key.iov_base = &ctx->cleaned_id;
|
13483
13533
|
key.iov_len = sizeof(ctx->cleaned_id);
|
13484
|
-
rc = mdbx_cursor_get(&ctx->cursor
|
13534
|
+
rc = mdbx_cursor_get(&ctx->cursor, &key, NULL, MDBX_SET);
|
13485
13535
|
if (rc == MDBX_NOTFOUND)
|
13486
13536
|
continue;
|
13487
13537
|
if (unlikely(rc != MDBX_SUCCESS))
|
@@ -13494,18 +13544,17 @@ retry:
|
|
13494
13544
|
tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
|
13495
13545
|
TRACE("%s: cleanup-reclaimed-id [%zu]%" PRIaTXN, dbg_prefix_mode,
|
13496
13546
|
ctx->cleaned_slot, ctx->cleaned_id);
|
13497
|
-
tASSERT(txn, *txn->mt_cursors == &ctx->cursor
|
13498
|
-
rc = mdbx_cursor_del(&ctx->cursor
|
13547
|
+
tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
|
13548
|
+
rc = mdbx_cursor_del(&ctx->cursor, 0);
|
13499
13549
|
if (unlikely(rc != MDBX_SUCCESS))
|
13500
13550
|
goto bailout;
|
13501
13551
|
} while (ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
|
13502
13552
|
txl_sort(txn->tw.lifo_reclaimed);
|
13503
13553
|
}
|
13504
13554
|
} else {
|
13505
|
-
/*
|
13506
|
-
* now delete them and any we reserved for tw.relist. */
|
13555
|
+
/* Удаляем оставшиеся вынутые из GC записи. */
|
13507
13556
|
while (ctx->cleaned_id <= txn->tw.last_reclaimed) {
|
13508
|
-
rc = cursor_first(&ctx->cursor
|
13557
|
+
rc = cursor_first(&ctx->cursor, &key, NULL);
|
13509
13558
|
if (rc == MDBX_NOTFOUND)
|
13510
13559
|
break;
|
13511
13560
|
if (unlikely(rc != MDBX_SUCCESS))
|
@@ -13530,8 +13579,8 @@ retry:
|
|
13530
13579
|
tASSERT(txn, ctx->cleaned_id <= env->me_lck->mti_oldest_reader.weak);
|
13531
13580
|
TRACE("%s: cleanup-reclaimed-id %" PRIaTXN, dbg_prefix_mode,
|
13532
13581
|
ctx->cleaned_id);
|
13533
|
-
tASSERT(txn, *txn->mt_cursors == &ctx->cursor
|
13534
|
-
rc = mdbx_cursor_del(&ctx->cursor
|
13582
|
+
tASSERT(txn, *txn->mt_cursors == &ctx->cursor);
|
13583
|
+
rc = mdbx_cursor_del(&ctx->cursor, 0);
|
13535
13584
|
if (unlikely(rc != MDBX_SUCCESS))
|
13536
13585
|
goto bailout;
|
13537
13586
|
}
|
@@ -13566,10 +13615,7 @@ retry:
|
|
13566
13615
|
if (txn->tw.loose_count > 0) {
|
13567
13616
|
TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix_mode,
|
13568
13617
|
txn->tw.loose_count);
|
13569
|
-
rc = page_alloc_slowpath(&ctx->cursor
|
13570
|
-
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
|
13571
|
-
MDBX_ALLOC_RESERVE)
|
13572
|
-
.err;
|
13618
|
+
rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
|
13573
13619
|
if (rc == MDBX_SUCCESS) {
|
13574
13620
|
TRACE("%s: retry since gc-slot for %zu loose-pages available",
|
13575
13621
|
dbg_prefix_mode, txn->tw.loose_count);
|
@@ -13651,10 +13697,9 @@ retry:
|
|
13651
13697
|
if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
|
13652
13698
|
if (unlikely(!ctx->retired_stored)) {
|
13653
13699
|
/* Make sure last page of GC is touched and on retired-list */
|
13654
|
-
|
13655
|
-
rc
|
13656
|
-
|
13657
|
-
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
13700
|
+
rc = cursor_last(&ctx->cursor, nullptr, nullptr);
|
13701
|
+
if (likely(rc != MDBX_SUCCESS))
|
13702
|
+
rc = gcu_touch(ctx);
|
13658
13703
|
if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
|
13659
13704
|
goto bailout;
|
13660
13705
|
}
|
@@ -13664,6 +13709,8 @@ retry:
|
|
13664
13709
|
do {
|
13665
13710
|
if (ctx->bigfoot > txn->mt_txnid) {
|
13666
13711
|
rc = gcu_clean_stored_retired(txn, ctx);
|
13712
|
+
if (unlikely(rc != MDBX_SUCCESS))
|
13713
|
+
goto bailout;
|
13667
13714
|
tASSERT(txn, ctx->bigfoot <= txn->mt_txnid);
|
13668
13715
|
}
|
13669
13716
|
|
@@ -13685,7 +13732,7 @@ retry:
|
|
13685
13732
|
? env->me_maxgc_ov1page
|
13686
13733
|
: left;
|
13687
13734
|
data.iov_len = (chunk + 1) * sizeof(pgno_t);
|
13688
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
13735
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
|
13689
13736
|
if (unlikely(rc != MDBX_SUCCESS))
|
13690
13737
|
goto bailout;
|
13691
13738
|
|
@@ -13723,7 +13770,7 @@ retry:
|
|
13723
13770
|
do {
|
13724
13771
|
gcu_prepare_backlog(txn, ctx, true);
|
13725
13772
|
data.iov_len = MDBX_PNL_SIZEOF(txn->tw.retired_pages);
|
13726
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
13773
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data, MDBX_RESERVE);
|
13727
13774
|
if (unlikely(rc != MDBX_SUCCESS))
|
13728
13775
|
goto bailout;
|
13729
13776
|
/* Retry if tw.retired_pages[] grew during the Put() */
|
@@ -13790,17 +13837,13 @@ retry:
|
|
13790
13837
|
left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
13791
13838
|
env->me_maxgc_ov1page &&
|
13792
13839
|
!ctx->dense) {
|
13793
|
-
/*
|
13840
|
+
/* Hужен свободный для для сохранения списка страниц. */
|
13794
13841
|
bool need_cleanup = false;
|
13795
|
-
txnid_t snap_oldest;
|
13842
|
+
txnid_t snap_oldest = 0;
|
13796
13843
|
retry_rid:
|
13797
|
-
txn->mt_flags -= MDBX_TXN_UPDATE_GC;
|
13798
13844
|
do {
|
13799
|
-
|
13800
|
-
|
13801
|
-
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
|
13802
|
-
MDBX_ALLOC_RESERVE)
|
13803
|
-
.err;
|
13845
|
+
rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
|
13846
|
+
snap_oldest = env->me_lck->mti_oldest_reader.weak;
|
13804
13847
|
if (likely(rc == MDBX_SUCCESS)) {
|
13805
13848
|
TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,
|
13806
13849
|
MDBX_PNL_LAST(txn->tw.lifo_reclaimed));
|
@@ -13812,7 +13855,6 @@ retry:
|
|
13812
13855
|
left >
|
13813
13856
|
(MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
13814
13857
|
env->me_maxgc_ov1page);
|
13815
|
-
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
13816
13858
|
|
13817
13859
|
if (likely(rc == MDBX_SUCCESS)) {
|
13818
13860
|
TRACE("%s: got enough from GC.", dbg_prefix_mode);
|
@@ -13830,7 +13872,7 @@ retry:
|
|
13830
13872
|
} else {
|
13831
13873
|
tASSERT(txn, txn->tw.last_reclaimed == 0);
|
13832
13874
|
if (unlikely(txn_oldest_reader(txn) != snap_oldest))
|
13833
|
-
/* should retry page_alloc_slowpath(
|
13875
|
+
/* should retry page_alloc_slowpath()
|
13834
13876
|
* if the oldest reader changes since the last attempt */
|
13835
13877
|
goto retry_rid;
|
13836
13878
|
/* no reclaimable GC entries,
|
@@ -13840,7 +13882,8 @@ retry:
|
|
13840
13882
|
ctx->rid);
|
13841
13883
|
}
|
13842
13884
|
|
13843
|
-
/*
|
13885
|
+
/* В GC нет годных к переработке записей,
|
13886
|
+
* будем использовать свободные id в обратном порядке. */
|
13844
13887
|
while (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) < prefer_max_scatter &&
|
13845
13888
|
left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) -
|
13846
13889
|
ctx->reused_slot) *
|
@@ -13858,26 +13901,20 @@ retry:
|
|
13858
13901
|
}
|
13859
13902
|
|
13860
13903
|
tASSERT(txn, ctx->rid >= MIN_TXNID && ctx->rid <= MAX_TXNID);
|
13861
|
-
|
13904
|
+
ctx->rid -= 1;
|
13862
13905
|
key.iov_base = &ctx->rid;
|
13863
13906
|
key.iov_len = sizeof(ctx->rid);
|
13864
|
-
rc = mdbx_cursor_get(&ctx->cursor
|
13907
|
+
rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
|
13865
13908
|
if (unlikely(rc == MDBX_SUCCESS)) {
|
13866
|
-
DEBUG("%s: GC's id %" PRIaTXN " is
|
13909
|
+
DEBUG("%s: GC's id %" PRIaTXN " is present, going to first",
|
13867
13910
|
dbg_prefix_mode, ctx->rid);
|
13868
|
-
|
13869
|
-
rc = mdbx_cursor_get(&ctx->cursor.outer, &key, &data, MDBX_FIRST);
|
13870
|
-
if (rc == MDBX_NOTFOUND) {
|
13871
|
-
DEBUG("%s: GC is empty (going dense-mode)", dbg_prefix_mode);
|
13872
|
-
ctx->dense = true;
|
13873
|
-
break;
|
13874
|
-
}
|
13911
|
+
rc = cursor_first(&ctx->cursor, &key, nullptr);
|
13875
13912
|
if (unlikely(rc != MDBX_SUCCESS ||
|
13876
13913
|
key.iov_len != sizeof(txnid_t))) {
|
13877
13914
|
rc = MDBX_CORRUPTED;
|
13878
13915
|
goto bailout;
|
13879
13916
|
}
|
13880
|
-
txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13917
|
+
const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13881
13918
|
if (gc_first <= MIN_TXNID) {
|
13882
13919
|
DEBUG("%s: no free GC's id(s) less than %" PRIaTXN
|
13883
13920
|
" (going dense-mode)",
|
@@ -13925,13 +13962,13 @@ retry:
|
|
13925
13962
|
tASSERT(txn, txn->tw.lifo_reclaimed == NULL);
|
13926
13963
|
if (unlikely(ctx->rid == 0)) {
|
13927
13964
|
ctx->rid = txn_oldest_reader(txn);
|
13928
|
-
rc =
|
13929
|
-
if (rc == MDBX_SUCCESS) {
|
13965
|
+
rc = cursor_first(&ctx->cursor, &key, nullptr);
|
13966
|
+
if (likely(rc == MDBX_SUCCESS)) {
|
13930
13967
|
if (unlikely(key.iov_len != sizeof(txnid_t))) {
|
13931
13968
|
rc = MDBX_CORRUPTED;
|
13932
13969
|
goto bailout;
|
13933
13970
|
}
|
13934
|
-
txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13971
|
+
const txnid_t gc_first = unaligned_peek_u64(4, key.iov_base);
|
13935
13972
|
if (ctx->rid >= gc_first)
|
13936
13973
|
ctx->rid = gc_first - 1;
|
13937
13974
|
if (unlikely(ctx->rid == 0)) {
|
@@ -14022,7 +14059,7 @@ retry:
|
|
14022
14059
|
TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix_mode, chunk,
|
14023
14060
|
ctx->settled + 1, ctx->settled + chunk + 1, reservation_gc_id);
|
14024
14061
|
gcu_prepare_backlog(txn, ctx, true);
|
14025
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
14062
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
|
14026
14063
|
MDBX_RESERVE | MDBX_NOOVERWRITE);
|
14027
14064
|
tASSERT(txn, pnl_check_allocated(txn->tw.relist,
|
14028
14065
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
@@ -14070,7 +14107,7 @@ retry:
|
|
14070
14107
|
size_t left = amount;
|
14071
14108
|
if (txn->tw.lifo_reclaimed == nullptr) {
|
14072
14109
|
tASSERT(txn, ctx->lifo == 0);
|
14073
|
-
rc = cursor_first(&ctx->cursor
|
14110
|
+
rc = cursor_first(&ctx->cursor, &key, &data);
|
14074
14111
|
if (unlikely(rc != MDBX_SUCCESS))
|
14075
14112
|
goto bailout;
|
14076
14113
|
} else {
|
@@ -14104,7 +14141,7 @@ retry:
|
|
14104
14141
|
dbg_prefix_mode, fill_gc_id, ctx->filled_slot);
|
14105
14142
|
key.iov_base = &fill_gc_id;
|
14106
14143
|
key.iov_len = sizeof(fill_gc_id);
|
14107
|
-
rc = mdbx_cursor_get(&ctx->cursor
|
14144
|
+
rc = mdbx_cursor_get(&ctx->cursor, &key, &data, MDBX_SET_KEY);
|
14108
14145
|
if (unlikely(rc != MDBX_SUCCESS))
|
14109
14146
|
goto bailout;
|
14110
14147
|
}
|
@@ -14118,7 +14155,6 @@ retry:
|
|
14118
14155
|
key.iov_len = sizeof(fill_gc_id);
|
14119
14156
|
|
14120
14157
|
tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
|
14121
|
-
txn->mt_flags += MDBX_TXN_FROZEN_RE;
|
14122
14158
|
size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
|
14123
14159
|
if (unlikely(chunk > left)) {
|
14124
14160
|
TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk,
|
@@ -14126,14 +14162,11 @@ retry:
|
|
14126
14162
|
if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) ||
|
14127
14163
|
chunk - left > env->me_maxgc_ov1page) {
|
14128
14164
|
data.iov_len = (left + 1) * sizeof(pgno_t);
|
14129
|
-
if (ctx->loop < 7)
|
14130
|
-
txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
|
14131
14165
|
}
|
14132
14166
|
chunk = left;
|
14133
14167
|
}
|
14134
|
-
rc = mdbx_cursor_put(&ctx->cursor
|
14168
|
+
rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
|
14135
14169
|
MDBX_CURRENT | MDBX_RESERVE);
|
14136
|
-
txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
|
14137
14170
|
if (unlikely(rc != MDBX_SUCCESS))
|
14138
14171
|
goto bailout;
|
14139
14172
|
gcu_clean_reserved(env, data);
|
@@ -14182,7 +14215,7 @@ retry:
|
|
14182
14215
|
|
14183
14216
|
if (txn->tw.lifo_reclaimed == nullptr) {
|
14184
14217
|
tASSERT(txn, ctx->lifo == 0);
|
14185
|
-
rc = cursor_next(&ctx->cursor
|
14218
|
+
rc = cursor_next(&ctx->cursor, &key, &data, MDBX_NEXT);
|
14186
14219
|
if (unlikely(rc != MDBX_SUCCESS))
|
14187
14220
|
goto bailout;
|
14188
14221
|
} else {
|
@@ -14213,7 +14246,7 @@ retry:
|
|
14213
14246
|
ctx->cleaned_slot == MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed));
|
14214
14247
|
|
14215
14248
|
bailout:
|
14216
|
-
txn->mt_cursors[FREE_DBI] = ctx->cursor.
|
14249
|
+
txn->mt_cursors[FREE_DBI] = ctx->cursor.mc_next;
|
14217
14250
|
|
14218
14251
|
MDBX_PNL_SETSIZE(txn->tw.relist, 0);
|
14219
14252
|
#if MDBX_ENABLE_PROFGC
|
@@ -14363,7 +14396,8 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14363
14396
|
MDBX_PNL_SETSIZE(parent->tw.retired_pages, w);
|
14364
14397
|
|
14365
14398
|
/* Filter-out parent spill list */
|
14366
|
-
if (parent->tw.
|
14399
|
+
if (parent->tw.spilled.list &&
|
14400
|
+
MDBX_PNL_GETSIZE(parent->tw.spilled.list) > 0) {
|
14367
14401
|
const MDBX_PNL sl = spill_purge(parent);
|
14368
14402
|
size_t len = MDBX_PNL_GETSIZE(sl);
|
14369
14403
|
if (len) {
|
@@ -14378,7 +14412,7 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14378
14412
|
DEBUG("refund parent's spilled page %" PRIaPGNO, sl[i] >> 1);
|
14379
14413
|
i -= 1;
|
14380
14414
|
} while (i && sl[i] >= (parent->mt_next_pgno << 1));
|
14381
|
-
|
14415
|
+
MDBX_PNL_SETSIZE(sl, i);
|
14382
14416
|
#else
|
14383
14417
|
assert(MDBX_PNL_MOST(sl) == MDBX_PNL_FIRST(sl));
|
14384
14418
|
size_t i = 0;
|
@@ -14451,10 +14485,10 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14451
14485
|
}
|
14452
14486
|
|
14453
14487
|
/* Remove anything in our spill list from parent's dirty list */
|
14454
|
-
if (txn->tw.
|
14455
|
-
tASSERT(txn, pnl_check_allocated(txn->tw.
|
14488
|
+
if (txn->tw.spilled.list) {
|
14489
|
+
tASSERT(txn, pnl_check_allocated(txn->tw.spilled.list,
|
14456
14490
|
(size_t)parent->mt_next_pgno << 1));
|
14457
|
-
dpl_sift(parent, txn->tw.
|
14491
|
+
dpl_sift(parent, txn->tw.spilled.list, true);
|
14458
14492
|
tASSERT(parent,
|
14459
14493
|
parent->tw.dirtyroom + parent->tw.dirtylist->length ==
|
14460
14494
|
(parent->mt_parent ? parent->mt_parent->tw.dirtyroom
|
@@ -14606,23 +14640,23 @@ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
|
14606
14640
|
tASSERT(parent, dirtylist_check(parent));
|
14607
14641
|
dpl_free(txn);
|
14608
14642
|
|
14609
|
-
if (txn->tw.
|
14610
|
-
if (parent->tw.
|
14643
|
+
if (txn->tw.spilled.list) {
|
14644
|
+
if (parent->tw.spilled.list) {
|
14611
14645
|
/* Must not fail since space was preserved above. */
|
14612
|
-
pnl_merge(parent->tw.
|
14613
|
-
pnl_free(txn->tw.
|
14646
|
+
pnl_merge(parent->tw.spilled.list, txn->tw.spilled.list);
|
14647
|
+
pnl_free(txn->tw.spilled.list);
|
14614
14648
|
} else {
|
14615
|
-
parent->tw.
|
14616
|
-
parent->tw.
|
14649
|
+
parent->tw.spilled.list = txn->tw.spilled.list;
|
14650
|
+
parent->tw.spilled.least_removed = txn->tw.spilled.least_removed;
|
14617
14651
|
}
|
14618
14652
|
tASSERT(parent, dirtylist_check(parent));
|
14619
14653
|
}
|
14620
14654
|
|
14621
14655
|
parent->mt_flags &= ~MDBX_TXN_HAS_CHILD;
|
14622
|
-
if (parent->tw.
|
14623
|
-
assert(pnl_check_allocated(parent->tw.
|
14656
|
+
if (parent->tw.spilled.list) {
|
14657
|
+
assert(pnl_check_allocated(parent->tw.spilled.list,
|
14624
14658
|
(size_t)parent->mt_next_pgno << 1));
|
14625
|
-
if (MDBX_PNL_GETSIZE(parent->tw.
|
14659
|
+
if (MDBX_PNL_GETSIZE(parent->tw.spilled.list))
|
14626
14660
|
parent->mt_flags |= MDBX_TXN_SPILLS;
|
14627
14661
|
}
|
14628
14662
|
}
|
@@ -14693,8 +14727,8 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
|
14693
14727
|
sizeof(parent->mt_geo)) == 0);
|
14694
14728
|
tASSERT(txn, memcmp(&parent->mt_canary, &txn->mt_canary,
|
14695
14729
|
sizeof(parent->mt_canary)) == 0);
|
14696
|
-
tASSERT(txn, !txn->tw.
|
14697
|
-
MDBX_PNL_GETSIZE(txn->tw.
|
14730
|
+
tASSERT(txn, !txn->tw.spilled.list ||
|
14731
|
+
MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0);
|
14698
14732
|
tASSERT(txn, txn->tw.loose_count == 0);
|
14699
14733
|
|
14700
14734
|
/* fast completion of pure nested transaction */
|
@@ -14714,10 +14748,10 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
|
14714
14748
|
goto fail;
|
14715
14749
|
}
|
14716
14750
|
|
14717
|
-
if (txn->tw.
|
14718
|
-
if (parent->tw.
|
14719
|
-
rc = pnl_need(&parent->tw.
|
14720
|
-
MDBX_PNL_GETSIZE(txn->tw.
|
14751
|
+
if (txn->tw.spilled.list) {
|
14752
|
+
if (parent->tw.spilled.list) {
|
14753
|
+
rc = pnl_need(&parent->tw.spilled.list,
|
14754
|
+
MDBX_PNL_GETSIZE(txn->tw.spilled.list));
|
14721
14755
|
if (unlikely(rc != MDBX_SUCCESS))
|
14722
14756
|
goto fail;
|
14723
14757
|
}
|
@@ -15837,7 +15871,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending,
|
|
15837
15871
|
}
|
15838
15872
|
|
15839
15873
|
uint64_t timestamp = 0;
|
15840
|
-
while ("workaround for https://
|
15874
|
+
while ("workaround for https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
|
15841
15875
|
rc =
|
15842
15876
|
coherency_check_written(env, pending->unsafe_txnid, target, &timestamp);
|
15843
15877
|
if (likely(rc == MDBX_SUCCESS))
|
@@ -16359,7 +16393,7 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
|
16359
16393
|
|
16360
16394
|
uint64_t timestamp = 0;
|
16361
16395
|
while ("workaround for "
|
16362
|
-
"https://
|
16396
|
+
"https://libmdbx.dqdkfa.ru/dead-github/issues/269") {
|
16363
16397
|
meta = *head.ptr_c;
|
16364
16398
|
rc = coherency_check_readed(env, head.txnid, meta.mm_dbs, &meta,
|
16365
16399
|
&timestamp);
|
@@ -17503,13 +17537,13 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx,
|
|
17503
17537
|
}
|
17504
17538
|
#else
|
17505
17539
|
struct stat st;
|
17506
|
-
if (stat(pathname, &st)) {
|
17540
|
+
if (stat(pathname, &st) != 0) {
|
17507
17541
|
rc = errno;
|
17508
17542
|
if (rc != MDBX_ENOFILE)
|
17509
17543
|
return rc;
|
17510
17544
|
if (mode == 0 || (*flags & MDBX_RDONLY) != 0)
|
17511
17545
|
/* can't open existing */
|
17512
|
-
return rc
|
17546
|
+
return rc /* MDBX_ENOFILE */;
|
17513
17547
|
|
17514
17548
|
/* auto-create directory if requested */
|
17515
17549
|
const mdbx_mode_t dir_mode =
|
@@ -17702,7 +17736,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
|
|
17702
17736
|
} else {
|
17703
17737
|
#if MDBX_MMAP_INCOHERENT_FILE_WRITE
|
17704
17738
|
/* Temporary `workaround` for OpenBSD kernel's flaw.
|
17705
|
-
* See https://
|
17739
|
+
* See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */
|
17706
17740
|
if ((flags & MDBX_WRITEMAP) == 0) {
|
17707
17741
|
if (flags & MDBX_ACCEDE)
|
17708
17742
|
flags |= MDBX_WRITEMAP;
|
@@ -18014,7 +18048,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname,
|
|
18014
18048
|
}
|
18015
18049
|
|
18016
18050
|
if ((flags & MDBX_RDONLY) == 0) {
|
18017
|
-
const size_t tsize = sizeof(MDBX_txn),
|
18051
|
+
const size_t tsize = sizeof(MDBX_txn) + sizeof(MDBX_cursor),
|
18018
18052
|
size = tsize + env->me_maxdbs *
|
18019
18053
|
(sizeof(MDBX_db) + sizeof(MDBX_cursor *) +
|
18020
18054
|
sizeof(MDBX_atomic_uint32_t) + 1);
|
@@ -18139,9 +18173,10 @@ __cold static int env_close(MDBX_env *env) {
|
|
18139
18173
|
}
|
18140
18174
|
|
18141
18175
|
if (env->me_dbxs) {
|
18142
|
-
for (size_t i = env->me_numdbs;
|
18176
|
+
for (size_t i = CORE_DBS; i < env->me_numdbs; ++i)
|
18143
18177
|
osal_free(env->me_dbxs[i].md_name.iov_base);
|
18144
18178
|
osal_free(env->me_dbxs);
|
18179
|
+
env->me_numdbs = CORE_DBS;
|
18145
18180
|
env->me_dbxs = nullptr;
|
18146
18181
|
}
|
18147
18182
|
if (env->me_pbuf) {
|
@@ -18164,7 +18199,7 @@ __cold static int env_close(MDBX_env *env) {
|
|
18164
18199
|
dpl_free(env->me_txn0);
|
18165
18200
|
txl_free(env->me_txn0->tw.lifo_reclaimed);
|
18166
18201
|
pnl_free(env->me_txn0->tw.retired_pages);
|
18167
|
-
pnl_free(env->me_txn0->tw.
|
18202
|
+
pnl_free(env->me_txn0->tw.spilled.list);
|
18168
18203
|
pnl_free(env->me_txn0->tw.relist);
|
18169
18204
|
osal_free(env->me_txn0);
|
18170
18205
|
env->me_txn0 = nullptr;
|
@@ -18907,7 +18942,8 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
|
|
18907
18942
|
if (!MDBX_DISABLE_VALIDATION) {
|
18908
18943
|
const MDBX_env *env = mc->mc_txn->mt_env;
|
18909
18944
|
const size_t dsize = data->iov_len;
|
18910
|
-
if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax)
|
18945
|
+
if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax) &&
|
18946
|
+
mc->mc_dbi != FREE_DBI)
|
18911
18947
|
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
18912
18948
|
const unsigned npages = number_of_ovpages(env, dsize);
|
18913
18949
|
if (unlikely(lp.page->mp_pages != npages)) {
|
@@ -18915,7 +18951,7 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
|
|
18915
18951
|
return bad_page(lp.page,
|
18916
18952
|
"too less n-pages %u for bigdata-node (%zu bytes)",
|
18917
18953
|
lp.page->mp_pages, dsize);
|
18918
|
-
else
|
18954
|
+
else if (mc->mc_dbi != FREE_DBI)
|
18919
18955
|
poor_page(lp.page, "extra n-pages %u for bigdata-node (%zu bytes)",
|
18920
18956
|
lp.page->mp_pages, dsize);
|
18921
18957
|
}
|
@@ -20011,7 +20047,6 @@ static int touch_dbi(MDBX_cursor *mc) {
|
|
20011
20047
|
*mc->mc_dbistate |= DBI_DIRTY;
|
20012
20048
|
mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
|
20013
20049
|
if (mc->mc_dbi >= CORE_DBS) {
|
20014
|
-
cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_UPDATE_GC) == 0);
|
20015
20050
|
/* Touch DB record of named DB */
|
20016
20051
|
MDBX_cursor_couple cx;
|
20017
20052
|
int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI);
|
@@ -20424,9 +20459,9 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|
20424
20459
|
|
20425
20460
|
/* Large/Overflow page overwrites need special handling */
|
20426
20461
|
if (unlikely(node_flags(node) & F_BIGDATA)) {
|
20427
|
-
|
20428
|
-
|
20429
|
-
|
20462
|
+
const size_t dpages = (node_size(key, data) > env->me_leaf_nodemax)
|
20463
|
+
? number_of_ovpages(env, data->iov_len)
|
20464
|
+
: 0;
|
20430
20465
|
|
20431
20466
|
const pgno_t pgno = node_largedata_pgno(node);
|
20432
20467
|
pgr_t lp = page_get_large(mc, pgno, mc->mc_pg[mc->mc_top]->mp_txnid);
|
@@ -20435,13 +20470,13 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|
20435
20470
|
cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW);
|
20436
20471
|
|
20437
20472
|
/* Is the ov page from this txn (or a parent) and big enough? */
|
20438
|
-
|
20439
|
-
|
20440
|
-
(
|
20441
|
-
|
20442
|
-
|
20443
|
-
|
20444
|
-
|
20473
|
+
const size_t ovpages = lp.page->mp_pages;
|
20474
|
+
const size_t extra_threshold =
|
20475
|
+
(mc->mc_dbi == FREE_DBI)
|
20476
|
+
? 1
|
20477
|
+
: /* LY: add configurable threshold to keep reserve space */ 0;
|
20478
|
+
if (!IS_FROZEN(mc->mc_txn, lp.page) && ovpages >= dpages &&
|
20479
|
+
ovpages <= dpages + extra_threshold) {
|
20445
20480
|
/* yes, overwrite it. */
|
20446
20481
|
if (!IS_MODIFIABLE(mc->mc_txn, lp.page)) {
|
20447
20482
|
if (IS_SPILLED(mc->mc_txn, lp.page)) {
|
@@ -20972,7 +21007,6 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
|
|
20972
21007
|
|
20973
21008
|
DEBUG("db %u allocated new page %" PRIaPGNO, mc->mc_dbi, ret.page->mp_pgno);
|
20974
21009
|
ret.page->mp_flags = (uint16_t)flags;
|
20975
|
-
ret.page->mp_txnid = mc->mc_txn->mt_front;
|
20976
21010
|
cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
|
20977
21011
|
cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
|
20978
21012
|
#if MDBX_ENABLE_PGOP_STAT
|
@@ -20994,25 +21028,24 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
|
|
20994
21028
|
return ret;
|
20995
21029
|
}
|
20996
21030
|
|
20997
|
-
static pgr_t page_new_large(MDBX_cursor *mc, const
|
21031
|
+
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) {
|
20998
21032
|
pgr_t ret = likely(npages == 1)
|
20999
21033
|
? page_alloc(mc)
|
21000
|
-
: page_alloc_slowpath(mc, npages,
|
21034
|
+
: page_alloc_slowpath(mc, npages, MDBX_ALLOC_DEFAULT);
|
21001
21035
|
if (unlikely(ret.err != MDBX_SUCCESS))
|
21002
21036
|
return ret;
|
21003
21037
|
|
21004
|
-
DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %
|
21038
|
+
DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %zu", mc->mc_dbi,
|
21005
21039
|
ret.page->mp_pgno, npages);
|
21006
21040
|
ret.page->mp_flags = P_OVERFLOW;
|
21007
|
-
ret.page->mp_txnid = mc->mc_txn->mt_front;
|
21008
21041
|
cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY);
|
21009
21042
|
cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY);
|
21010
21043
|
#if MDBX_ENABLE_PGOP_STAT
|
21011
21044
|
mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += npages;
|
21012
21045
|
#endif /* MDBX_ENABLE_PGOP_STAT */
|
21013
21046
|
|
21014
|
-
mc->mc_db->md_overflow_pages += npages;
|
21015
|
-
ret.page->mp_pages = npages;
|
21047
|
+
mc->mc_db->md_overflow_pages += (pgno_t)npages;
|
21048
|
+
ret.page->mp_pages = (pgno_t)npages;
|
21016
21049
|
cASSERT(mc, !(mc->mc_flags & C_SUB));
|
21017
21050
|
return ret;
|
21018
21051
|
}
|
@@ -21109,7 +21142,6 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21109
21142
|
key ? key->iov_len : 0, DKEY_DEBUG(key));
|
21110
21143
|
cASSERT(mc, key != NULL && data != NULL);
|
21111
21144
|
cASSERT(mc, PAGETYPE_COMPAT(mp) == P_LEAF);
|
21112
|
-
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
21113
21145
|
MDBX_page *largepage = NULL;
|
21114
21146
|
|
21115
21147
|
size_t node_bytes;
|
@@ -21118,6 +21150,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21118
21150
|
STATIC_ASSERT(sizeof(pgno_t) % 2 == 0);
|
21119
21151
|
node_bytes =
|
21120
21152
|
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
21153
|
+
cASSERT(mc, page_room(mp) >= node_bytes);
|
21121
21154
|
} else if (unlikely(node_size(key, data) >
|
21122
21155
|
mc->mc_txn->mt_env->me_leaf_nodemax)) {
|
21123
21156
|
/* Put data on large/overflow page. */
|
@@ -21131,6 +21164,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21131
21164
|
flags);
|
21132
21165
|
return MDBX_PROBLEM;
|
21133
21166
|
}
|
21167
|
+
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
21134
21168
|
const pgno_t ovpages = number_of_ovpages(mc->mc_txn->mt_env, data->iov_len);
|
21135
21169
|
const pgr_t npr = page_new_large(mc, ovpages);
|
21136
21170
|
if (unlikely(npr.err != MDBX_SUCCESS))
|
@@ -21142,10 +21176,12 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|
21142
21176
|
flags |= F_BIGDATA;
|
21143
21177
|
node_bytes =
|
21144
21178
|
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
21179
|
+
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
21145
21180
|
} else {
|
21181
|
+
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
21146
21182
|
node_bytes = node_size(key, data) + sizeof(indx_t);
|
21183
|
+
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
21147
21184
|
}
|
21148
|
-
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
21149
21185
|
|
21150
21186
|
/* Move higher pointers up one slot. */
|
21151
21187
|
const size_t nkeys = page_numkeys(mp);
|
@@ -22886,7 +22922,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
|
|
22886
22922
|
"big-node data size (%zu) <> min/max value-length (%zu/%zu)\n",
|
22887
22923
|
dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
|
22888
22924
|
if (unlikely(node_size_len(node_ks(node), dsize) <=
|
22889
|
-
mc->mc_txn->mt_env->me_leaf_nodemax)
|
22925
|
+
mc->mc_txn->mt_env->me_leaf_nodemax) &&
|
22926
|
+
mc->mc_dbi != FREE_DBI)
|
22890
22927
|
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
22891
22928
|
|
22892
22929
|
if ((mc->mc_checking & CC_RETIRING) == 0) {
|
@@ -22901,7 +22938,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
|
|
22901
22938
|
rc = bad_page(lp.page,
|
22902
22939
|
"too less n-pages %u for bigdata-node (%zu bytes)",
|
22903
22940
|
lp.page->mp_pages, dsize);
|
22904
|
-
else
|
22941
|
+
else if (mc->mc_dbi != FREE_DBI)
|
22905
22942
|
poor_page(lp.page,
|
22906
22943
|
"extra n-pages %u for bigdata-node (%zu bytes)",
|
22907
22944
|
lp.page->mp_pages, dsize);
|
@@ -23327,7 +23364,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
|
|
23327
23364
|
int rc = MDBX_SUCCESS, foliage = 0;
|
23328
23365
|
size_t i, ptop;
|
23329
23366
|
MDBX_env *const env = mc->mc_txn->mt_env;
|
23330
|
-
MDBX_val
|
23367
|
+
MDBX_val rkey, xdata;
|
23331
23368
|
MDBX_page *tmp_ki_copy = NULL;
|
23332
23369
|
DKBUF;
|
23333
23370
|
|
@@ -23419,6 +23456,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
|
|
23419
23456
|
eASSERT(env, split_indx >= minkeys && split_indx <= nkeys - minkeys + 1);
|
23420
23457
|
|
23421
23458
|
cASSERT(mc, !IS_BRANCH(mp) || newindx > 0);
|
23459
|
+
MDBX_val sepkey = {nullptr, 0};
|
23422
23460
|
/* It is reasonable and possible to split the page at the begin */
|
23423
23461
|
if (unlikely(newindx < minkeys)) {
|
23424
23462
|
split_indx = minkeys;
|
@@ -23751,7 +23789,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey,
|
|
23751
23789
|
break;
|
23752
23790
|
}
|
23753
23791
|
}
|
23754
|
-
} else if (!IS_LEAF2(mp)) {
|
23792
|
+
} else if (tmp_ki_copy /* !IS_LEAF2(mp) */) {
|
23755
23793
|
/* Move nodes */
|
23756
23794
|
mc->mc_pg[mc->mc_top] = sister;
|
23757
23795
|
i = split_indx;
|
@@ -25053,7 +25091,7 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn,
|
|
25053
25091
|
const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat);
|
25054
25092
|
|
25055
25093
|
/* is the environment open?
|
25056
|
-
* (https://
|
25094
|
+
* (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */
|
25057
25095
|
if (unlikely(!env->me_map)) {
|
25058
25096
|
/* environment not yet opened */
|
25059
25097
|
#if 1
|
@@ -27864,7 +27902,7 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn,
|
|
27864
27902
|
if (getrlimit(RLIMIT_RSS, &rss) == 0 && rss.rlim_cur < estimated_rss) {
|
27865
27903
|
rss.rlim_cur = estimated_rss;
|
27866
27904
|
if (rss.rlim_max < estimated_rss)
|
27867
|
-
rss.rlim_max =
|
27905
|
+
rss.rlim_max = estimated_rss;
|
27868
27906
|
if (setrlimit(RLIMIT_RSS, &rss)) {
|
27869
27907
|
rc = errno;
|
27870
27908
|
WARNING("setrlimit(%s, {%zu, %zu}) error %d", "RLIMIT_RSS",
|
@@ -29696,7 +29734,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
|
29696
29734
|
flags |= O_CLOEXEC;
|
29697
29735
|
#endif /* O_CLOEXEC */
|
29698
29736
|
|
29699
|
-
/* Safeguard for https://
|
29737
|
+
/* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
|
29700
29738
|
#if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
|
29701
29739
|
int stub_fd0 = -1, stub_fd1 = -1, stub_fd2 = -1;
|
29702
29740
|
static const char dev_null[] = "/dev/null";
|
@@ -29734,7 +29772,7 @@ MDBX_INTERNAL_FUNC int osal_openfile(const enum osal_openfile_purpose purpose,
|
|
29734
29772
|
errno = EACCES /* restore errno if file exists */;
|
29735
29773
|
}
|
29736
29774
|
|
29737
|
-
/* Safeguard for https://
|
29775
|
+
/* Safeguard for https://libmdbx.dqdkfa.ru/dead-github/issues/144 */
|
29738
29776
|
#if STDIN_FILENO == 0 && STDOUT_FILENO == 1 && STDERR_FILENO == 2
|
29739
29777
|
if (*fd == STDIN_FILENO) {
|
29740
29778
|
WARNING("Got STD%s_FILENO/%d, avoid using it by dup(fd)", "IN",
|
@@ -30091,10 +30129,15 @@ MDBX_INTERNAL_FUNC int osal_msync(const osal_mmap_t *map, size_t offset,
|
|
30091
30129
|
return (int)GetLastError();
|
30092
30130
|
#else
|
30093
30131
|
#if defined(__linux__) || defined(__gnu_linux__)
|
30094
|
-
assert(linux_kernel_version > 0x02061300);
|
30095
30132
|
/* Since Linux 2.6.19, MS_ASYNC is in fact a no-op. The kernel properly
|
30096
|
-
* tracks dirty pages and flushes
|
30097
|
-
|
30133
|
+
* tracks dirty pages and flushes ones as necessary. */
|
30134
|
+
//
|
30135
|
+
// However, this behavior may be changed in custom kernels,
|
30136
|
+
// so just leave such optimization to the libc discretion.
|
30137
|
+
//
|
30138
|
+
// assert(linux_kernel_version > 0x02061300);
|
30139
|
+
// if (mode_bits == MDBX_SYNC_NONE)
|
30140
|
+
// return MDBX_SUCCESS;
|
30098
30141
|
#endif /* Linux */
|
30099
30142
|
if (msync(ptr, length, (mode_bits & MDBX_SYNC_DATA) ? MS_SYNC : MS_ASYNC))
|
30100
30143
|
return errno;
|
@@ -30577,7 +30620,7 @@ MDBX_INTERNAL_FUNC int osal_munmap(osal_mmap_t *map) {
|
|
30577
30620
|
VALGRIND_MAKE_MEM_NOACCESS(map->address, map->current);
|
30578
30621
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30579
30622
|
* when this memory will re-used by malloc or another mmapping.
|
30580
|
-
* See https://
|
30623
|
+
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30581
30624
|
*/
|
30582
30625
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address,
|
30583
30626
|
(map->filesize && map->filesize < map->limit)
|
@@ -30656,7 +30699,7 @@ MDBX_INTERNAL_FUNC int osal_mresize(const int flags, osal_mmap_t *map,
|
|
30656
30699
|
|
30657
30700
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30658
30701
|
* when this memory will re-used by malloc or another mmapping.
|
30659
|
-
* See https://
|
30702
|
+
* See https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30660
30703
|
*/
|
30661
30704
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(map->address, map->limit);
|
30662
30705
|
status = NtUnmapViewOfSection(GetCurrentProcess(), map->address);
|
@@ -30937,7 +30980,7 @@ retry_mapview:;
|
|
30937
30980
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30938
30981
|
* when this memory will re-used by malloc or another mmapping.
|
30939
30982
|
* See
|
30940
|
-
* https://
|
30983
|
+
* https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30941
30984
|
*/
|
30942
30985
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(
|
30943
30986
|
map->address,
|
@@ -30959,7 +31002,7 @@ retry_mapview:;
|
|
30959
31002
|
/* Unpoisoning is required for ASAN to avoid false-positive diagnostic
|
30960
31003
|
* when this memory will re-used by malloc or another mmapping.
|
30961
31004
|
* See
|
30962
|
-
* https://
|
31005
|
+
* https://libmdbx.dqdkfa.ru/dead-github/pull/93#issuecomment-613687203
|
30963
31006
|
*/
|
30964
31007
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(
|
30965
31008
|
map->address, (map->current < map->limit) ? map->current : map->limit);
|
@@ -31782,9 +31825,9 @@ __dll_export
|
|
31782
31825
|
0,
|
31783
31826
|
12,
|
31784
31827
|
2,
|
31785
|
-
|
31786
|
-
{"2022-11-
|
31787
|
-
"v0.12.2-
|
31828
|
+
18,
|
31829
|
+
{"2022-11-28T15:45:29+03:00", "9558651eb24ab172a73a7bc6149cadad4c4df990", "b3248442962cfdda728656d6d9085147a7d42b63",
|
31830
|
+
"v0.12.2-18-gb3248442"},
|
31788
31831
|
sourcery};
|
31789
31832
|
|
31790
31833
|
__dll_export
|