lmdb 0.7.3 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -4
- data/Rakefile +48 -0
- data/ext/lmdb_ext/extconf.rb +27 -10
- data/ext/lmdb_ext/lmdb_ext.c +22 -8
- data/lib/lmdb/database.rb +2 -1
- data/lib/lmdb/version.rb +1 -1
- data/lmdb.gemspec +4 -1
- data/spec/lmdb_spec.rb +9 -0
- data/vendor/liblmdb/VERSION +1 -0
- data/vendor/{libraries/liblmdb → liblmdb}/lmdb.h +18 -63
- data/vendor/{libraries/liblmdb → liblmdb}/mdb.c +583 -1480
- data/vendor/{libraries/liblmdb → liblmdb}/midl.c +0 -62
- data/vendor/{libraries/liblmdb → liblmdb}/midl.h +4 -16
- metadata +7 -34
- data/CONTRIBUTORS +0 -8
- data/behaviour.org +0 -35
- data/ext/lmdb_ext/prototypes.sh +0 -4
- data/vendor/libraries/liblmdb/.gitignore +0 -24
- data/vendor/libraries/liblmdb/COPYRIGHT +0 -20
- data/vendor/libraries/liblmdb/Doxyfile +0 -1631
- data/vendor/libraries/liblmdb/LICENSE +0 -47
- data/vendor/libraries/liblmdb/Makefile +0 -118
- data/vendor/libraries/liblmdb/intro.doc +0 -192
- data/vendor/libraries/liblmdb/mdb_copy.1 +0 -61
- data/vendor/libraries/liblmdb/mdb_copy.c +0 -84
- data/vendor/libraries/liblmdb/mdb_drop.1 +0 -40
- data/vendor/libraries/liblmdb/mdb_drop.c +0 -135
- data/vendor/libraries/liblmdb/mdb_dump.1 +0 -81
- data/vendor/libraries/liblmdb/mdb_dump.c +0 -319
- data/vendor/libraries/liblmdb/mdb_load.1 +0 -84
- data/vendor/libraries/liblmdb/mdb_load.c +0 -492
- data/vendor/libraries/liblmdb/mdb_stat.1 +0 -70
- data/vendor/libraries/liblmdb/mdb_stat.c +0 -264
- data/vendor/libraries/liblmdb/mtest.c +0 -177
- data/vendor/libraries/liblmdb/mtest2.c +0 -124
- data/vendor/libraries/liblmdb/mtest3.c +0 -133
- data/vendor/libraries/liblmdb/mtest4.c +0 -168
- data/vendor/libraries/liblmdb/mtest5.c +0 -135
- data/vendor/libraries/liblmdb/mtest6.c +0 -141
- data/vendor/libraries/liblmdb/sample-bdb.txt +0 -73
- data/vendor/libraries/liblmdb/sample-mdb.txt +0 -62
- data/vendor/libraries/liblmdb/tooltag +0 -27
|
@@ -35,7 +35,7 @@
|
|
|
35
35
|
#ifndef _GNU_SOURCE
|
|
36
36
|
#define _GNU_SOURCE 1
|
|
37
37
|
#endif
|
|
38
|
-
#if defined(
|
|
38
|
+
#if defined(__WIN64__)
|
|
39
39
|
#define _FILE_OFFSET_BITS 64
|
|
40
40
|
#endif
|
|
41
41
|
#ifdef _WIN32
|
|
@@ -43,41 +43,6 @@
|
|
|
43
43
|
#include <windows.h>
|
|
44
44
|
#include <wchar.h> /* get wcscpy() */
|
|
45
45
|
|
|
46
|
-
/* We use native NT APIs to setup the memory map, so that we can
|
|
47
|
-
* let the DB file grow incrementally instead of always preallocating
|
|
48
|
-
* the full size. These APIs are defined in <wdm.h> and <ntifs.h>
|
|
49
|
-
* but those headers are meant for driver-level development and
|
|
50
|
-
* conflict with the regular user-level headers, so we explicitly
|
|
51
|
-
* declare them here. We get pointers to these functions from
|
|
52
|
-
* NTDLL.DLL at runtime, to avoid buildtime dependencies on any
|
|
53
|
-
* NTDLL import libraries.
|
|
54
|
-
*/
|
|
55
|
-
typedef NTSTATUS (WINAPI NtCreateSectionFunc)
|
|
56
|
-
(OUT PHANDLE sh, IN ACCESS_MASK acc,
|
|
57
|
-
IN void * oa OPTIONAL,
|
|
58
|
-
IN PLARGE_INTEGER ms OPTIONAL,
|
|
59
|
-
IN ULONG pp, IN ULONG aa, IN HANDLE fh OPTIONAL);
|
|
60
|
-
|
|
61
|
-
static NtCreateSectionFunc *NtCreateSection;
|
|
62
|
-
|
|
63
|
-
typedef enum _SECTION_INHERIT {
|
|
64
|
-
ViewShare = 1,
|
|
65
|
-
ViewUnmap = 2
|
|
66
|
-
} SECTION_INHERIT;
|
|
67
|
-
|
|
68
|
-
typedef NTSTATUS (WINAPI NtMapViewOfSectionFunc)
|
|
69
|
-
(IN PHANDLE sh, IN HANDLE ph,
|
|
70
|
-
IN OUT PVOID *addr, IN ULONG_PTR zbits,
|
|
71
|
-
IN SIZE_T cs, IN OUT PLARGE_INTEGER off OPTIONAL,
|
|
72
|
-
IN OUT PSIZE_T vs, IN SECTION_INHERIT ih,
|
|
73
|
-
IN ULONG at, IN ULONG pp);
|
|
74
|
-
|
|
75
|
-
static NtMapViewOfSectionFunc *NtMapViewOfSection;
|
|
76
|
-
|
|
77
|
-
typedef NTSTATUS (WINAPI NtCloseFunc)(HANDLE h);
|
|
78
|
-
|
|
79
|
-
static NtCloseFunc *NtClose;
|
|
80
|
-
|
|
81
46
|
/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it
|
|
82
47
|
* as int64 which is wrong. MSVC doesn't define it at all, so just
|
|
83
48
|
* don't use it.
|
|
@@ -96,7 +61,6 @@ static NtCloseFunc *NtClose;
|
|
|
96
61
|
# define SSIZE_MAX INT_MAX
|
|
97
62
|
# endif
|
|
98
63
|
#endif
|
|
99
|
-
#define MDB_OFF_T int64_t
|
|
100
64
|
#else
|
|
101
65
|
#include <sys/types.h>
|
|
102
66
|
#include <sys/stat.h>
|
|
@@ -109,13 +73,11 @@ static NtCloseFunc *NtClose;
|
|
|
109
73
|
#include <sys/file.h>
|
|
110
74
|
#endif
|
|
111
75
|
#include <fcntl.h>
|
|
112
|
-
#define MDB_OFF_T off_t
|
|
113
76
|
#endif
|
|
114
77
|
|
|
115
78
|
#if defined(__mips) && defined(__linux)
|
|
116
79
|
/* MIPS has cache coherency issues, requires explicit cache control */
|
|
117
|
-
#include <
|
|
118
|
-
extern int cacheflush(char *addr, int nbytes, int cache);
|
|
80
|
+
#include <sys/cachectl.h>
|
|
119
81
|
#define CACHEFLUSH(addr, bytes, cache) cacheflush(addr, bytes, cache)
|
|
120
82
|
#else
|
|
121
83
|
#define CACHEFLUSH(addr, bytes, cache)
|
|
@@ -146,7 +108,7 @@ typedef SSIZE_T ssize_t;
|
|
|
146
108
|
#include <unistd.h>
|
|
147
109
|
#endif
|
|
148
110
|
|
|
149
|
-
#if defined(__sun) || defined(
|
|
111
|
+
#if defined(__sun) || defined(ANDROID)
|
|
150
112
|
/* Most platforms have posix_memalign, older may only have memalign */
|
|
151
113
|
#define HAVE_MEMALIGN 1
|
|
152
114
|
#include <malloc.h>
|
|
@@ -165,36 +127,36 @@ typedef SSIZE_T ssize_t;
|
|
|
165
127
|
# define MDB_USE_POSIX_MUTEX 1
|
|
166
128
|
# define MDB_USE_ROBUST 1
|
|
167
129
|
#elif defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__)
|
|
168
|
-
#
|
|
169
|
-
#
|
|
130
|
+
# define MDB_USE_POSIX_SEM 1
|
|
131
|
+
# if defined(__APPLE__)
|
|
132
|
+
# define MDB_FDATASYNC(fd) fcntl(fd, F_FULLFSYNC)
|
|
133
|
+
# else
|
|
134
|
+
# define MDB_FDATASYNC fsync
|
|
170
135
|
# endif
|
|
136
|
+
#elif defined(ANDROID)
|
|
171
137
|
# define MDB_FDATASYNC fsync
|
|
172
|
-
#elif defined(
|
|
138
|
+
#elif defined(__HAIKU__)
|
|
139
|
+
# define MDB_USE_POSIX_SEM 1
|
|
173
140
|
# define MDB_FDATASYNC fsync
|
|
174
141
|
#endif
|
|
175
142
|
|
|
143
|
+
/* NetBSD does not define union semun in sys/sem.h */
|
|
144
|
+
#if defined(__NetBSD__) && !defined(_SEM_SEMUN_UNDEFINED)
|
|
145
|
+
# define _SEM_SEMUN_UNDEFINED 1
|
|
146
|
+
#endif
|
|
147
|
+
|
|
176
148
|
#ifndef _WIN32
|
|
177
149
|
#include <pthread.h>
|
|
178
150
|
#include <signal.h>
|
|
179
151
|
#ifdef MDB_USE_POSIX_SEM
|
|
180
152
|
# define MDB_USE_HASH 1
|
|
181
153
|
#include <semaphore.h>
|
|
182
|
-
#elif defined(MDB_USE_SYSV_SEM)
|
|
183
|
-
#include <sys/ipc.h>
|
|
184
|
-
#include <sys/sem.h>
|
|
185
|
-
#ifdef _SEM_SEMUN_UNDEFINED
|
|
186
|
-
union semun {
|
|
187
|
-
int val;
|
|
188
|
-
struct semid_ds *buf;
|
|
189
|
-
unsigned short *array;
|
|
190
|
-
};
|
|
191
|
-
#endif /* _SEM_SEMUN_UNDEFINED */
|
|
192
154
|
#else
|
|
193
155
|
#define MDB_USE_POSIX_MUTEX 1
|
|
194
|
-
#endif
|
|
195
|
-
#endif
|
|
156
|
+
#endif
|
|
157
|
+
#endif
|
|
196
158
|
|
|
197
|
-
#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM)
|
|
159
|
+
#if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) \
|
|
198
160
|
+ defined(MDB_USE_POSIX_MUTEX) != 1
|
|
199
161
|
# error "Ambiguous shared-lock implementation"
|
|
200
162
|
#endif
|
|
@@ -245,19 +207,25 @@ union semun {
|
|
|
245
207
|
|
|
246
208
|
#if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN)
|
|
247
209
|
# error "Unknown or unsupported endianness (BYTE_ORDER)"
|
|
248
|
-
#elif (-6 & 5) || CHAR_BIT!=8 || UINT_MAX
|
|
210
|
+
#elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
|
|
249
211
|
# error "Two's complement, reasonably sized integer types, please"
|
|
250
212
|
#endif
|
|
251
213
|
|
|
252
|
-
#
|
|
253
|
-
/**
|
|
254
|
-
|
|
255
|
-
#
|
|
214
|
+
#if (((__clang_major__ << 8) | __clang_minor__) >= 0x0302) || (((__GNUC__ << 8) | __GNUC_MINOR__) >= 0x0403)
|
|
215
|
+
/** Mark infrequently used env functions as cold. This puts them in a separate
|
|
216
|
+
* section, and optimizes them for size */
|
|
217
|
+
#define ESECT __attribute__ ((cold))
|
|
218
|
+
#else
|
|
219
|
+
/* On older compilers, use a separate section */
|
|
220
|
+
# ifdef __GNUC__
|
|
221
|
+
# ifdef __APPLE__
|
|
222
|
+
# define ESECT __attribute__ ((section("__TEXT,text_env")))
|
|
223
|
+
# else
|
|
224
|
+
# define ESECT __attribute__ ((section("text_env")))
|
|
225
|
+
# endif
|
|
256
226
|
# else
|
|
257
|
-
# define
|
|
227
|
+
# define ESECT
|
|
258
228
|
# endif
|
|
259
|
-
#else
|
|
260
|
-
#define ESECT
|
|
261
229
|
#endif
|
|
262
230
|
|
|
263
231
|
#ifdef _WIN32
|
|
@@ -296,8 +264,6 @@ union semun {
|
|
|
296
264
|
#define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10)
|
|
297
265
|
#ifdef _WIN32
|
|
298
266
|
#define MDB_OWNERDEAD ((int) WAIT_ABANDONED)
|
|
299
|
-
#elif defined MDB_USE_SYSV_SEM
|
|
300
|
-
#define MDB_OWNERDEAD (MDB_LAST_ERRCODE + 11)
|
|
301
267
|
#elif defined(MDB_USE_POSIX_MUTEX) && defined(EOWNERDEAD)
|
|
302
268
|
#define MDB_OWNERDEAD EOWNERDEAD /**< #LOCK_MUTEX0() result if dead owner */
|
|
303
269
|
#endif
|
|
@@ -308,14 +274,13 @@ union semun {
|
|
|
308
274
|
/** Some platforms define the EOWNERDEAD error code
|
|
309
275
|
* even though they don't support Robust Mutexes.
|
|
310
276
|
* Compile with -DMDB_USE_ROBUST=0, or use some other
|
|
311
|
-
* mechanism like -
|
|
312
|
-
* -DMDB_USE_POSIX_MUTEX.
|
|
313
|
-
*
|
|
314
|
-
* either.)
|
|
277
|
+
* mechanism like -DMDB_USE_POSIX_SEM instead of
|
|
278
|
+
* -DMDB_USE_POSIX_MUTEX.
|
|
279
|
+
* (Posix semaphores are not robust.)
|
|
315
280
|
*/
|
|
316
281
|
#ifndef MDB_USE_ROBUST
|
|
317
282
|
/* Android currently lacks Robust Mutex support. So does glibc < 2.4. */
|
|
318
|
-
# if defined(MDB_USE_POSIX_MUTEX) && (defined(
|
|
283
|
+
# if defined(MDB_USE_POSIX_MUTEX) && (defined(ANDROID) || \
|
|
319
284
|
(defined(__GLIBC__) && GLIBC_VER < 0x020004))
|
|
320
285
|
# define MDB_USE_ROBUST 0
|
|
321
286
|
# else
|
|
@@ -375,10 +340,12 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t;
|
|
|
375
340
|
#else
|
|
376
341
|
#define MDB_PROCESS_QUERY_LIMITED_INFORMATION 0x1000
|
|
377
342
|
#endif
|
|
343
|
+
#define Z "I"
|
|
378
344
|
#else
|
|
379
345
|
#define THREAD_RET void *
|
|
380
346
|
#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg)
|
|
381
347
|
#define THREAD_FINISH(thr) pthread_join(thr,NULL)
|
|
348
|
+
#define Z "z" /**< printf format modifier for size_t */
|
|
382
349
|
|
|
383
350
|
/** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */
|
|
384
351
|
#define MDB_PIDLOCK 1
|
|
@@ -397,40 +364,6 @@ mdb_sem_wait(sem_t *sem)
|
|
|
397
364
|
return rc;
|
|
398
365
|
}
|
|
399
366
|
|
|
400
|
-
#elif defined MDB_USE_SYSV_SEM
|
|
401
|
-
|
|
402
|
-
typedef struct mdb_mutex {
|
|
403
|
-
int semid;
|
|
404
|
-
int semnum;
|
|
405
|
-
int *locked;
|
|
406
|
-
} mdb_mutex_t[1], *mdb_mutexref_t;
|
|
407
|
-
|
|
408
|
-
#define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex)
|
|
409
|
-
#define UNLOCK_MUTEX(mutex) do { \
|
|
410
|
-
struct sembuf sb = { 0, 1, SEM_UNDO }; \
|
|
411
|
-
sb.sem_num = (mutex)->semnum; \
|
|
412
|
-
*(mutex)->locked = 0; \
|
|
413
|
-
semop((mutex)->semid, &sb, 1); \
|
|
414
|
-
} while(0)
|
|
415
|
-
|
|
416
|
-
static int
|
|
417
|
-
mdb_sem_wait(mdb_mutexref_t sem)
|
|
418
|
-
{
|
|
419
|
-
int rc, *locked = sem->locked;
|
|
420
|
-
struct sembuf sb = { 0, -1, SEM_UNDO };
|
|
421
|
-
sb.sem_num = sem->semnum;
|
|
422
|
-
do {
|
|
423
|
-
if (!semop(sem->semid, &sb, 1)) {
|
|
424
|
-
rc = *locked ? MDB_OWNERDEAD : MDB_SUCCESS;
|
|
425
|
-
*locked = 1;
|
|
426
|
-
break;
|
|
427
|
-
}
|
|
428
|
-
} while ((rc = errno) == EINTR);
|
|
429
|
-
return rc;
|
|
430
|
-
}
|
|
431
|
-
|
|
432
|
-
#define mdb_mutex_consistent(mutex) 0
|
|
433
|
-
|
|
434
367
|
#else /* MDB_USE_POSIX_MUTEX: */
|
|
435
368
|
/** Shared mutex/semaphore as the original is stored.
|
|
436
369
|
*
|
|
@@ -451,7 +384,7 @@ typedef pthread_mutex_t *mdb_mutexref_t;
|
|
|
451
384
|
/** Mark mutex-protected data as repaired, after death of previous owner.
|
|
452
385
|
*/
|
|
453
386
|
#define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex)
|
|
454
|
-
#endif /* MDB_USE_POSIX_SEM
|
|
387
|
+
#endif /* MDB_USE_POSIX_SEM */
|
|
455
388
|
|
|
456
389
|
/** Get the error code for the last failed system function.
|
|
457
390
|
*/
|
|
@@ -476,25 +409,12 @@ typedef pthread_mutex_t *mdb_mutexref_t;
|
|
|
476
409
|
#define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE))
|
|
477
410
|
#endif
|
|
478
411
|
|
|
479
|
-
#
|
|
480
|
-
#define
|
|
481
|
-
#define Yd MDB_PRIy(d) /**< printf format for 'signed #mdb_size_t' */
|
|
482
|
-
|
|
483
|
-
#ifdef MDB_USE_SYSV_SEM
|
|
484
|
-
#define MNAME_LEN (sizeof(int))
|
|
412
|
+
#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
|
|
413
|
+
#define MNAME_LEN 32
|
|
485
414
|
#else
|
|
486
415
|
#define MNAME_LEN (sizeof(pthread_mutex_t))
|
|
487
416
|
#endif
|
|
488
417
|
|
|
489
|
-
/** Initial part of #MDB_env.me_mutexname[].
|
|
490
|
-
* Changes to this code must be reflected in #MDB_LOCK_FORMAT.
|
|
491
|
-
*/
|
|
492
|
-
#ifdef _WIN32
|
|
493
|
-
#define MUTEXNAME_PREFIX "Global\\MDB"
|
|
494
|
-
#elif defined MDB_USE_POSIX_SEM
|
|
495
|
-
#define MUTEXNAME_PREFIX "/MDB"
|
|
496
|
-
#endif
|
|
497
|
-
|
|
498
418
|
/** @} */
|
|
499
419
|
|
|
500
420
|
#ifdef MDB_ROBUST_SUPPORTED
|
|
@@ -573,18 +493,26 @@ typedef MDB_ID txnid_t;
|
|
|
573
493
|
#define MDB_DEBUG 0
|
|
574
494
|
#endif
|
|
575
495
|
|
|
496
|
+
#define MDB_DBG_INFO 1
|
|
497
|
+
#define MDB_DBG_TRACE 2
|
|
498
|
+
|
|
576
499
|
#if MDB_DEBUG
|
|
577
|
-
static int mdb_debug;
|
|
500
|
+
static int mdb_debug = MDB_DBG_TRACE;
|
|
578
501
|
static txnid_t mdb_debug_start;
|
|
579
502
|
|
|
580
503
|
/** Print a debug message with printf formatting.
|
|
581
504
|
* Requires double parenthesis around 2 or more args.
|
|
582
505
|
*/
|
|
583
|
-
# define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args))
|
|
506
|
+
# define DPRINTF(args) ((void) ((mdb_debug & MDB_DBG_INFO) && DPRINTF0 args))
|
|
584
507
|
# define DPRINTF0(fmt, ...) \
|
|
585
508
|
fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__)
|
|
509
|
+
/** Trace info for replaying */
|
|
510
|
+
# define MDB_TRACE(args) ((void) ((mdb_debug & MDB_DBG_TRACE) && DPRINTF1 args))
|
|
511
|
+
# define DPRINTF1(fmt, ...) \
|
|
512
|
+
fprintf(stderr, ">%d:%s: " fmt "\n", getpid(), mdb_func_, __VA_ARGS__)
|
|
586
513
|
#else
|
|
587
514
|
# define DPRINTF(args) ((void) 0)
|
|
515
|
+
# define MDB_TRACE(args) ((void) 0)
|
|
588
516
|
#endif
|
|
589
517
|
/** Print a debug string.
|
|
590
518
|
* The string is printed literally, with no format processing.
|
|
@@ -636,11 +564,7 @@ static txnid_t mdb_debug_start;
|
|
|
636
564
|
/** The version number for a database's datafile format. */
|
|
637
565
|
#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1)
|
|
638
566
|
/** The version number for a database's lockfile format. */
|
|
639
|
-
#define MDB_LOCK_VERSION
|
|
640
|
-
/** Number of bits representing #MDB_LOCK_VERSION in #MDB_LOCK_FORMAT.
|
|
641
|
-
* The remaining bits must leave room for #MDB_lock_desc.
|
|
642
|
-
*/
|
|
643
|
-
#define MDB_LOCK_VERSION_BITS 12
|
|
567
|
+
#define MDB_LOCK_VERSION 1
|
|
644
568
|
|
|
645
569
|
/** @brief The max size of a key we can write, or 0 for computed max.
|
|
646
570
|
*
|
|
@@ -685,6 +609,11 @@ static txnid_t mdb_debug_start;
|
|
|
685
609
|
* This is used for printing a hex dump of a key's contents.
|
|
686
610
|
*/
|
|
687
611
|
#define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1]
|
|
612
|
+
/** A data value buffer.
|
|
613
|
+
* @ingroup debug
|
|
614
|
+
* This is used for printing a hex dump of a #MDB_DUPSORT value's contents.
|
|
615
|
+
*/
|
|
616
|
+
#define DDBUF char dbuf[DKBUF_MAXKEYSIZE*2+1+2]
|
|
688
617
|
/** Display a key in hex.
|
|
689
618
|
* @ingroup debug
|
|
690
619
|
* Invoke a function to display a key in hex.
|
|
@@ -692,6 +621,7 @@ static txnid_t mdb_debug_start;
|
|
|
692
621
|
#define DKEY(x) mdb_dkey(x, kbuf)
|
|
693
622
|
#else
|
|
694
623
|
#define DKBUF
|
|
624
|
+
#define DDBUF
|
|
695
625
|
#define DKEY(x) 0
|
|
696
626
|
#endif
|
|
697
627
|
|
|
@@ -706,27 +636,12 @@ static txnid_t mdb_debug_start;
|
|
|
706
636
|
/** Round \b n up to an even number. */
|
|
707
637
|
#define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */
|
|
708
638
|
|
|
709
|
-
/** Least significant 1-bit of \b n. n must be of an unsigned type. */
|
|
710
|
-
#define LOW_BIT(n) ((n) & (-(n)))
|
|
711
|
-
|
|
712
|
-
/** (log2(\b p2) % \b n), for p2 = power of 2 and 0 < n < 8. */
|
|
713
|
-
#define LOG2_MOD(p2, n) (7 - 86 / ((p2) % ((1U<<(n))-1) + 11))
|
|
714
|
-
/* Explanation: Let p2 = 2**(n*y + x), x<n and M = (1U<<n)-1. Now p2 =
|
|
715
|
-
* (M+1)**y * 2**x = 2**x (mod M). Finally "/" "happens" to return 7-x.
|
|
716
|
-
*/
|
|
717
|
-
|
|
718
|
-
/** Should be alignment of \b type. Ensure it is a power of 2. */
|
|
719
|
-
#define ALIGNOF2(type) \
|
|
720
|
-
LOW_BIT(offsetof(struct { char ch_; type align_; }, align_))
|
|
721
|
-
|
|
722
639
|
/** Used for offsets within a single page.
|
|
723
640
|
* Since memory pages are typically 4 or 8KB in size, 12-13 bits,
|
|
724
641
|
* this is plenty.
|
|
725
642
|
*/
|
|
726
643
|
typedef uint16_t indx_t;
|
|
727
644
|
|
|
728
|
-
typedef unsigned long long mdb_hash_t;
|
|
729
|
-
|
|
730
645
|
/** Default size of memory map.
|
|
731
646
|
* This is certainly too small for any actual applications. Apps should always set
|
|
732
647
|
* the size explicitly using #mdb_env_set_mapsize().
|
|
@@ -844,6 +759,14 @@ typedef struct MDB_txbody {
|
|
|
844
759
|
uint32_t mtb_magic;
|
|
845
760
|
/** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */
|
|
846
761
|
uint32_t mtb_format;
|
|
762
|
+
#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
|
|
763
|
+
char mtb_rmname[MNAME_LEN];
|
|
764
|
+
#else
|
|
765
|
+
/** Mutex protecting access to this table.
|
|
766
|
+
* This is the reader table lock used with LOCK_MUTEX().
|
|
767
|
+
*/
|
|
768
|
+
mdb_mutex_t mtb_rmutex;
|
|
769
|
+
#endif
|
|
847
770
|
/** The ID of the last transaction committed to the database.
|
|
848
771
|
* This is recorded here only for convenience; the value can always
|
|
849
772
|
* be determined by reading the main database meta pages.
|
|
@@ -854,18 +777,6 @@ typedef struct MDB_txbody {
|
|
|
854
777
|
* when readers release their slots.
|
|
855
778
|
*/
|
|
856
779
|
volatile unsigned mtb_numreaders;
|
|
857
|
-
#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
|
|
858
|
-
/** Binary form of names of the reader/writer locks */
|
|
859
|
-
mdb_hash_t mtb_mutexid;
|
|
860
|
-
#elif defined(MDB_USE_SYSV_SEM)
|
|
861
|
-
int mtb_semid;
|
|
862
|
-
int mtb_rlocked;
|
|
863
|
-
#else
|
|
864
|
-
/** Mutex protecting access to this table.
|
|
865
|
-
* This is the reader table lock used with LOCK_MUTEX().
|
|
866
|
-
*/
|
|
867
|
-
mdb_mutex_t mtb_rmutex;
|
|
868
|
-
#endif
|
|
869
780
|
} MDB_txbody;
|
|
870
781
|
|
|
871
782
|
/** The actual reader table definition. */
|
|
@@ -875,80 +786,30 @@ typedef struct MDB_txninfo {
|
|
|
875
786
|
#define mti_magic mt1.mtb.mtb_magic
|
|
876
787
|
#define mti_format mt1.mtb.mtb_format
|
|
877
788
|
#define mti_rmutex mt1.mtb.mtb_rmutex
|
|
789
|
+
#define mti_rmname mt1.mtb.mtb_rmname
|
|
878
790
|
#define mti_txnid mt1.mtb.mtb_txnid
|
|
879
791
|
#define mti_numreaders mt1.mtb.mtb_numreaders
|
|
880
|
-
#define mti_mutexid mt1.mtb.mtb_mutexid
|
|
881
|
-
#ifdef MDB_USE_SYSV_SEM
|
|
882
|
-
#define mti_semid mt1.mtb.mtb_semid
|
|
883
|
-
#define mti_rlocked mt1.mtb.mtb_rlocked
|
|
884
|
-
#endif
|
|
885
792
|
char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)];
|
|
886
793
|
} mt1;
|
|
887
|
-
#if !(defined(_WIN32) || defined(MDB_USE_POSIX_SEM))
|
|
888
794
|
union {
|
|
889
|
-
#
|
|
890
|
-
|
|
891
|
-
#define
|
|
795
|
+
#if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
|
|
796
|
+
char mt2_wmname[MNAME_LEN];
|
|
797
|
+
#define mti_wmname mt2.mt2_wmname
|
|
892
798
|
#else
|
|
893
799
|
mdb_mutex_t mt2_wmutex;
|
|
894
800
|
#define mti_wmutex mt2.mt2_wmutex
|
|
895
801
|
#endif
|
|
896
802
|
char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)];
|
|
897
803
|
} mt2;
|
|
898
|
-
#endif
|
|
899
804
|
MDB_reader mti_readers[1];
|
|
900
805
|
} MDB_txninfo;
|
|
901
806
|
|
|
902
807
|
/** Lockfile format signature: version, features and field layout */
|
|
903
808
|
#define MDB_LOCK_FORMAT \
|
|
904
|
-
((uint32_t)
|
|
905
|
-
((
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
/** Lock type and layout. Values 0-119. _WIN32 implies #MDB_PIDLOCK.
|
|
909
|
-
* Some low values are reserved for future tweaks.
|
|
910
|
-
*/
|
|
911
|
-
#ifdef _WIN32
|
|
912
|
-
# define MDB_LOCK_TYPE (0 + ALIGNOF2(mdb_hash_t)/8 % 2)
|
|
913
|
-
#elif defined MDB_USE_POSIX_SEM
|
|
914
|
-
# define MDB_LOCK_TYPE (4 + ALIGNOF2(mdb_hash_t)/8 % 2)
|
|
915
|
-
#elif defined MDB_USE_SYSV_SEM
|
|
916
|
-
# define MDB_LOCK_TYPE (8)
|
|
917
|
-
#elif defined MDB_USE_POSIX_MUTEX
|
|
918
|
-
/* We do not know the inside of a POSIX mutex and how to check if mutexes
|
|
919
|
-
* used by two executables are compatible. Just check alignment and size.
|
|
920
|
-
*/
|
|
921
|
-
# define MDB_LOCK_TYPE (10 + \
|
|
922
|
-
LOG2_MOD(ALIGNOF2(pthread_mutex_t), 5) + \
|
|
923
|
-
sizeof(pthread_mutex_t) / 4U % 22 * 5)
|
|
924
|
-
#endif
|
|
925
|
-
|
|
926
|
-
enum {
|
|
927
|
-
/** Magic number for lockfile layout and features.
|
|
928
|
-
*
|
|
929
|
-
* This *attempts* to stop liblmdb variants compiled with conflicting
|
|
930
|
-
* options from using the lockfile at the same time and thus breaking
|
|
931
|
-
* it. It describes locking types, and sizes and sometimes alignment
|
|
932
|
-
* of the various lockfile items.
|
|
933
|
-
*
|
|
934
|
-
* The detected ranges are mostly guesswork, or based simply on how
|
|
935
|
-
* big they could be without using more bits. So we can tweak them
|
|
936
|
-
* in good conscience when updating #MDB_LOCK_VERSION.
|
|
937
|
-
*/
|
|
938
|
-
MDB_lock_desc =
|
|
939
|
-
/* Default CACHELINE=64 vs. other values (have seen mention of 32-256) */
|
|
940
|
-
(CACHELINE==64 ? 0 : 1 + LOG2_MOD(CACHELINE >> (CACHELINE>64), 5))
|
|
941
|
-
+ 6 * (sizeof(MDB_PID_T)/4 % 3) /* legacy(2) to word(4/8)? */
|
|
942
|
-
+ 18 * (sizeof(pthread_t)/4 % 5) /* can be struct{id, active data} */
|
|
943
|
-
+ 90 * (sizeof(MDB_txbody) / CACHELINE % 3)
|
|
944
|
-
+ 270 * (MDB_LOCK_TYPE % 120)
|
|
945
|
-
/* The above is < 270*120 < 2**15 */
|
|
946
|
-
+ ((sizeof(txnid_t) == 8) << 15) /* 32bit/64bit */
|
|
947
|
-
+ ((sizeof(MDB_reader) > CACHELINE) << 16)
|
|
948
|
-
/* Not really needed - implied by MDB_LOCK_TYPE != (_WIN32 locking) */
|
|
949
|
-
+ (((MDB_PIDLOCK) != 0) << 17)
|
|
950
|
-
/* 18 bits total: Must be <= (32 - MDB_LOCK_VERSION_BITS). */
|
|
951
|
-
};
|
|
809
|
+
((uint32_t) \
|
|
810
|
+
((MDB_LOCK_VERSION) \
|
|
811
|
+
/* Flags which describe functionality */ \
|
|
812
|
+
+ (((MDB_PIDLOCK) != 0) << 16)))
|
|
952
813
|
/** @} */
|
|
953
814
|
|
|
954
815
|
/** Common header for all page types. The page type depends on #mp_flags.
|
|
@@ -1003,9 +864,26 @@ typedef struct MDB_page {
|
|
|
1003
864
|
} pb;
|
|
1004
865
|
uint32_t pb_pages; /**< number of overflow pages */
|
|
1005
866
|
} mp_pb;
|
|
1006
|
-
indx_t mp_ptrs[
|
|
867
|
+
indx_t mp_ptrs[0]; /**< dynamic size */
|
|
1007
868
|
} MDB_page;
|
|
1008
869
|
|
|
870
|
+
/** Alternate page header, for 2-byte aligned access */
|
|
871
|
+
typedef struct MDB_page2 {
|
|
872
|
+
uint16_t mp2_p[sizeof(pgno_t)/2];
|
|
873
|
+
uint16_t mp2_pad;
|
|
874
|
+
uint16_t mp2_flags;
|
|
875
|
+
indx_t mp2_lower;
|
|
876
|
+
indx_t mp2_upper;
|
|
877
|
+
indx_t mp2_ptrs[0];
|
|
878
|
+
} MDB_page2;
|
|
879
|
+
|
|
880
|
+
#define MP_PGNO(p) (((MDB_page2 *)(void *)(p))->mp2_p)
|
|
881
|
+
#define MP_PAD(p) (((MDB_page2 *)(void *)(p))->mp2_pad)
|
|
882
|
+
#define MP_FLAGS(p) (((MDB_page2 *)(void *)(p))->mp2_flags)
|
|
883
|
+
#define MP_LOWER(p) (((MDB_page2 *)(void *)(p))->mp2_lower)
|
|
884
|
+
#define MP_UPPER(p) (((MDB_page2 *)(void *)(p))->mp2_upper)
|
|
885
|
+
#define MP_PTRS(p) (((MDB_page2 *)(void *)(p))->mp2_ptrs)
|
|
886
|
+
|
|
1009
887
|
/** Size of the page header, excluding dynamic data at the end */
|
|
1010
888
|
#define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs))
|
|
1011
889
|
|
|
@@ -1016,10 +894,10 @@ typedef struct MDB_page {
|
|
|
1016
894
|
#define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0)
|
|
1017
895
|
|
|
1018
896
|
/** Number of nodes on a page */
|
|
1019
|
-
#define NUMKEYS(p) (((p)
|
|
897
|
+
#define NUMKEYS(p) ((MP_LOWER(p) - (PAGEHDRSZ-PAGEBASE)) >> 1)
|
|
1020
898
|
|
|
1021
899
|
/** The amount of space remaining in the page */
|
|
1022
|
-
#define SIZELEFT(p) (indx_t)((p)
|
|
900
|
+
#define SIZELEFT(p) (indx_t)(MP_UPPER(p) - MP_LOWER(p))
|
|
1023
901
|
|
|
1024
902
|
/** The percentage of space used in the page, in tenths of a percent. */
|
|
1025
903
|
#define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \
|
|
@@ -1030,15 +908,15 @@ typedef struct MDB_page {
|
|
|
1030
908
|
#define FILL_THRESHOLD 250
|
|
1031
909
|
|
|
1032
910
|
/** Test if a page is a leaf page */
|
|
1033
|
-
#define IS_LEAF(p) F_ISSET((p)
|
|
911
|
+
#define IS_LEAF(p) F_ISSET(MP_FLAGS(p), P_LEAF)
|
|
1034
912
|
/** Test if a page is a LEAF2 page */
|
|
1035
|
-
#define IS_LEAF2(p) F_ISSET((p)
|
|
913
|
+
#define IS_LEAF2(p) F_ISSET(MP_FLAGS(p), P_LEAF2)
|
|
1036
914
|
/** Test if a page is a branch page */
|
|
1037
|
-
#define IS_BRANCH(p) F_ISSET((p)
|
|
915
|
+
#define IS_BRANCH(p) F_ISSET(MP_FLAGS(p), P_BRANCH)
|
|
1038
916
|
/** Test if a page is an overflow page */
|
|
1039
|
-
#define IS_OVERFLOW(p) F_ISSET((p)
|
|
917
|
+
#define IS_OVERFLOW(p) F_ISSET(MP_FLAGS(p), P_OVERFLOW)
|
|
1040
918
|
/** Test if a page is a sub page */
|
|
1041
|
-
#define IS_SUBP(p) F_ISSET((p)
|
|
919
|
+
#define IS_SUBP(p) F_ISSET(MP_FLAGS(p), P_SUBP)
|
|
1042
920
|
|
|
1043
921
|
/** The number of overflow pages needed to store the given size. */
|
|
1044
922
|
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
|
|
@@ -1106,7 +984,7 @@ typedef struct MDB_node {
|
|
|
1106
984
|
#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size)
|
|
1107
985
|
|
|
1108
986
|
/** Address of node \b i in page \b p */
|
|
1109
|
-
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)
|
|
987
|
+
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + MP_PTRS(p)[i] + PAGEBASE))
|
|
1110
988
|
|
|
1111
989
|
/** Address of the key for the node */
|
|
1112
990
|
#define NODEKEY(node) (void *)((node)->mn_data)
|
|
@@ -1134,8 +1012,10 @@ typedef struct MDB_node {
|
|
|
1134
1012
|
/** Copy a page number from src to dst */
|
|
1135
1013
|
#ifdef MISALIGNED_OK
|
|
1136
1014
|
#define COPY_PGNO(dst,src) dst = src
|
|
1015
|
+
#undef MP_PGNO
|
|
1016
|
+
#define MP_PGNO(p) ((p)->mp_pgno)
|
|
1137
1017
|
#else
|
|
1138
|
-
#if
|
|
1018
|
+
#if SIZE_MAX > 4294967295UL
|
|
1139
1019
|
#define COPY_PGNO(dst,src) do { \
|
|
1140
1020
|
unsigned short *s, *d; \
|
|
1141
1021
|
s = (unsigned short *)&(src); \
|
|
@@ -1176,7 +1056,7 @@ typedef struct MDB_db {
|
|
|
1176
1056
|
pgno_t md_branch_pages; /**< number of internal pages */
|
|
1177
1057
|
pgno_t md_leaf_pages; /**< number of leaf pages */
|
|
1178
1058
|
pgno_t md_overflow_pages; /**< number of overflow pages */
|
|
1179
|
-
|
|
1059
|
+
size_t md_entries; /**< number of data items */
|
|
1180
1060
|
pgno_t md_root; /**< the root page of this tree */
|
|
1181
1061
|
} MDB_db;
|
|
1182
1062
|
|
|
@@ -1206,16 +1086,8 @@ typedef struct MDB_meta {
|
|
|
1206
1086
|
uint32_t mm_magic;
|
|
1207
1087
|
/** Version number of this file. Must be set to #MDB_DATA_VERSION. */
|
|
1208
1088
|
uint32_t mm_version;
|
|
1209
|
-
#ifdef MDB_VL32
|
|
1210
|
-
union { /* always zero since we don't support fixed mapping in MDB_VL32 */
|
|
1211
|
-
MDB_ID mmun_ull;
|
|
1212
|
-
void *mmun_address;
|
|
1213
|
-
} mm_un;
|
|
1214
|
-
#define mm_address mm_un.mmun_address
|
|
1215
|
-
#else
|
|
1216
1089
|
void *mm_address; /**< address for fixed mapping */
|
|
1217
|
-
|
|
1218
|
-
mdb_size_t mm_mapsize; /**< size of mmap region */
|
|
1090
|
+
size_t mm_mapsize; /**< size of mmap region */
|
|
1219
1091
|
MDB_db mm_dbs[CORE_DBS]; /**< first is free space, 2nd is main db */
|
|
1220
1092
|
/** The size of pages used in this DB */
|
|
1221
1093
|
#define mm_psize mm_dbs[FREE_DBI].md_pad
|
|
@@ -1261,9 +1133,6 @@ struct MDB_txn {
|
|
|
1261
1133
|
/** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */
|
|
1262
1134
|
MDB_txn *mt_child;
|
|
1263
1135
|
pgno_t mt_next_pgno; /**< next unallocated page */
|
|
1264
|
-
#ifdef MDB_VL32
|
|
1265
|
-
pgno_t mt_last_pgno; /**< last written page */
|
|
1266
|
-
#endif
|
|
1267
1136
|
/** The ID of this transaction. IDs are integers incrementing from 1.
|
|
1268
1137
|
* Only committed write transactions increment the ID. If a transaction
|
|
1269
1138
|
* aborts, the ID may be re-used by the next writer.
|
|
@@ -1311,19 +1180,6 @@ struct MDB_txn {
|
|
|
1311
1180
|
MDB_cursor **mt_cursors;
|
|
1312
1181
|
/** Array of flags for each DB */
|
|
1313
1182
|
unsigned char *mt_dbflags;
|
|
1314
|
-
#ifdef MDB_VL32
|
|
1315
|
-
/** List of read-only pages (actually chunks) */
|
|
1316
|
-
MDB_ID3L mt_rpages;
|
|
1317
|
-
/** We map chunks of 16 pages. Even though Windows uses 4KB pages, all
|
|
1318
|
-
* mappings must begin on 64KB boundaries. So we round off all pgnos to
|
|
1319
|
-
* a chunk boundary. We do the same on Linux for symmetry, and also to
|
|
1320
|
-
* reduce the frequency of mmap/munmap calls.
|
|
1321
|
-
*/
|
|
1322
|
-
#define MDB_RPAGE_CHUNK 16
|
|
1323
|
-
#define MDB_TRPAGE_SIZE 4096 /**< size of #mt_rpages array of chunks */
|
|
1324
|
-
#define MDB_TRPAGE_MAX (MDB_TRPAGE_SIZE-1) /**< maximum chunk index */
|
|
1325
|
-
unsigned int mt_rpcheck; /**< threshold for reclaiming unref'd chunks */
|
|
1326
|
-
#endif
|
|
1327
1183
|
/** Number of DB records in use, or 0 when the txn is finished.
|
|
1328
1184
|
* This number only ever increments until the txn finishes; we
|
|
1329
1185
|
* don't decrement it when individual DB handles are closed.
|
|
@@ -1335,9 +1191,7 @@ struct MDB_txn {
|
|
|
1335
1191
|
* @{
|
|
1336
1192
|
*/
|
|
1337
1193
|
/** #mdb_txn_begin() flags */
|
|
1338
|
-
#define MDB_TXN_BEGIN_FLAGS
|
|
1339
|
-
#define MDB_TXN_NOMETASYNC MDB_NOMETASYNC /**< don't sync meta for this txn on commit */
|
|
1340
|
-
#define MDB_TXN_NOSYNC MDB_NOSYNC /**< don't sync this txn on commit */
|
|
1194
|
+
#define MDB_TXN_BEGIN_FLAGS MDB_RDONLY
|
|
1341
1195
|
#define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */
|
|
1342
1196
|
/* internal txn flags */
|
|
1343
1197
|
#define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */
|
|
@@ -1403,24 +1257,10 @@ struct MDB_cursor {
|
|
|
1403
1257
|
#define C_SUB 0x04 /**< Cursor is a sub-cursor */
|
|
1404
1258
|
#define C_DEL 0x08 /**< last op was a cursor_del */
|
|
1405
1259
|
#define C_UNTRACK 0x40 /**< Un-track cursor when closing */
|
|
1406
|
-
#define C_WRITEMAP MDB_TXN_WRITEMAP /**< Copy of txn flag */
|
|
1407
|
-
/** Read-only cursor into the txn's original snapshot in the map.
|
|
1408
|
-
* Set for read-only txns, and in #mdb_page_alloc() for #FREE_DBI when
|
|
1409
|
-
* #MDB_DEVEL & 2. Only implements code which is necessary for this.
|
|
1410
|
-
*/
|
|
1411
|
-
#define C_ORIG_RDONLY MDB_TXN_RDONLY
|
|
1412
1260
|
/** @} */
|
|
1413
1261
|
unsigned int mc_flags; /**< @ref mdb_cursor */
|
|
1414
1262
|
MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */
|
|
1415
1263
|
indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */
|
|
1416
|
-
#ifdef MDB_VL32
|
|
1417
|
-
MDB_page *mc_ovpg; /**< a referenced overflow page */
|
|
1418
|
-
# define MC_OVPG(mc) ((mc)->mc_ovpg)
|
|
1419
|
-
# define MC_SET_OVPG(mc, pg) ((mc)->mc_ovpg = (pg))
|
|
1420
|
-
#else
|
|
1421
|
-
# define MC_OVPG(mc) ((MDB_page *)0)
|
|
1422
|
-
# define MC_SET_OVPG(mc, pg) ((void)0)
|
|
1423
|
-
#endif
|
|
1424
1264
|
};
|
|
1425
1265
|
|
|
1426
1266
|
/** Context for sorted-dup records.
|
|
@@ -1467,12 +1307,6 @@ struct MDB_env {
|
|
|
1467
1307
|
HANDLE me_fd; /**< The main data file */
|
|
1468
1308
|
HANDLE me_lfd; /**< The lock file */
|
|
1469
1309
|
HANDLE me_mfd; /**< For writing and syncing the meta pages */
|
|
1470
|
-
#ifdef _WIN32
|
|
1471
|
-
#ifdef MDB_VL32
|
|
1472
|
-
HANDLE me_fmh; /**< File Mapping handle */
|
|
1473
|
-
#endif /* MDB_VL32 */
|
|
1474
|
-
HANDLE me_ovfd; /**< Overlapped/async with write-through file handle */
|
|
1475
|
-
#endif /* _WIN32 */
|
|
1476
1310
|
/** Failed to update the meta page. Probably an I/O error. */
|
|
1477
1311
|
#define MDB_FATAL_ERROR 0x80000000U
|
|
1478
1312
|
/** Some fields are initialized. */
|
|
@@ -1497,8 +1331,8 @@ struct MDB_env {
|
|
|
1497
1331
|
void *me_pbuf; /**< scratch area for DUPSORT put() */
|
|
1498
1332
|
MDB_txn *me_txn; /**< current write transaction */
|
|
1499
1333
|
MDB_txn *me_txn0; /**< prealloc'd write transaction */
|
|
1500
|
-
|
|
1501
|
-
|
|
1334
|
+
size_t me_mapsize; /**< size of the data memory map */
|
|
1335
|
+
off_t me_size; /**< current file size */
|
|
1502
1336
|
pgno_t me_maxpg; /**< me_mapsize / me_psize */
|
|
1503
1337
|
MDB_dbx *me_dbxs; /**< array of static DB info */
|
|
1504
1338
|
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
|
|
@@ -1523,8 +1357,6 @@ struct MDB_env {
|
|
|
1523
1357
|
int me_live_reader; /**< have liveness lock in reader table */
|
|
1524
1358
|
#ifdef _WIN32
|
|
1525
1359
|
int me_pidquery; /**< Used in OpenProcess */
|
|
1526
|
-
OVERLAPPED *ov; /**< Used for for overlapping I/O requests */
|
|
1527
|
-
int ovs; /**< Count of OVERLAPPEDs */
|
|
1528
1360
|
#endif
|
|
1529
1361
|
#ifdef MDB_USE_POSIX_MUTEX /* Posix mutexes reside in shared mem */
|
|
1530
1362
|
# define me_rmutex me_txns->mti_rmutex /**< Shared reader lock */
|
|
@@ -1532,17 +1364,6 @@ struct MDB_env {
|
|
|
1532
1364
|
#else
|
|
1533
1365
|
mdb_mutex_t me_rmutex;
|
|
1534
1366
|
mdb_mutex_t me_wmutex;
|
|
1535
|
-
# if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
|
|
1536
|
-
/** Half-initialized name of mutexes, to be completed by #MUTEXNAME() */
|
|
1537
|
-
char me_mutexname[sizeof(MUTEXNAME_PREFIX) + 11];
|
|
1538
|
-
# endif
|
|
1539
|
-
#endif
|
|
1540
|
-
#ifdef MDB_VL32
|
|
1541
|
-
MDB_ID3L me_rpages; /**< like #mt_rpages, but global to env */
|
|
1542
|
-
pthread_mutex_t me_rpmutex; /**< control access to #me_rpages */
|
|
1543
|
-
#define MDB_ERPAGE_SIZE 16384
|
|
1544
|
-
#define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1)
|
|
1545
|
-
unsigned int me_rpcheck;
|
|
1546
1367
|
#endif
|
|
1547
1368
|
void *me_userctx; /**< User-settable context */
|
|
1548
1369
|
MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
|
|
@@ -1604,7 +1425,7 @@ static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst);
|
|
|
1604
1425
|
static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata,
|
|
1605
1426
|
pgno_t newpgno, unsigned int nflags);
|
|
1606
1427
|
|
|
1607
|
-
static int mdb_env_read_header(MDB_env *env,
|
|
1428
|
+
static int mdb_env_read_header(MDB_env *env, MDB_meta *meta);
|
|
1608
1429
|
static MDB_meta *mdb_env_pick_meta(const MDB_env *env);
|
|
1609
1430
|
static int mdb_env_write_meta(MDB_txn *txn);
|
|
1610
1431
|
#if defined(MDB_USE_POSIX_MUTEX) && !defined(MDB_ROBUST_SUPPORTED) /* Drop unused excl arg */
|
|
@@ -1628,6 +1449,9 @@ static int mdb_update_key(MDB_cursor *mc, MDB_val *key);
|
|
|
1628
1449
|
static void mdb_cursor_pop(MDB_cursor *mc);
|
|
1629
1450
|
static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp);
|
|
1630
1451
|
|
|
1452
|
+
static int _mdb_cursor_del(MDB_cursor *mc, unsigned int flags);
|
|
1453
|
+
static int _mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned int flags);
|
|
1454
|
+
|
|
1631
1455
|
static int mdb_cursor_del0(MDB_cursor *mc);
|
|
1632
1456
|
static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags);
|
|
1633
1457
|
static int mdb_cursor_sibling(MDB_cursor *mc, int move_right);
|
|
@@ -1651,18 +1475,13 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead);
|
|
|
1651
1475
|
static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long;
|
|
1652
1476
|
/** @endcond */
|
|
1653
1477
|
|
|
1654
|
-
/** Compare two items pointing at '
|
|
1478
|
+
/** Compare two items pointing at size_t's of unknown alignment. */
|
|
1655
1479
|
#ifdef MISALIGNED_OK
|
|
1656
1480
|
# define mdb_cmp_clong mdb_cmp_long
|
|
1657
1481
|
#else
|
|
1658
1482
|
# define mdb_cmp_clong mdb_cmp_cint
|
|
1659
1483
|
#endif
|
|
1660
1484
|
|
|
1661
|
-
/** True if we need #mdb_cmp_clong() instead of \b cmp for #MDB_INTEGERDUP */
|
|
1662
|
-
#define NEED_CMP_CLONG(cmp, ksize) \
|
|
1663
|
-
(UINT_MAX < MDB_SIZE_MAX && \
|
|
1664
|
-
(cmp) == mdb_cmp_int && (ksize) == sizeof(mdb_size_t))
|
|
1665
|
-
|
|
1666
1485
|
#ifdef _WIN32
|
|
1667
1486
|
static SECURITY_DESCRIPTOR mdb_null_sd;
|
|
1668
1487
|
static SECURITY_ATTRIBUTES mdb_all_sa;
|
|
@@ -1704,7 +1523,6 @@ static char *const mdb_errstr[] = {
|
|
|
1704
1523
|
"MDB_BAD_TXN: Transaction must abort, has a child, or is invalid",
|
|
1705
1524
|
"MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
|
|
1706
1525
|
"MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
|
|
1707
|
-
"MDB_PROBLEM: Unexpected problem - txn should abort",
|
|
1708
1526
|
};
|
|
1709
1527
|
|
|
1710
1528
|
char *
|
|
@@ -1749,9 +1567,11 @@ mdb_strerror(int err)
|
|
|
1749
1567
|
buf[0] = 0;
|
|
1750
1568
|
FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM |
|
|
1751
1569
|
FORMAT_MESSAGE_IGNORE_INSERTS,
|
|
1752
|
-
NULL, err, 0, ptr, MSGSIZE,
|
|
1570
|
+
NULL, err, 0, ptr, MSGSIZE, NULL);
|
|
1753
1571
|
return ptr;
|
|
1754
1572
|
#else
|
|
1573
|
+
if (err < 0)
|
|
1574
|
+
return "Invalid error code";
|
|
1755
1575
|
return strerror(err);
|
|
1756
1576
|
#endif
|
|
1757
1577
|
}
|
|
@@ -1789,7 +1609,7 @@ static pgno_t
|
|
|
1789
1609
|
mdb_dbg_pgno(MDB_page *mp)
|
|
1790
1610
|
{
|
|
1791
1611
|
pgno_t ret;
|
|
1792
|
-
COPY_PGNO(ret, mp
|
|
1612
|
+
COPY_PGNO(ret, MP_PGNO(mp));
|
|
1793
1613
|
return ret;
|
|
1794
1614
|
}
|
|
1795
1615
|
|
|
@@ -1823,6 +1643,18 @@ mdb_dkey(MDB_val *key, char *buf)
|
|
|
1823
1643
|
return buf;
|
|
1824
1644
|
}
|
|
1825
1645
|
|
|
1646
|
+
static char *
|
|
1647
|
+
mdb_dval(MDB_txn *txn, MDB_dbi dbi, MDB_val *data, char *buf)
|
|
1648
|
+
{
|
|
1649
|
+
if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
|
|
1650
|
+
mdb_dkey(data, buf+1);
|
|
1651
|
+
*buf = '[';
|
|
1652
|
+
strcpy(buf + data->mv_size * 2 + 1, "]");
|
|
1653
|
+
} else
|
|
1654
|
+
*buf = '\0';
|
|
1655
|
+
return buf;
|
|
1656
|
+
}
|
|
1657
|
+
|
|
1826
1658
|
static const char *
|
|
1827
1659
|
mdb_leafnode_type(MDB_node *n)
|
|
1828
1660
|
{
|
|
@@ -1836,33 +1668,33 @@ void
|
|
|
1836
1668
|
mdb_page_list(MDB_page *mp)
|
|
1837
1669
|
{
|
|
1838
1670
|
pgno_t pgno = mdb_dbg_pgno(mp);
|
|
1839
|
-
const char *type, *state = (mp
|
|
1671
|
+
const char *type, *state = (MP_FLAGS(mp) & P_DIRTY) ? ", dirty" : "";
|
|
1840
1672
|
MDB_node *node;
|
|
1841
1673
|
unsigned int i, nkeys, nsize, total = 0;
|
|
1842
1674
|
MDB_val key;
|
|
1843
1675
|
DKBUF;
|
|
1844
1676
|
|
|
1845
|
-
switch (mp
|
|
1677
|
+
switch (MP_FLAGS(mp) & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) {
|
|
1846
1678
|
case P_BRANCH: type = "Branch page"; break;
|
|
1847
1679
|
case P_LEAF: type = "Leaf page"; break;
|
|
1848
1680
|
case P_LEAF|P_SUBP: type = "Sub-page"; break;
|
|
1849
1681
|
case P_LEAF|P_LEAF2: type = "LEAF2 page"; break;
|
|
1850
1682
|
case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break;
|
|
1851
1683
|
case P_OVERFLOW:
|
|
1852
|
-
fprintf(stderr, "Overflow page %"
|
|
1684
|
+
fprintf(stderr, "Overflow page %"Z"u pages %u%s\n",
|
|
1853
1685
|
pgno, mp->mp_pages, state);
|
|
1854
1686
|
return;
|
|
1855
1687
|
case P_META:
|
|
1856
|
-
fprintf(stderr, "Meta-page %"
|
|
1688
|
+
fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n",
|
|
1857
1689
|
pgno, ((MDB_meta *)METADATA(mp))->mm_txnid);
|
|
1858
1690
|
return;
|
|
1859
1691
|
default:
|
|
1860
|
-
fprintf(stderr, "Bad page %"
|
|
1692
|
+
fprintf(stderr, "Bad page %"Z"u flags 0x%X\n", pgno, MP_FLAGS(mp));
|
|
1861
1693
|
return;
|
|
1862
1694
|
}
|
|
1863
1695
|
|
|
1864
1696
|
nkeys = NUMKEYS(mp);
|
|
1865
|
-
fprintf(stderr, "%s %"
|
|
1697
|
+
fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state);
|
|
1866
1698
|
|
|
1867
1699
|
for (i=0; i<nkeys; i++) {
|
|
1868
1700
|
if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */
|
|
@@ -1877,7 +1709,7 @@ mdb_page_list(MDB_page *mp)
|
|
|
1877
1709
|
key.mv_data = node->mn_data;
|
|
1878
1710
|
nsize = NODESIZE + key.mv_size;
|
|
1879
1711
|
if (IS_BRANCH(mp)) {
|
|
1880
|
-
fprintf(stderr, "key %d: page %"
|
|
1712
|
+
fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node),
|
|
1881
1713
|
DKEY(&key));
|
|
1882
1714
|
total += nsize;
|
|
1883
1715
|
} else {
|
|
@@ -1893,7 +1725,7 @@ mdb_page_list(MDB_page *mp)
|
|
|
1893
1725
|
total = EVEN(total);
|
|
1894
1726
|
}
|
|
1895
1727
|
fprintf(stderr, "Total: header %d + contents %d + unused %d\n",
|
|
1896
|
-
IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp
|
|
1728
|
+
IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + MP_LOWER(mp), total, SIZELEFT(mp));
|
|
1897
1729
|
}
|
|
1898
1730
|
|
|
1899
1731
|
void
|
|
@@ -1973,7 +1805,7 @@ static void mdb_audit(MDB_txn *txn)
|
|
|
1973
1805
|
}
|
|
1974
1806
|
}
|
|
1975
1807
|
if (freecount + count + NUM_METAS != txn->mt_next_pgno) {
|
|
1976
|
-
fprintf(stderr, "audit: %"
|
|
1808
|
+
fprintf(stderr, "audit: %"Z"u freecount: %"Z"u count: %"Z"u total: %"Z"u next_pgno: %"Z"u\n",
|
|
1977
1809
|
txn->mt_txnid, freecount, count+NUM_METAS,
|
|
1978
1810
|
freecount+count+NUM_METAS, txn->mt_next_pgno);
|
|
1979
1811
|
}
|
|
@@ -1990,8 +1822,10 @@ int
|
|
|
1990
1822
|
mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
|
|
1991
1823
|
{
|
|
1992
1824
|
MDB_cmp_func *dcmp = txn->mt_dbxs[dbi].md_dcmp;
|
|
1993
|
-
|
|
1825
|
+
#if UINT_MAX < SIZE_MAX
|
|
1826
|
+
if (dcmp == mdb_cmp_int && a->mv_size == sizeof(size_t))
|
|
1994
1827
|
dcmp = mdb_cmp_clong;
|
|
1828
|
+
#endif
|
|
1995
1829
|
return dcmp(a, b);
|
|
1996
1830
|
}
|
|
1997
1831
|
|
|
@@ -2072,53 +1906,6 @@ mdb_dlist_free(MDB_txn *txn)
|
|
|
2072
1906
|
dl[0].mid = 0;
|
|
2073
1907
|
}
|
|
2074
1908
|
|
|
2075
|
-
#ifdef MDB_VL32
|
|
2076
|
-
static void
|
|
2077
|
-
mdb_page_unref(MDB_txn *txn, MDB_page *mp)
|
|
2078
|
-
{
|
|
2079
|
-
pgno_t pgno;
|
|
2080
|
-
MDB_ID3L tl = txn->mt_rpages;
|
|
2081
|
-
unsigned x, rem;
|
|
2082
|
-
if (mp->mp_flags & (P_SUBP|P_DIRTY))
|
|
2083
|
-
return;
|
|
2084
|
-
rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1);
|
|
2085
|
-
pgno = mp->mp_pgno ^ rem;
|
|
2086
|
-
x = mdb_mid3l_search(tl, pgno);
|
|
2087
|
-
if (x != tl[0].mid && tl[x+1].mid == mp->mp_pgno)
|
|
2088
|
-
x++;
|
|
2089
|
-
if (tl[x].mref)
|
|
2090
|
-
tl[x].mref--;
|
|
2091
|
-
}
|
|
2092
|
-
#define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp)
|
|
2093
|
-
|
|
2094
|
-
static void
|
|
2095
|
-
mdb_cursor_unref(MDB_cursor *mc)
|
|
2096
|
-
{
|
|
2097
|
-
int i;
|
|
2098
|
-
if (mc->mc_txn->mt_rpages[0].mid) {
|
|
2099
|
-
if (!mc->mc_snum || !mc->mc_pg[0] || IS_SUBP(mc->mc_pg[0]))
|
|
2100
|
-
return;
|
|
2101
|
-
for (i=0; i<mc->mc_snum; i++)
|
|
2102
|
-
mdb_page_unref(mc->mc_txn, mc->mc_pg[i]);
|
|
2103
|
-
if (mc->mc_ovpg) {
|
|
2104
|
-
mdb_page_unref(mc->mc_txn, mc->mc_ovpg);
|
|
2105
|
-
mc->mc_ovpg = 0;
|
|
2106
|
-
}
|
|
2107
|
-
}
|
|
2108
|
-
mc->mc_snum = mc->mc_top = 0;
|
|
2109
|
-
mc->mc_pg[0] = NULL;
|
|
2110
|
-
mc->mc_flags &= ~C_INITIALIZED;
|
|
2111
|
-
}
|
|
2112
|
-
#define MDB_CURSOR_UNREF(mc, force) \
|
|
2113
|
-
(((force) || ((mc)->mc_flags & C_INITIALIZED)) \
|
|
2114
|
-
? mdb_cursor_unref(mc) \
|
|
2115
|
-
: (void)0)
|
|
2116
|
-
|
|
2117
|
-
#else
|
|
2118
|
-
#define MDB_PAGE_UNREF(txn, mp)
|
|
2119
|
-
#define MDB_CURSOR_UNREF(mc, force) ((void)0)
|
|
2120
|
-
#endif /* MDB_VL32 */
|
|
2121
|
-
|
|
2122
1909
|
/** Loosen or free a single page.
|
|
2123
1910
|
* Saves single pages to a list for future reuse
|
|
2124
1911
|
* in this same txn. It has been pulled from the freeDB
|
|
@@ -2148,7 +1935,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
|
|
|
2148
1935
|
if (mp != dl[x].mptr) { /* bad cursor? */
|
|
2149
1936
|
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
2150
1937
|
txn->mt_flags |= MDB_TXN_ERROR;
|
|
2151
|
-
return
|
|
1938
|
+
return MDB_CORRUPTED;
|
|
2152
1939
|
}
|
|
2153
1940
|
/* ok, it's ours */
|
|
2154
1941
|
loose = 1;
|
|
@@ -2160,7 +1947,8 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
|
|
|
2160
1947
|
}
|
|
2161
1948
|
}
|
|
2162
1949
|
if (loose) {
|
|
2163
|
-
DPRINTF(("loosen db %d page %"
|
|
1950
|
+
DPRINTF(("loosen db %d page %"Z"u", DDBI(mc),
|
|
1951
|
+
mp->mp_pgno));
|
|
2164
1952
|
NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
|
|
2165
1953
|
txn->mt_loose_pgs = mp;
|
|
2166
1954
|
txn->mt_loose_count++;
|
|
@@ -2193,9 +1981,13 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
|
|
|
2193
1981
|
unsigned i, j;
|
|
2194
1982
|
int rc = MDB_SUCCESS, level;
|
|
2195
1983
|
|
|
2196
|
-
/* Mark pages seen by cursors
|
|
2197
|
-
|
|
2198
|
-
|
|
1984
|
+
/* Mark pages seen by cursors */
|
|
1985
|
+
if (mc->mc_flags & C_UNTRACK)
|
|
1986
|
+
mc = NULL; /* will find mc in mt_cursors */
|
|
1987
|
+
for (i = txn->mt_numdbs;; mc = txn->mt_cursors[--i]) {
|
|
1988
|
+
for (; mc; mc=mc->mc_next) {
|
|
1989
|
+
if (!(mc->mc_flags & C_INITIALIZED))
|
|
1990
|
+
continue;
|
|
2199
1991
|
for (m3 = mc;; m3 = &mx->mx_cursor) {
|
|
2200
1992
|
mp = NULL;
|
|
2201
1993
|
for (j=0; j<m3->mc_snum; j++) {
|
|
@@ -2214,13 +2006,10 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
|
|
|
2214
2006
|
break;
|
|
2215
2007
|
}
|
|
2216
2008
|
}
|
|
2217
|
-
|
|
2218
|
-
|
|
2219
|
-
if (i == 0)
|
|
2220
|
-
goto mark_done;
|
|
2009
|
+
if (i == 0)
|
|
2010
|
+
break;
|
|
2221
2011
|
}
|
|
2222
2012
|
|
|
2223
|
-
mark_done:
|
|
2224
2013
|
if (all) {
|
|
2225
2014
|
/* Mark dirty root pages */
|
|
2226
2015
|
for (i=0; i<txn->mt_numdbs; i++) {
|
|
@@ -2396,16 +2185,12 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
|
|
|
2396
2185
|
{
|
|
2397
2186
|
MDB_ID2 mid;
|
|
2398
2187
|
int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
|
|
2399
|
-
|
|
2400
|
-
|
|
2401
|
-
insert = mdb_mid2l_insert;
|
|
2402
|
-
#else /* but otherwise with writemaps, we just use msync, we
|
|
2403
|
-
* don't need the ordering and just append */
|
|
2404
|
-
if (txn->mt_flags & MDB_TXN_WRITEMAP)
|
|
2188
|
+
|
|
2189
|
+
if (txn->mt_flags & MDB_TXN_WRITEMAP) {
|
|
2405
2190
|
insert = mdb_mid2l_append;
|
|
2406
|
-
else
|
|
2191
|
+
} else {
|
|
2407
2192
|
insert = mdb_mid2l_insert;
|
|
2408
|
-
|
|
2193
|
+
}
|
|
2409
2194
|
mid.mid = mp->mp_pgno;
|
|
2410
2195
|
mid.mptr = mp;
|
|
2411
2196
|
rc = insert(txn->mt_u.dirty_list, &mid);
|
|
@@ -2421,8 +2206,6 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
|
|
|
2421
2206
|
* Do not modify the freedB, just merge freeDB records into me_pghead[]
|
|
2422
2207
|
* and move me_pglast to say which records were consumed. Only this
|
|
2423
2208
|
* function can create me_pghead and move me_pglast/mt_next_pgno.
|
|
2424
|
-
* When #MDB_DEVEL & 2, it is not affected by #mdb_freelist_save(): it
|
|
2425
|
-
* then uses the transaction's original snapshot of the freeDB.
|
|
2426
2209
|
* @param[in] mc cursor A cursor handle identifying the transaction and
|
|
2427
2210
|
* database for which we are allocating.
|
|
2428
2211
|
* @param[in] num the number of pages to allocate.
|
|
@@ -2460,7 +2243,8 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
2460
2243
|
np = txn->mt_loose_pgs;
|
|
2461
2244
|
txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np);
|
|
2462
2245
|
txn->mt_loose_count--;
|
|
2463
|
-
DPRINTF(("db %d use loose page %"
|
|
2246
|
+
DPRINTF(("db %d use loose page %"Z"u", DDBI(mc),
|
|
2247
|
+
np->mp_pgno));
|
|
2464
2248
|
*mp = np;
|
|
2465
2249
|
return MDB_SUCCESS;
|
|
2466
2250
|
}
|
|
@@ -2497,14 +2281,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
2497
2281
|
last = env->me_pglast;
|
|
2498
2282
|
oldest = env->me_pgoldest;
|
|
2499
2283
|
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
|
|
2500
|
-
#if (MDB_DEVEL) & 2 /* "& 2" so MDB_DEVEL=1 won't hide bugs breaking freeDB */
|
|
2501
|
-
/* Use original snapshot. TODO: Should need less care in code
|
|
2502
|
-
* which modifies the database. Maybe we can delete some code?
|
|
2503
|
-
*/
|
|
2504
|
-
m2.mc_flags |= C_ORIG_RDONLY;
|
|
2505
|
-
m2.mc_db = &env->me_metas[(txn->mt_txnid-1) & 1]->mm_dbs[FREE_DBI];
|
|
2506
|
-
m2.mc_dbflag = (unsigned char *)""; /* probably unnecessary */
|
|
2507
|
-
#endif
|
|
2508
2284
|
if (last) {
|
|
2509
2285
|
op = MDB_SET_RANGE;
|
|
2510
2286
|
key.mv_data = &last; /* will look up last+1 */
|
|
@@ -2562,10 +2338,10 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
2562
2338
|
}
|
|
2563
2339
|
env->me_pglast = last;
|
|
2564
2340
|
#if (MDB_DEBUG) > 1
|
|
2565
|
-
DPRINTF(("IDL read txn %"
|
|
2341
|
+
DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u",
|
|
2566
2342
|
last, txn->mt_dbs[FREE_DBI].md_root, i));
|
|
2567
2343
|
for (j = i; j; j--)
|
|
2568
|
-
DPRINTF(("IDL %"
|
|
2344
|
+
DPRINTF(("IDL %"Z"u", idl[j]));
|
|
2569
2345
|
#endif
|
|
2570
2346
|
/* Merge in descending sorted order */
|
|
2571
2347
|
mdb_midl_xmerge(mop, idl);
|
|
@@ -2580,20 +2356,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
2580
2356
|
rc = MDB_MAP_FULL;
|
|
2581
2357
|
goto fail;
|
|
2582
2358
|
}
|
|
2583
|
-
#if defined(_WIN32) && !defined(MDB_VL32)
|
|
2584
|
-
if (!(env->me_flags & MDB_RDONLY)) {
|
|
2585
|
-
void *p;
|
|
2586
|
-
p = (MDB_page *)(env->me_map + env->me_psize * pgno);
|
|
2587
|
-
p = VirtualAlloc(p, env->me_psize * num, MEM_COMMIT,
|
|
2588
|
-
(env->me_flags & MDB_WRITEMAP) ? PAGE_READWRITE:
|
|
2589
|
-
PAGE_READONLY);
|
|
2590
|
-
if (!p) {
|
|
2591
|
-
DPUTS("VirtualAlloc failed");
|
|
2592
|
-
rc = ErrCode();
|
|
2593
|
-
goto fail;
|
|
2594
|
-
}
|
|
2595
|
-
}
|
|
2596
|
-
#endif
|
|
2597
2359
|
|
|
2598
2360
|
search_done:
|
|
2599
2361
|
if (env->me_flags & MDB_WRITEMAP) {
|
|
@@ -2723,7 +2485,7 @@ mdb_page_touch(MDB_cursor *mc)
|
|
|
2723
2485
|
pgno_t pgno;
|
|
2724
2486
|
int rc;
|
|
2725
2487
|
|
|
2726
|
-
if (!F_ISSET(mp
|
|
2488
|
+
if (!F_ISSET(MP_FLAGS(mp), P_DIRTY)) {
|
|
2727
2489
|
if (txn->mt_flags & MDB_TXN_SPILLS) {
|
|
2728
2490
|
np = NULL;
|
|
2729
2491
|
rc = mdb_page_unspill(txn, mp, &np);
|
|
@@ -2736,7 +2498,7 @@ mdb_page_touch(MDB_cursor *mc)
|
|
|
2736
2498
|
(rc = mdb_page_alloc(mc, 1, &np)))
|
|
2737
2499
|
goto fail;
|
|
2738
2500
|
pgno = np->mp_pgno;
|
|
2739
|
-
DPRINTF(("touched db %d page %"
|
|
2501
|
+
DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc),
|
|
2740
2502
|
mp->mp_pgno, pgno));
|
|
2741
2503
|
mdb_cassert(mc, mp->mp_pgno != pgno);
|
|
2742
2504
|
mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
|
|
@@ -2760,7 +2522,7 @@ mdb_page_touch(MDB_cursor *mc)
|
|
|
2760
2522
|
if (mp != dl[x].mptr) { /* bad cursor? */
|
|
2761
2523
|
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
2762
2524
|
txn->mt_flags |= MDB_TXN_ERROR;
|
|
2763
|
-
return
|
|
2525
|
+
return MDB_CORRUPTED;
|
|
2764
2526
|
}
|
|
2765
2527
|
return 0;
|
|
2766
2528
|
}
|
|
@@ -2804,7 +2566,6 @@ done:
|
|
|
2804
2566
|
}
|
|
2805
2567
|
}
|
|
2806
2568
|
}
|
|
2807
|
-
MDB_PAGE_UNREF(mc->mc_txn, mp);
|
|
2808
2569
|
return 0;
|
|
2809
2570
|
|
|
2810
2571
|
fail:
|
|
@@ -2813,22 +2574,18 @@ fail:
|
|
|
2813
2574
|
}
|
|
2814
2575
|
|
|
2815
2576
|
int
|
|
2816
|
-
|
|
2577
|
+
mdb_env_sync(MDB_env *env, int force)
|
|
2817
2578
|
{
|
|
2818
2579
|
int rc = 0;
|
|
2819
2580
|
if (env->me_flags & MDB_RDONLY)
|
|
2820
2581
|
return EACCES;
|
|
2821
|
-
if (force
|
|
2822
|
-
#ifndef _WIN32 /* Sync is normally achieved in Windows by doing WRITE_THROUGH writes */
|
|
2823
|
-
|| !(env->me_flags & MDB_NOSYNC)
|
|
2824
|
-
#endif
|
|
2825
|
-
) {
|
|
2582
|
+
if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
|
|
2826
2583
|
if (env->me_flags & MDB_WRITEMAP) {
|
|
2827
2584
|
int flags = ((env->me_flags & MDB_MAPASYNC) && !force)
|
|
2828
2585
|
? MS_ASYNC : MS_SYNC;
|
|
2829
|
-
if (MDB_MSYNC(env->me_map, env->
|
|
2586
|
+
if (MDB_MSYNC(env->me_map, env->me_mapsize, flags))
|
|
2830
2587
|
rc = ErrCode();
|
|
2831
|
-
#
|
|
2588
|
+
#if defined(_WIN32) || defined(__APPLE__)
|
|
2832
2589
|
else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd))
|
|
2833
2590
|
rc = ErrCode();
|
|
2834
2591
|
#endif
|
|
@@ -2846,13 +2603,6 @@ mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs)
|
|
|
2846
2603
|
return rc;
|
|
2847
2604
|
}
|
|
2848
2605
|
|
|
2849
|
-
int
|
|
2850
|
-
mdb_env_sync(MDB_env *env, int force)
|
|
2851
|
-
{
|
|
2852
|
-
MDB_meta *m = mdb_env_pick_meta(env);
|
|
2853
|
-
return mdb_env_sync0(env, force, m->mm_last_pg+1);
|
|
2854
|
-
}
|
|
2855
|
-
|
|
2856
2606
|
/** Back up parent txn's cursors, then grab the originals for tracking */
|
|
2857
2607
|
static int
|
|
2858
2608
|
mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst)
|
|
@@ -3058,9 +2808,14 @@ mdb_txn_renew0(MDB_txn *txn)
|
|
|
3058
2808
|
do /* LY: Retry on a race, ITS#7970. */
|
|
3059
2809
|
r->mr_txnid = ti->mti_txnid;
|
|
3060
2810
|
while(r->mr_txnid != ti->mti_txnid);
|
|
2811
|
+
if (!r->mr_txnid && (env->me_flags & MDB_RDONLY)) {
|
|
2812
|
+
meta = mdb_env_pick_meta(env);
|
|
2813
|
+
r->mr_txnid = meta->mm_txnid;
|
|
2814
|
+
} else {
|
|
2815
|
+
meta = env->me_metas[r->mr_txnid & 1];
|
|
2816
|
+
}
|
|
3061
2817
|
txn->mt_txnid = r->mr_txnid;
|
|
3062
2818
|
txn->mt_u.reader = r;
|
|
3063
|
-
meta = env->me_metas[txn->mt_txnid & 1];
|
|
3064
2819
|
}
|
|
3065
2820
|
|
|
3066
2821
|
} else {
|
|
@@ -3077,7 +2832,7 @@ mdb_txn_renew0(MDB_txn *txn)
|
|
|
3077
2832
|
txn->mt_txnid++;
|
|
3078
2833
|
#if MDB_DEBUG
|
|
3079
2834
|
if (txn->mt_txnid == mdb_debug_start)
|
|
3080
|
-
mdb_debug =
|
|
2835
|
+
mdb_debug = MDB_DBG_INFO;
|
|
3081
2836
|
#endif
|
|
3082
2837
|
txn->mt_child = NULL;
|
|
3083
2838
|
txn->mt_loose_pgs = NULL;
|
|
@@ -3097,9 +2852,6 @@ mdb_txn_renew0(MDB_txn *txn)
|
|
|
3097
2852
|
|
|
3098
2853
|
/* Moved to here to avoid a data race in read TXNs */
|
|
3099
2854
|
txn->mt_next_pgno = meta->mm_last_pg+1;
|
|
3100
|
-
#ifdef MDB_VL32
|
|
3101
|
-
txn->mt_last_pgno = txn->mt_next_pgno - 1;
|
|
3102
|
-
#endif
|
|
3103
2855
|
|
|
3104
2856
|
txn->mt_flags = flags;
|
|
3105
2857
|
|
|
@@ -3135,7 +2887,7 @@ mdb_txn_renew(MDB_txn *txn)
|
|
|
3135
2887
|
|
|
3136
2888
|
rc = mdb_txn_renew0(txn);
|
|
3137
2889
|
if (rc == MDB_SUCCESS) {
|
|
3138
|
-
DPRINTF(("renew txn %"
|
|
2890
|
+
DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
|
|
3139
2891
|
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
|
|
3140
2892
|
(void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root));
|
|
3141
2893
|
}
|
|
@@ -3178,17 +2930,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
|
|
|
3178
2930
|
DPRINTF(("calloc: %s", strerror(errno)));
|
|
3179
2931
|
return ENOMEM;
|
|
3180
2932
|
}
|
|
3181
|
-
#ifdef MDB_VL32
|
|
3182
|
-
if (!parent) {
|
|
3183
|
-
txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
|
|
3184
|
-
if (!txn->mt_rpages) {
|
|
3185
|
-
free(txn);
|
|
3186
|
-
return ENOMEM;
|
|
3187
|
-
}
|
|
3188
|
-
txn->mt_rpages[0].mid = 0;
|
|
3189
|
-
txn->mt_rpcheck = MDB_TRPAGE_SIZE/2;
|
|
3190
|
-
}
|
|
3191
|
-
#endif
|
|
3192
2933
|
txn->mt_dbxs = env->me_dbxs; /* static */
|
|
3193
2934
|
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
|
|
3194
2935
|
txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs;
|
|
@@ -3216,9 +2957,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
|
|
|
3216
2957
|
parent->mt_child = txn;
|
|
3217
2958
|
txn->mt_parent = parent;
|
|
3218
2959
|
txn->mt_numdbs = parent->mt_numdbs;
|
|
3219
|
-
#ifdef MDB_VL32
|
|
3220
|
-
txn->mt_rpages = parent->mt_rpages;
|
|
3221
|
-
#endif
|
|
3222
2960
|
memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
|
|
3223
2961
|
/* Copy parent's mt_dbflags, but clear DB_NEW */
|
|
3224
2962
|
for (i=0; i<txn->mt_numdbs; i++)
|
|
@@ -3245,18 +2983,17 @@ renew:
|
|
|
3245
2983
|
}
|
|
3246
2984
|
if (rc) {
|
|
3247
2985
|
if (txn != env->me_txn0) {
|
|
3248
|
-
|
|
3249
|
-
free(txn->mt_rpages);
|
|
3250
|
-
#endif
|
|
2986
|
+
free(txn->mt_u.dirty_list);
|
|
3251
2987
|
free(txn);
|
|
3252
2988
|
}
|
|
3253
2989
|
} else {
|
|
3254
2990
|
txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */
|
|
3255
2991
|
*ret = txn;
|
|
3256
|
-
DPRINTF(("begin txn %"
|
|
2992
|
+
DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
|
|
3257
2993
|
txn->mt_txnid, (flags & MDB_RDONLY) ? 'r' : 'w',
|
|
3258
2994
|
(void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root));
|
|
3259
2995
|
}
|
|
2996
|
+
MDB_TRACE(("%p, %p, %u = %p", env, parent, flags, txn));
|
|
3260
2997
|
|
|
3261
2998
|
return rc;
|
|
3262
2999
|
}
|
|
@@ -3268,7 +3005,7 @@ mdb_txn_env(MDB_txn *txn)
|
|
|
3268
3005
|
return txn->mt_env;
|
|
3269
3006
|
}
|
|
3270
3007
|
|
|
3271
|
-
|
|
3008
|
+
size_t
|
|
3272
3009
|
mdb_txn_id(MDB_txn *txn)
|
|
3273
3010
|
{
|
|
3274
3011
|
if(!txn) return 0;
|
|
@@ -3320,7 +3057,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
|
|
|
3320
3057
|
/* Export or close DBI handles opened in this txn */
|
|
3321
3058
|
mdb_dbis_update(txn, mode & MDB_END_UPDATE);
|
|
3322
3059
|
|
|
3323
|
-
DPRINTF(("%s txn %"
|
|
3060
|
+
DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
|
|
3324
3061
|
names[mode & MDB_END_OPMASK],
|
|
3325
3062
|
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
|
|
3326
3063
|
(void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root));
|
|
@@ -3349,6 +3086,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
|
|
|
3349
3086
|
|
|
3350
3087
|
txn->mt_numdbs = 0;
|
|
3351
3088
|
txn->mt_flags = MDB_TXN_FINISHED;
|
|
3089
|
+
mdb_midl_free(txn->mt_spill_pgs);
|
|
3352
3090
|
|
|
3353
3091
|
if (!txn->mt_parent) {
|
|
3354
3092
|
mdb_midl_shrink(&txn->mt_free_pgs);
|
|
@@ -3370,35 +3108,10 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
|
|
|
3370
3108
|
mdb_midl_free(txn->mt_free_pgs);
|
|
3371
3109
|
free(txn->mt_u.dirty_list);
|
|
3372
3110
|
}
|
|
3373
|
-
mdb_midl_free(txn->mt_spill_pgs);
|
|
3374
3111
|
|
|
3375
3112
|
mdb_midl_free(pghead);
|
|
3376
3113
|
}
|
|
3377
|
-
|
|
3378
|
-
if (!txn->mt_parent) {
|
|
3379
|
-
MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages;
|
|
3380
|
-
unsigned i, x, n = tl[0].mid;
|
|
3381
|
-
pthread_mutex_lock(&env->me_rpmutex);
|
|
3382
|
-
for (i = 1; i <= n; i++) {
|
|
3383
|
-
if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
|
|
3384
|
-
/* tmp overflow pages that we didn't share in env */
|
|
3385
|
-
munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
|
|
3386
|
-
} else {
|
|
3387
|
-
x = mdb_mid3l_search(el, tl[i].mid);
|
|
3388
|
-
if (tl[i].mptr == el[x].mptr) {
|
|
3389
|
-
el[x].mref--;
|
|
3390
|
-
} else {
|
|
3391
|
-
/* another tmp overflow page */
|
|
3392
|
-
munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
|
|
3393
|
-
}
|
|
3394
|
-
}
|
|
3395
|
-
}
|
|
3396
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
3397
|
-
tl[0].mid = 0;
|
|
3398
|
-
if (mode & MDB_END_FREE)
|
|
3399
|
-
free(tl);
|
|
3400
|
-
}
|
|
3401
|
-
#endif
|
|
3114
|
+
|
|
3402
3115
|
if (mode & MDB_END_FREE)
|
|
3403
3116
|
free(txn);
|
|
3404
3117
|
}
|
|
@@ -3416,23 +3129,27 @@ mdb_txn_reset(MDB_txn *txn)
|
|
|
3416
3129
|
mdb_txn_end(txn, MDB_END_RESET);
|
|
3417
3130
|
}
|
|
3418
3131
|
|
|
3419
|
-
void
|
|
3420
|
-
|
|
3132
|
+
static void
|
|
3133
|
+
_mdb_txn_abort(MDB_txn *txn)
|
|
3421
3134
|
{
|
|
3422
3135
|
if (txn == NULL)
|
|
3423
3136
|
return;
|
|
3424
3137
|
|
|
3425
3138
|
if (txn->mt_child)
|
|
3426
|
-
|
|
3139
|
+
_mdb_txn_abort(txn->mt_child);
|
|
3427
3140
|
|
|
3428
3141
|
mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE);
|
|
3429
3142
|
}
|
|
3430
3143
|
|
|
3144
|
+
void
|
|
3145
|
+
mdb_txn_abort(MDB_txn *txn)
|
|
3146
|
+
{
|
|
3147
|
+
MDB_TRACE(("%p", txn));
|
|
3148
|
+
_mdb_txn_abort(txn);
|
|
3149
|
+
}
|
|
3150
|
+
|
|
3431
3151
|
/** Save the freelist as of this transaction to the freeDB.
|
|
3432
3152
|
* This changes the freelist. Keep trying until it stabilizes.
|
|
3433
|
-
*
|
|
3434
|
-
* When (MDB_DEVEL) & 2, the changes do not affect #mdb_page_alloc(),
|
|
3435
|
-
* it then uses the transaction's original snapshot of the freeDB.
|
|
3436
3153
|
*/
|
|
3437
3154
|
static int
|
|
3438
3155
|
mdb_freelist_save(MDB_txn *txn)
|
|
@@ -3521,7 +3238,7 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
3521
3238
|
pglast = head_id = *(txnid_t *)key.mv_data;
|
|
3522
3239
|
total_room = head_room = 0;
|
|
3523
3240
|
mdb_tassert(txn, pglast <= env->me_pglast);
|
|
3524
|
-
rc =
|
|
3241
|
+
rc = _mdb_cursor_del(&mc, 0);
|
|
3525
3242
|
if (rc)
|
|
3526
3243
|
return rc;
|
|
3527
3244
|
}
|
|
@@ -3541,7 +3258,7 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
3541
3258
|
do {
|
|
3542
3259
|
freecnt = free_pgs[0];
|
|
3543
3260
|
data.mv_size = MDB_IDL_SIZEOF(free_pgs);
|
|
3544
|
-
rc =
|
|
3261
|
+
rc = _mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
|
|
3545
3262
|
if (rc)
|
|
3546
3263
|
return rc;
|
|
3547
3264
|
/* Retry if mt_free_pgs[] grew during the Put() */
|
|
@@ -3552,10 +3269,10 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
3552
3269
|
#if (MDB_DEBUG) > 1
|
|
3553
3270
|
{
|
|
3554
3271
|
unsigned int i = free_pgs[0];
|
|
3555
|
-
DPRINTF(("IDL write txn %"
|
|
3272
|
+
DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u",
|
|
3556
3273
|
txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i));
|
|
3557
3274
|
for (; i; i--)
|
|
3558
|
-
DPRINTF(("IDL %"
|
|
3275
|
+
DPRINTF(("IDL %"Z"u", free_pgs[i]));
|
|
3559
3276
|
}
|
|
3560
3277
|
#endif
|
|
3561
3278
|
continue;
|
|
@@ -3590,7 +3307,7 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
3590
3307
|
key.mv_size = sizeof(head_id);
|
|
3591
3308
|
key.mv_data = &head_id;
|
|
3592
3309
|
data.mv_size = (head_room + 1) * sizeof(pgno_t);
|
|
3593
|
-
rc =
|
|
3310
|
+
rc = _mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
|
|
3594
3311
|
if (rc)
|
|
3595
3312
|
return rc;
|
|
3596
3313
|
/* IDL is initially empty, zero out at least the length */
|
|
@@ -3645,7 +3362,7 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
3645
3362
|
data.mv_data = mop -= len;
|
|
3646
3363
|
save = mop[0];
|
|
3647
3364
|
mop[0] = len;
|
|
3648
|
-
rc =
|
|
3365
|
+
rc = _mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
|
|
3649
3366
|
mop[0] = save;
|
|
3650
3367
|
if (rc || !(mop_len -= len))
|
|
3651
3368
|
break;
|
|
@@ -3666,31 +3383,21 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
|
3666
3383
|
MDB_ID2L dl = txn->mt_u.dirty_list;
|
|
3667
3384
|
unsigned psize = env->me_psize, j;
|
|
3668
3385
|
int i, pagecount = dl[0].mid, rc;
|
|
3669
|
-
size_t size = 0;
|
|
3670
|
-
MDB_OFF_T pos = 0;
|
|
3386
|
+
size_t size = 0, pos = 0;
|
|
3671
3387
|
pgno_t pgno = 0;
|
|
3672
3388
|
MDB_page *dp = NULL;
|
|
3673
3389
|
#ifdef _WIN32
|
|
3674
|
-
OVERLAPPED
|
|
3675
|
-
MDB_page *wdp;
|
|
3676
|
-
int async_i = 0;
|
|
3677
|
-
HANDLE fd = (env->me_flags & MDB_NOSYNC) ? env->me_fd : env->me_ovfd;
|
|
3390
|
+
OVERLAPPED ov;
|
|
3678
3391
|
#else
|
|
3679
3392
|
struct iovec iov[MDB_COMMIT_PAGES];
|
|
3680
|
-
|
|
3681
|
-
|
|
3682
|
-
ssize_t wsize = 0, wres;
|
|
3683
|
-
MDB_OFF_T wpos = 0, next_pos = 1; /* impossible pos, so pos != next_pos */
|
|
3393
|
+
ssize_t wpos = 0, wsize = 0, wres;
|
|
3394
|
+
size_t next_pos = 1; /* impossible pos, so pos != next_pos */
|
|
3684
3395
|
int n = 0;
|
|
3396
|
+
#endif
|
|
3685
3397
|
|
|
3686
3398
|
j = i = keep;
|
|
3687
|
-
|
|
3688
|
-
|
|
3689
|
-
/* In windows, we still do writes to the file (with write-through enabled in sync mode),
|
|
3690
|
-
* as this is faster than FlushViewOfFile/FlushFileBuffers */
|
|
3691
|
-
&& (env->me_flags & MDB_NOSYNC)
|
|
3692
|
-
#endif
|
|
3693
|
-
) {
|
|
3399
|
+
|
|
3400
|
+
if (env->me_flags & MDB_WRITEMAP) {
|
|
3694
3401
|
/* Clear dirty flags */
|
|
3695
3402
|
while (++i <= pagecount) {
|
|
3696
3403
|
dp = dl[i].mptr;
|
|
@@ -3705,27 +3412,6 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
|
3705
3412
|
goto done;
|
|
3706
3413
|
}
|
|
3707
3414
|
|
|
3708
|
-
#ifdef _WIN32
|
|
3709
|
-
if (pagecount - keep >= env->ovs) {
|
|
3710
|
-
/* ran out of room in ov array, and re-malloc, copy handles and free previous */
|
|
3711
|
-
int ovs = (pagecount - keep) * 1.5; /* provide extra padding to reduce number of re-allocations */
|
|
3712
|
-
int new_size = ovs * sizeof(OVERLAPPED);
|
|
3713
|
-
ov = malloc(new_size);
|
|
3714
|
-
if (ov == NULL)
|
|
3715
|
-
return ENOMEM;
|
|
3716
|
-
int previous_size = env->ovs * sizeof(OVERLAPPED);
|
|
3717
|
-
memcpy(ov, env->ov, previous_size); /* Copy previous OVERLAPPED data to retain event handles */
|
|
3718
|
-
/* And clear rest of memory */
|
|
3719
|
-
memset(&ov[env->ovs], 0, new_size - previous_size);
|
|
3720
|
-
if (env->ovs > 0) {
|
|
3721
|
-
free(env->ov); /* release previous allocation */
|
|
3722
|
-
}
|
|
3723
|
-
|
|
3724
|
-
env->ov = ov;
|
|
3725
|
-
env->ovs = ovs;
|
|
3726
|
-
}
|
|
3727
|
-
#endif
|
|
3728
|
-
|
|
3729
3415
|
/* Write the pages */
|
|
3730
3416
|
for (;;) {
|
|
3731
3417
|
if (++i <= pagecount) {
|
|
@@ -3743,65 +3429,46 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
|
3743
3429
|
size = psize;
|
|
3744
3430
|
if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
|
|
3745
3431
|
}
|
|
3746
|
-
/* Write up to MDB_COMMIT_PAGES dirty pages at a time. */
|
|
3747
|
-
if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE
|
|
3748
3432
|
#ifdef _WIN32
|
|
3749
|
-
|
|
3750
|
-
|
|
3751
|
-
|
|
3752
|
-
|
|
3753
|
-
|
|
3754
|
-
|
|
3755
|
-
|
|
3756
|
-
|
|
3757
|
-
|
|
3758
|
-
|
|
3759
|
-
|
|
3760
|
-
|
|
3761
|
-
|
|
3433
|
+
else break;
|
|
3434
|
+
|
|
3435
|
+
/* Windows actually supports scatter/gather I/O, but only on
|
|
3436
|
+
* unbuffered file handles. Since we're relying on the OS page
|
|
3437
|
+
* cache for all our data, that's self-defeating. So we just
|
|
3438
|
+
* write pages one at a time. We use the ov structure to set
|
|
3439
|
+
* the write offset, to at least save the overhead of a Seek
|
|
3440
|
+
* system call.
|
|
3441
|
+
*/
|
|
3442
|
+
DPRINTF(("committing page %"Z"u", pgno));
|
|
3443
|
+
memset(&ov, 0, sizeof(ov));
|
|
3444
|
+
ov.Offset = pos & 0xffffffff;
|
|
3445
|
+
ov.OffsetHigh = pos >> 16 >> 16;
|
|
3446
|
+
if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) {
|
|
3447
|
+
rc = ErrCode();
|
|
3448
|
+
DPRINTF(("WriteFile: %d", rc));
|
|
3449
|
+
return rc;
|
|
3450
|
+
}
|
|
3451
|
+
#else
|
|
3452
|
+
/* Write up to MDB_COMMIT_PAGES dirty pages at a time. */
|
|
3453
|
+
if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
|
|
3762
3454
|
if (n) {
|
|
3763
3455
|
retry_write:
|
|
3764
3456
|
/* Write previous page(s) */
|
|
3765
|
-
DPRINTF(("committing page %"Z"u", pgno));
|
|
3766
|
-
#ifdef _WIN32
|
|
3767
|
-
OVERLAPPED *this_ov = &ov[async_i];
|
|
3768
|
-
/* Clear status, and keep hEvent, we reuse that */
|
|
3769
|
-
this_ov->Internal = 0;
|
|
3770
|
-
this_ov->Offset = wpos & 0xffffffff;
|
|
3771
|
-
this_ov->OffsetHigh = wpos >> 16 >> 16;
|
|
3772
|
-
if (!F_ISSET(env->me_flags, MDB_NOSYNC) && !this_ov->hEvent) {
|
|
3773
|
-
HANDLE event = CreateEvent(NULL, FALSE, FALSE, NULL);
|
|
3774
|
-
if (!event) {
|
|
3775
|
-
rc = ErrCode();
|
|
3776
|
-
DPRINTF(("CreateEvent: %s", strerror(rc)));
|
|
3777
|
-
return rc;
|
|
3778
|
-
}
|
|
3779
|
-
this_ov->hEvent = event;
|
|
3780
|
-
}
|
|
3781
|
-
if (!WriteFile(fd, wdp, wsize, NULL, this_ov)) {
|
|
3782
|
-
rc = ErrCode();
|
|
3783
|
-
if (rc != ERROR_IO_PENDING) {
|
|
3784
|
-
DPRINTF(("WriteFile: %d", rc));
|
|
3785
|
-
return rc;
|
|
3786
|
-
}
|
|
3787
|
-
}
|
|
3788
|
-
async_i++;
|
|
3789
|
-
#else
|
|
3790
3457
|
#ifdef MDB_USE_PWRITEV
|
|
3791
|
-
wres = pwritev(
|
|
3458
|
+
wres = pwritev(env->me_fd, iov, n, wpos);
|
|
3792
3459
|
#else
|
|
3793
3460
|
if (n == 1) {
|
|
3794
|
-
wres = pwrite(
|
|
3461
|
+
wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos);
|
|
3795
3462
|
} else {
|
|
3796
3463
|
retry_seek:
|
|
3797
|
-
if (lseek(
|
|
3464
|
+
if (lseek(env->me_fd, wpos, SEEK_SET) == -1) {
|
|
3798
3465
|
rc = ErrCode();
|
|
3799
3466
|
if (rc == EINTR)
|
|
3800
3467
|
goto retry_seek;
|
|
3801
3468
|
DPRINTF(("lseek: %s", strerror(rc)));
|
|
3802
3469
|
return rc;
|
|
3803
3470
|
}
|
|
3804
|
-
wres = writev(
|
|
3471
|
+
wres = writev(env->me_fd, iov, n);
|
|
3805
3472
|
}
|
|
3806
3473
|
#endif
|
|
3807
3474
|
if (wres != wsize) {
|
|
@@ -3816,69 +3483,37 @@ retry_seek:
|
|
|
3816
3483
|
}
|
|
3817
3484
|
return rc;
|
|
3818
3485
|
}
|
|
3819
|
-
#endif /* _WIN32 */
|
|
3820
3486
|
n = 0;
|
|
3821
3487
|
}
|
|
3822
3488
|
if (i > pagecount)
|
|
3823
3489
|
break;
|
|
3824
3490
|
wpos = pos;
|
|
3825
3491
|
wsize = 0;
|
|
3826
|
-
#ifdef _WIN32
|
|
3827
|
-
wdp = dp;
|
|
3828
|
-
}
|
|
3829
|
-
#else
|
|
3830
3492
|
}
|
|
3493
|
+
DPRINTF(("committing page %"Z"u", pgno));
|
|
3494
|
+
next_pos = pos + size;
|
|
3831
3495
|
iov[n].iov_len = size;
|
|
3832
3496
|
iov[n].iov_base = (char *)dp;
|
|
3833
|
-
#endif /* _WIN32 */
|
|
3834
|
-
DPRINTF(("committing page %"Yu, pgno));
|
|
3835
|
-
next_pos = pos + size;
|
|
3836
3497
|
wsize += size;
|
|
3837
3498
|
n++;
|
|
3838
|
-
}
|
|
3839
|
-
#ifdef MDB_VL32
|
|
3840
|
-
if (pgno > txn->mt_last_pgno)
|
|
3841
|
-
txn->mt_last_pgno = pgno;
|
|
3842
|
-
#endif
|
|
3843
|
-
|
|
3844
|
-
#ifdef _WIN32
|
|
3845
|
-
if (!F_ISSET(env->me_flags, MDB_NOSYNC)) {
|
|
3846
|
-
/* Now wait for all the asynchronous/overlapped sync/write-through writes to complete.
|
|
3847
|
-
* We start with the last one so that all the others should already be complete and
|
|
3848
|
-
* we reduce thread suspend/resuming (in practice, typically about 99.5% of writes are
|
|
3849
|
-
* done after the last write is done) */
|
|
3850
|
-
rc = 0;
|
|
3851
|
-
while (--async_i >= 0) {
|
|
3852
|
-
if (ov[async_i].hEvent) {
|
|
3853
|
-
if (!GetOverlappedResult(fd, &ov[async_i], &wres, TRUE)) {
|
|
3854
|
-
rc = ErrCode(); /* Continue on so that all the event signals are reset */
|
|
3855
|
-
}
|
|
3856
|
-
}
|
|
3857
|
-
}
|
|
3858
|
-
if (rc) { /* any error on GetOverlappedResult, exit now */
|
|
3859
|
-
return rc;
|
|
3860
|
-
}
|
|
3861
|
-
}
|
|
3862
3499
|
#endif /* _WIN32 */
|
|
3500
|
+
}
|
|
3863
3501
|
|
|
3864
|
-
|
|
3865
|
-
|
|
3866
|
-
|
|
3867
|
-
|
|
3868
|
-
|
|
3869
|
-
*/
|
|
3870
|
-
CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE);
|
|
3502
|
+
/* MIPS has cache coherency issues, this is a no-op everywhere else
|
|
3503
|
+
* Note: for any size >= on-chip cache size, entire on-chip cache is
|
|
3504
|
+
* flushed.
|
|
3505
|
+
*/
|
|
3506
|
+
CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE);
|
|
3871
3507
|
|
|
3872
|
-
|
|
3873
|
-
|
|
3874
|
-
|
|
3875
|
-
|
|
3876
|
-
|
|
3877
|
-
|
|
3878
|
-
|
|
3879
|
-
}
|
|
3880
|
-
mdb_dpage_free(env, dp);
|
|
3508
|
+
for (i = keep; ++i <= pagecount; ) {
|
|
3509
|
+
dp = dl[i].mptr;
|
|
3510
|
+
/* This is a page we skipped above */
|
|
3511
|
+
if (!dl[i].mid) {
|
|
3512
|
+
dl[++j] = dl[i];
|
|
3513
|
+
dl[j].mid = dp->mp_pgno;
|
|
3514
|
+
continue;
|
|
3881
3515
|
}
|
|
3516
|
+
mdb_dpage_free(env, dp);
|
|
3882
3517
|
}
|
|
3883
3518
|
|
|
3884
3519
|
done:
|
|
@@ -3888,10 +3523,8 @@ done:
|
|
|
3888
3523
|
return MDB_SUCCESS;
|
|
3889
3524
|
}
|
|
3890
3525
|
|
|
3891
|
-
static int
|
|
3892
|
-
|
|
3893
|
-
int
|
|
3894
|
-
mdb_txn_commit(MDB_txn *txn)
|
|
3526
|
+
static int
|
|
3527
|
+
_mdb_txn_commit(MDB_txn *txn)
|
|
3895
3528
|
{
|
|
3896
3529
|
int rc;
|
|
3897
3530
|
unsigned int i, end_mode;
|
|
@@ -3904,7 +3537,7 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
3904
3537
|
end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE;
|
|
3905
3538
|
|
|
3906
3539
|
if (txn->mt_child) {
|
|
3907
|
-
rc =
|
|
3540
|
+
rc = _mdb_txn_commit(txn->mt_child);
|
|
3908
3541
|
if (rc)
|
|
3909
3542
|
goto fail;
|
|
3910
3543
|
}
|
|
@@ -4066,7 +3699,7 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
4066
3699
|
!(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS)))
|
|
4067
3700
|
goto done;
|
|
4068
3701
|
|
|
4069
|
-
DPRINTF(("committing txn %"
|
|
3702
|
+
DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u",
|
|
4070
3703
|
txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root));
|
|
4071
3704
|
|
|
4072
3705
|
/* Update DB root pointers */
|
|
@@ -4084,7 +3717,7 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
4084
3717
|
goto fail;
|
|
4085
3718
|
}
|
|
4086
3719
|
data.mv_data = &txn->mt_dbs[i];
|
|
4087
|
-
rc =
|
|
3720
|
+
rc = _mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data,
|
|
4088
3721
|
F_SUBDATA);
|
|
4089
3722
|
if (rc)
|
|
4090
3723
|
goto fail;
|
|
@@ -4104,42 +3737,36 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
4104
3737
|
mdb_audit(txn);
|
|
4105
3738
|
#endif
|
|
4106
3739
|
|
|
4107
|
-
if ((rc = mdb_page_flush(txn, 0))
|
|
4108
|
-
|
|
4109
|
-
|
|
4110
|
-
(rc = mdb_env_sync0(env, 0, txn->mt_next_pgno)))
|
|
4111
|
-
goto fail;
|
|
4112
|
-
if ((rc = mdb_env_write_meta(txn)))
|
|
3740
|
+
if ((rc = mdb_page_flush(txn, 0)) ||
|
|
3741
|
+
(rc = mdb_env_sync(env, 0)) ||
|
|
3742
|
+
(rc = mdb_env_write_meta(txn)))
|
|
4113
3743
|
goto fail;
|
|
4114
3744
|
end_mode = MDB_END_COMMITTED|MDB_END_UPDATE;
|
|
4115
|
-
if (env->me_flags & MDB_PREVSNAPSHOT) {
|
|
4116
|
-
if (!(env->me_flags & MDB_NOLOCK)) {
|
|
4117
|
-
int excl;
|
|
4118
|
-
rc = mdb_env_share_locks(env, &excl);
|
|
4119
|
-
if (rc)
|
|
4120
|
-
goto fail;
|
|
4121
|
-
}
|
|
4122
|
-
env->me_flags ^= MDB_PREVSNAPSHOT;
|
|
4123
|
-
}
|
|
4124
3745
|
|
|
4125
3746
|
done:
|
|
4126
3747
|
mdb_txn_end(txn, end_mode);
|
|
4127
3748
|
return MDB_SUCCESS;
|
|
4128
3749
|
|
|
4129
3750
|
fail:
|
|
4130
|
-
|
|
3751
|
+
_mdb_txn_abort(txn);
|
|
4131
3752
|
return rc;
|
|
4132
3753
|
}
|
|
4133
3754
|
|
|
3755
|
+
int
|
|
3756
|
+
mdb_txn_commit(MDB_txn *txn)
|
|
3757
|
+
{
|
|
3758
|
+
MDB_TRACE(("%p", txn));
|
|
3759
|
+
return _mdb_txn_commit(txn);
|
|
3760
|
+
}
|
|
3761
|
+
|
|
4134
3762
|
/** Read the environment parameters of a DB environment before
|
|
4135
3763
|
* mapping it into memory.
|
|
4136
3764
|
* @param[in] env the environment handle
|
|
4137
|
-
* @param[in] prev whether to read the backup meta page
|
|
4138
3765
|
* @param[out] meta address of where to store the meta information
|
|
4139
3766
|
* @return 0 on success, non-zero on failure.
|
|
4140
3767
|
*/
|
|
4141
3768
|
static int ESECT
|
|
4142
|
-
mdb_env_read_header(MDB_env *env,
|
|
3769
|
+
mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
|
4143
3770
|
{
|
|
4144
3771
|
MDB_metabuf pbuf;
|
|
4145
3772
|
MDB_page *p;
|
|
@@ -4174,7 +3801,7 @@ mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta)
|
|
|
4174
3801
|
p = (MDB_page *)&pbuf;
|
|
4175
3802
|
|
|
4176
3803
|
if (!F_ISSET(p->mp_flags, P_META)) {
|
|
4177
|
-
DPRINTF(("page %"
|
|
3804
|
+
DPRINTF(("page %"Z"u not a meta page", p->mp_pgno));
|
|
4178
3805
|
return MDB_INVALID;
|
|
4179
3806
|
}
|
|
4180
3807
|
|
|
@@ -4190,7 +3817,7 @@ mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta)
|
|
|
4190
3817
|
return MDB_VERSION_MISMATCH;
|
|
4191
3818
|
}
|
|
4192
3819
|
|
|
4193
|
-
if (off == 0 ||
|
|
3820
|
+
if (off == 0 || m->mm_txnid > meta->mm_txnid)
|
|
4194
3821
|
*meta = *m;
|
|
4195
3822
|
}
|
|
4196
3823
|
return 0;
|
|
@@ -4236,6 +3863,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
|
|
4236
3863
|
if (len == -1 && ErrCode() == EINTR) continue; \
|
|
4237
3864
|
rc = (len >= 0); break; } while(1)
|
|
4238
3865
|
#endif
|
|
3866
|
+
|
|
4239
3867
|
DPUTS("writing new meta page");
|
|
4240
3868
|
|
|
4241
3869
|
psize = env->me_psize;
|
|
@@ -4243,6 +3871,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
|
|
4243
3871
|
p = calloc(NUM_METAS, psize);
|
|
4244
3872
|
if (!p)
|
|
4245
3873
|
return ENOMEM;
|
|
3874
|
+
|
|
4246
3875
|
p->mp_pgno = 0;
|
|
4247
3876
|
p->mp_flags = P_META;
|
|
4248
3877
|
*(MDB_meta *)METADATA(p) = *meta;
|
|
@@ -4273,8 +3902,8 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
|
4273
3902
|
MDB_env *env;
|
|
4274
3903
|
MDB_meta meta, metab, *mp;
|
|
4275
3904
|
unsigned flags;
|
|
4276
|
-
|
|
4277
|
-
|
|
3905
|
+
size_t mapsize;
|
|
3906
|
+
off_t off;
|
|
4278
3907
|
int rc, len, toggle;
|
|
4279
3908
|
char *ptr;
|
|
4280
3909
|
HANDLE mfd;
|
|
@@ -4285,18 +3914,17 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
|
4285
3914
|
#endif
|
|
4286
3915
|
|
|
4287
3916
|
toggle = txn->mt_txnid & 1;
|
|
4288
|
-
DPRINTF(("writing meta page %d for root page %"
|
|
3917
|
+
DPRINTF(("writing meta page %d for root page %"Z"u",
|
|
4289
3918
|
toggle, txn->mt_dbs[MAIN_DBI].md_root));
|
|
4290
3919
|
|
|
4291
3920
|
env = txn->mt_env;
|
|
4292
|
-
flags =
|
|
3921
|
+
flags = env->me_flags;
|
|
4293
3922
|
mp = env->me_metas[toggle];
|
|
4294
3923
|
mapsize = env->me_metas[toggle ^ 1]->mm_mapsize;
|
|
4295
3924
|
/* Persist any increases of mapsize config */
|
|
4296
3925
|
if (mapsize < env->me_mapsize)
|
|
4297
3926
|
mapsize = env->me_mapsize;
|
|
4298
3927
|
|
|
4299
|
-
#ifndef _WIN32 /* We don't want to ever use MSYNC/FlushViewOfFile in Windows */
|
|
4300
3928
|
if (flags & MDB_WRITEMAP) {
|
|
4301
3929
|
mp->mm_mapsize = mapsize;
|
|
4302
3930
|
mp->mm_dbs[FREE_DBI] = txn->mt_dbs[FREE_DBI];
|
|
@@ -4312,10 +3940,11 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
|
4312
3940
|
unsigned meta_size = env->me_psize;
|
|
4313
3941
|
rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
|
|
4314
3942
|
ptr = (char *)mp - PAGEHDRSZ;
|
|
4315
|
-
|
|
3943
|
+
#ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */
|
|
4316
3944
|
r2 = (ptr - env->me_map) & (env->me_os_psize - 1);
|
|
4317
3945
|
ptr -= r2;
|
|
4318
3946
|
meta_size += r2;
|
|
3947
|
+
#endif
|
|
4319
3948
|
if (MDB_MSYNC(ptr, meta_size, rc)) {
|
|
4320
3949
|
rc = ErrCode();
|
|
4321
3950
|
goto fail;
|
|
@@ -4323,7 +3952,6 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
|
4323
3952
|
}
|
|
4324
3953
|
goto done;
|
|
4325
3954
|
}
|
|
4326
|
-
#endif
|
|
4327
3955
|
metab.mm_txnid = mp->mm_txnid;
|
|
4328
3956
|
metab.mm_last_pg = mp->mm_last_pg;
|
|
4329
3957
|
|
|
@@ -4402,8 +4030,7 @@ static MDB_meta *
|
|
|
4402
4030
|
mdb_env_pick_meta(const MDB_env *env)
|
|
4403
4031
|
{
|
|
4404
4032
|
MDB_meta *const *metas = env->me_metas;
|
|
4405
|
-
return metas[
|
|
4406
|
-
((env->me_flags & MDB_PREVSNAPSHOT) != 0) ];
|
|
4033
|
+
return metas[ metas[0]->mm_txnid < metas[1]->mm_txnid ];
|
|
4407
4034
|
}
|
|
4408
4035
|
|
|
4409
4036
|
int ESECT
|
|
@@ -4423,30 +4050,15 @@ mdb_env_create(MDB_env **env)
|
|
|
4423
4050
|
#ifdef MDB_USE_POSIX_SEM
|
|
4424
4051
|
e->me_rmutex = SEM_FAILED;
|
|
4425
4052
|
e->me_wmutex = SEM_FAILED;
|
|
4426
|
-
#elif defined MDB_USE_SYSV_SEM
|
|
4427
|
-
e->me_rmutex->semid = -1;
|
|
4428
|
-
e->me_wmutex->semid = -1;
|
|
4429
4053
|
#endif
|
|
4430
4054
|
e->me_pid = getpid();
|
|
4431
4055
|
GET_PAGESIZE(e->me_os_psize);
|
|
4432
4056
|
VGMEMP_CREATE(e,0,0);
|
|
4433
4057
|
*env = e;
|
|
4058
|
+
MDB_TRACE(("%p", e));
|
|
4434
4059
|
return MDB_SUCCESS;
|
|
4435
4060
|
}
|
|
4436
4061
|
|
|
4437
|
-
#ifdef _WIN32
|
|
4438
|
-
/** @brief Map a result from an NTAPI call to WIN32. */
|
|
4439
|
-
static DWORD
|
|
4440
|
-
mdb_nt2win32(NTSTATUS st)
|
|
4441
|
-
{
|
|
4442
|
-
OVERLAPPED o = {0};
|
|
4443
|
-
DWORD br;
|
|
4444
|
-
o.Internal = st;
|
|
4445
|
-
GetOverlappedResult(NULL, &o, &br, FALSE);
|
|
4446
|
-
return GetLastError();
|
|
4447
|
-
}
|
|
4448
|
-
#endif
|
|
4449
|
-
|
|
4450
4062
|
static int ESECT
|
|
4451
4063
|
mdb_env_map(MDB_env *env, void *addr)
|
|
4452
4064
|
{
|
|
@@ -4454,54 +4066,42 @@ mdb_env_map(MDB_env *env, void *addr)
|
|
|
4454
4066
|
unsigned int flags = env->me_flags;
|
|
4455
4067
|
#ifdef _WIN32
|
|
4456
4068
|
int rc;
|
|
4457
|
-
int access = SECTION_MAP_READ;
|
|
4458
4069
|
HANDLE mh;
|
|
4459
|
-
|
|
4460
|
-
|
|
4461
|
-
ULONG pageprot = PAGE_READONLY, secprot, alloctype;
|
|
4070
|
+
LONG sizelo, sizehi;
|
|
4071
|
+
size_t msize;
|
|
4462
4072
|
|
|
4463
|
-
if (flags & MDB_WRITEMAP) {
|
|
4464
|
-
access |= SECTION_MAP_WRITE;
|
|
4465
|
-
pageprot = PAGE_READWRITE;
|
|
4466
|
-
}
|
|
4467
4073
|
if (flags & MDB_RDONLY) {
|
|
4468
|
-
|
|
4074
|
+
/* Don't set explicit map size, use whatever exists */
|
|
4469
4075
|
msize = 0;
|
|
4470
|
-
|
|
4076
|
+
sizelo = 0;
|
|
4077
|
+
sizehi = 0;
|
|
4471
4078
|
} else {
|
|
4472
|
-
secprot = PAGE_READWRITE;
|
|
4473
4079
|
msize = env->me_mapsize;
|
|
4474
|
-
|
|
4080
|
+
sizelo = msize & 0xffffffff;
|
|
4081
|
+
sizehi = msize >> 16 >> 16; /* only needed on Win64 */
|
|
4082
|
+
|
|
4083
|
+
/* Windows won't create mappings for zero length files.
|
|
4084
|
+
* and won't map more than the file size.
|
|
4085
|
+
* Just set the maxsize right now.
|
|
4086
|
+
*/
|
|
4087
|
+
if (!(flags & MDB_WRITEMAP) && (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo
|
|
4088
|
+
|| !SetEndOfFile(env->me_fd)
|
|
4089
|
+
|| SetFilePointer(env->me_fd, 0, NULL, 0) != 0))
|
|
4090
|
+
return ErrCode();
|
|
4475
4091
|
}
|
|
4476
4092
|
|
|
4477
|
-
|
|
4478
|
-
|
|
4479
|
-
|
|
4480
|
-
|
|
4481
|
-
|
|
4482
|
-
|
|
4483
|
-
|
|
4484
|
-
|
|
4485
|
-
|
|
4486
|
-
|
|
4487
|
-
#else
|
|
4488
|
-
rc = NtCreateSection(&mh, access, NULL, NULL, secprot, SEC_RESERVE, env->me_fd);
|
|
4489
|
-
#endif
|
|
4490
|
-
if (rc)
|
|
4491
|
-
return mdb_nt2win32(rc);
|
|
4492
|
-
map = addr;
|
|
4493
|
-
#ifdef MDB_VL32
|
|
4494
|
-
msize = NUM_METAS * env->me_psize;
|
|
4495
|
-
#endif
|
|
4496
|
-
rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot);
|
|
4497
|
-
#ifdef MDB_VL32
|
|
4498
|
-
env->me_fmh = mh;
|
|
4499
|
-
#else
|
|
4500
|
-
NtClose(mh);
|
|
4501
|
-
#endif
|
|
4093
|
+
mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ?
|
|
4094
|
+
PAGE_READWRITE : PAGE_READONLY,
|
|
4095
|
+
sizehi, sizelo, NULL);
|
|
4096
|
+
if (!mh)
|
|
4097
|
+
return ErrCode();
|
|
4098
|
+
env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ?
|
|
4099
|
+
FILE_MAP_WRITE : FILE_MAP_READ,
|
|
4100
|
+
0, 0, msize, addr);
|
|
4101
|
+
rc = env->me_map ? 0 : ErrCode();
|
|
4102
|
+
CloseHandle(mh);
|
|
4502
4103
|
if (rc)
|
|
4503
|
-
return
|
|
4504
|
-
env->me_map = map;
|
|
4104
|
+
return rc;
|
|
4505
4105
|
#else
|
|
4506
4106
|
int mmap_flags = MAP_SHARED;
|
|
4507
4107
|
int prot = PROT_READ;
|
|
@@ -4509,15 +4109,6 @@ mdb_env_map(MDB_env *env, void *addr)
|
|
|
4509
4109
|
if (flags & MDB_NOSYNC)
|
|
4510
4110
|
mmap_flags |= MAP_NOSYNC;
|
|
4511
4111
|
#endif
|
|
4512
|
-
#ifdef MDB_VL32
|
|
4513
|
-
(void) flags;
|
|
4514
|
-
env->me_map = mmap(addr, NUM_METAS * env->me_psize, prot, mmap_flags,
|
|
4515
|
-
env->me_fd, 0);
|
|
4516
|
-
if (env->me_map == MAP_FAILED) {
|
|
4517
|
-
env->me_map = NULL;
|
|
4518
|
-
return ErrCode();
|
|
4519
|
-
}
|
|
4520
|
-
#else
|
|
4521
4112
|
if (flags & MDB_WRITEMAP) {
|
|
4522
4113
|
prot |= PROT_WRITE;
|
|
4523
4114
|
if (ftruncate(env->me_fd, env->me_mapsize) < 0)
|
|
@@ -4549,7 +4140,6 @@ mdb_env_map(MDB_env *env, void *addr)
|
|
|
4549
4140
|
*/
|
|
4550
4141
|
if (addr && env->me_map != addr)
|
|
4551
4142
|
return EBUSY; /* TODO: Make a new MDB_* error code? */
|
|
4552
|
-
#endif
|
|
4553
4143
|
|
|
4554
4144
|
p = (MDB_page *)env->me_map;
|
|
4555
4145
|
env->me_metas[0] = METADATA(p);
|
|
@@ -4559,17 +4149,15 @@ mdb_env_map(MDB_env *env, void *addr)
|
|
|
4559
4149
|
}
|
|
4560
4150
|
|
|
4561
4151
|
int ESECT
|
|
4562
|
-
mdb_env_set_mapsize(MDB_env *env,
|
|
4152
|
+
mdb_env_set_mapsize(MDB_env *env, size_t size)
|
|
4563
4153
|
{
|
|
4564
4154
|
/* If env is already open, caller is responsible for making
|
|
4565
4155
|
* sure there are no active txns.
|
|
4566
4156
|
*/
|
|
4567
4157
|
if (env->me_map) {
|
|
4158
|
+
int rc;
|
|
4568
4159
|
MDB_meta *meta;
|
|
4569
|
-
#ifndef MDB_VL32
|
|
4570
4160
|
void *old;
|
|
4571
|
-
int rc;
|
|
4572
|
-
#endif
|
|
4573
4161
|
if (env->me_txn)
|
|
4574
4162
|
return EINVAL;
|
|
4575
4163
|
meta = mdb_env_pick_meta(env);
|
|
@@ -4577,25 +4165,21 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size)
|
|
|
4577
4165
|
size = meta->mm_mapsize;
|
|
4578
4166
|
{
|
|
4579
4167
|
/* Silently round up to minimum if the size is too small */
|
|
4580
|
-
|
|
4168
|
+
size_t minsize = (meta->mm_last_pg + 1) * env->me_psize;
|
|
4581
4169
|
if (size < minsize)
|
|
4582
4170
|
size = minsize;
|
|
4583
4171
|
}
|
|
4584
|
-
#ifndef MDB_VL32
|
|
4585
|
-
/* For MDB_VL32 this bit is a noop since we dynamically remap
|
|
4586
|
-
* chunks of the DB anyway.
|
|
4587
|
-
*/
|
|
4588
4172
|
munmap(env->me_map, env->me_mapsize);
|
|
4589
4173
|
env->me_mapsize = size;
|
|
4590
4174
|
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
|
|
4591
4175
|
rc = mdb_env_map(env, old);
|
|
4592
4176
|
if (rc)
|
|
4593
4177
|
return rc;
|
|
4594
|
-
#endif /* !MDB_VL32 */
|
|
4595
4178
|
}
|
|
4596
4179
|
env->me_mapsize = size;
|
|
4597
4180
|
if (env->me_psize)
|
|
4598
4181
|
env->me_maxpg = env->me_mapsize / env->me_psize;
|
|
4182
|
+
MDB_TRACE(("%p, %"Yu"", env, size));
|
|
4599
4183
|
return MDB_SUCCESS;
|
|
4600
4184
|
}
|
|
4601
4185
|
|
|
@@ -4605,6 +4189,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
|
|
|
4605
4189
|
if (env->me_map)
|
|
4606
4190
|
return EINVAL;
|
|
4607
4191
|
env->me_maxdbs = dbs + CORE_DBS;
|
|
4192
|
+
MDB_TRACE(("%p, %u", env, dbs));
|
|
4608
4193
|
return MDB_SUCCESS;
|
|
4609
4194
|
}
|
|
4610
4195
|
|
|
@@ -4614,6 +4199,7 @@ mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
|
|
|
4614
4199
|
if (env->me_map || readers < 1)
|
|
4615
4200
|
return EINVAL;
|
|
4616
4201
|
env->me_maxreaders = readers;
|
|
4202
|
+
MDB_TRACE(("%p, %u", env, readers));
|
|
4617
4203
|
return MDB_SUCCESS;
|
|
4618
4204
|
}
|
|
4619
4205
|
|
|
@@ -4627,7 +4213,7 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
|
|
|
4627
4213
|
}
|
|
4628
4214
|
|
|
4629
4215
|
static int ESECT
|
|
4630
|
-
mdb_fsize(HANDLE fd,
|
|
4216
|
+
mdb_fsize(HANDLE fd, size_t *size)
|
|
4631
4217
|
{
|
|
4632
4218
|
#ifdef _WIN32
|
|
4633
4219
|
LARGE_INTEGER fsize;
|
|
@@ -4716,7 +4302,7 @@ mdb_fname_init(const char *path, unsigned envflags, MDB_name *fname)
|
|
|
4716
4302
|
/** File type, access mode etc. for #mdb_fopen() */
|
|
4717
4303
|
enum mdb_fopen_type {
|
|
4718
4304
|
#ifdef _WIN32
|
|
4719
|
-
MDB_O_RDONLY, MDB_O_RDWR,
|
|
4305
|
+
MDB_O_RDONLY, MDB_O_RDWR, MDB_O_META, MDB_O_COPY, MDB_O_LOCKS
|
|
4720
4306
|
#else
|
|
4721
4307
|
/* A comment in mdb_fopen() explains some O_* flag choices. */
|
|
4722
4308
|
MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */
|
|
@@ -4777,11 +4363,6 @@ mdb_fopen(const MDB_env *env, MDB_name *fname,
|
|
|
4777
4363
|
disp = OPEN_ALWAYS;
|
|
4778
4364
|
attrs = FILE_ATTRIBUTE_NORMAL;
|
|
4779
4365
|
switch (which) {
|
|
4780
|
-
case MDB_O_OVERLAPPED: /* for unbuffered asynchronous writes (write-through mode)*/
|
|
4781
|
-
acc = GENERIC_WRITE;
|
|
4782
|
-
disp = OPEN_EXISTING;
|
|
4783
|
-
attrs = FILE_FLAG_OVERLAPPED|FILE_FLAG_WRITE_THROUGH;
|
|
4784
|
-
break;
|
|
4785
4366
|
case MDB_O_RDONLY: /* read-only datafile */
|
|
4786
4367
|
acc = GENERIC_READ;
|
|
4787
4368
|
disp = OPEN_EXISTING;
|
|
@@ -4843,7 +4424,7 @@ mdb_fopen(const MDB_env *env, MDB_name *fname,
|
|
|
4843
4424
|
/** Further setup required for opening an LMDB environment
|
|
4844
4425
|
*/
|
|
4845
4426
|
static int ESECT
|
|
4846
|
-
mdb_env_open2(MDB_env *env
|
|
4427
|
+
mdb_env_open2(MDB_env *env)
|
|
4847
4428
|
{
|
|
4848
4429
|
unsigned int flags = env->me_flags;
|
|
4849
4430
|
int i, newenv = 0, rc;
|
|
@@ -4856,22 +4437,6 @@ mdb_env_open2(MDB_env *env, int prev)
|
|
|
4856
4437
|
env->me_pidquery = MDB_PROCESS_QUERY_LIMITED_INFORMATION;
|
|
4857
4438
|
else
|
|
4858
4439
|
env->me_pidquery = PROCESS_QUERY_INFORMATION;
|
|
4859
|
-
/* Grab functions we need from NTDLL */
|
|
4860
|
-
if (!NtCreateSection) {
|
|
4861
|
-
HMODULE h = GetModuleHandleW(L"NTDLL.DLL");
|
|
4862
|
-
if (!h)
|
|
4863
|
-
return MDB_PROBLEM;
|
|
4864
|
-
NtClose = (NtCloseFunc *)GetProcAddress(h, "NtClose");
|
|
4865
|
-
if (!NtClose)
|
|
4866
|
-
return MDB_PROBLEM;
|
|
4867
|
-
NtMapViewOfSection = (NtMapViewOfSectionFunc *)GetProcAddress(h, "NtMapViewOfSection");
|
|
4868
|
-
if (!NtMapViewOfSection)
|
|
4869
|
-
return MDB_PROBLEM;
|
|
4870
|
-
NtCreateSection = (NtCreateSectionFunc *)GetProcAddress(h, "NtCreateSection");
|
|
4871
|
-
if (!NtCreateSection)
|
|
4872
|
-
return MDB_PROBLEM;
|
|
4873
|
-
}
|
|
4874
|
-
env->ovs = 0;
|
|
4875
4440
|
#endif /* _WIN32 */
|
|
4876
4441
|
|
|
4877
4442
|
#ifdef BROKEN_FDATASYNC
|
|
@@ -4922,7 +4487,7 @@ mdb_env_open2(MDB_env *env, int prev)
|
|
|
4922
4487
|
}
|
|
4923
4488
|
#endif
|
|
4924
4489
|
|
|
4925
|
-
if ((i = mdb_env_read_header(env,
|
|
4490
|
+
if ((i = mdb_env_read_header(env, &meta)) != 0) {
|
|
4926
4491
|
if (i != ENOENT)
|
|
4927
4492
|
return i;
|
|
4928
4493
|
DPUTS("new mdbenv");
|
|
@@ -4945,7 +4510,7 @@ mdb_env_open2(MDB_env *env, int prev)
|
|
|
4945
4510
|
/* Make sure mapsize >= committed data size. Even when using
|
|
4946
4511
|
* mm_mapsize, which could be broken in old files (ITS#7789).
|
|
4947
4512
|
*/
|
|
4948
|
-
|
|
4513
|
+
size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize;
|
|
4949
4514
|
if (env->me_mapsize < minsize)
|
|
4950
4515
|
env->me_mapsize = minsize;
|
|
4951
4516
|
}
|
|
@@ -4964,18 +4529,6 @@ mdb_env_open2(MDB_env *env, int prev)
|
|
|
4964
4529
|
return rc;
|
|
4965
4530
|
newenv = 0;
|
|
4966
4531
|
}
|
|
4967
|
-
#ifdef _WIN32
|
|
4968
|
-
/* For FIXEDMAP, make sure the file is non-empty before we attempt to map it */
|
|
4969
|
-
if (newenv) {
|
|
4970
|
-
char dummy = 0;
|
|
4971
|
-
DWORD len;
|
|
4972
|
-
rc = WriteFile(env->me_fd, &dummy, 1, &len, NULL);
|
|
4973
|
-
if (!rc) {
|
|
4974
|
-
rc = ErrCode();
|
|
4975
|
-
return rc;
|
|
4976
|
-
}
|
|
4977
|
-
}
|
|
4978
|
-
#endif
|
|
4979
4532
|
|
|
4980
4533
|
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
|
|
4981
4534
|
if (rc)
|
|
@@ -5005,13 +4558,13 @@ mdb_env_open2(MDB_env *env, int prev)
|
|
|
5005
4558
|
|
|
5006
4559
|
DPRINTF(("opened database version %u, pagesize %u",
|
|
5007
4560
|
meta->mm_version, env->me_psize));
|
|
5008
|
-
DPRINTF(("using meta page %d",
|
|
5009
|
-
DPRINTF(("depth: %u",
|
|
5010
|
-
DPRINTF(("entries: %"
|
|
5011
|
-
DPRINTF(("branch pages: %"
|
|
5012
|
-
DPRINTF(("leaf pages: %"
|
|
5013
|
-
DPRINTF(("overflow pages: %"
|
|
5014
|
-
DPRINTF(("root: %"
|
|
4561
|
+
DPRINTF(("using meta page %d", (int) (meta->mm_txnid & 1)));
|
|
4562
|
+
DPRINTF(("depth: %u", db->md_depth));
|
|
4563
|
+
DPRINTF(("entries: %"Z"u", db->md_entries));
|
|
4564
|
+
DPRINTF(("branch pages: %"Z"u", db->md_branch_pages));
|
|
4565
|
+
DPRINTF(("leaf pages: %"Z"u", db->md_leaf_pages));
|
|
4566
|
+
DPRINTF(("overflow pages: %"Z"u", db->md_overflow_pages));
|
|
4567
|
+
DPRINTF(("root: %"Z"u", db->md_root));
|
|
5015
4568
|
}
|
|
5016
4569
|
#endif
|
|
5017
4570
|
|
|
@@ -5207,21 +4760,32 @@ mdb_env_excl_lock(MDB_env *env, int *excl)
|
|
|
5207
4760
|
* Share and Enjoy! :-)
|
|
5208
4761
|
*/
|
|
5209
4762
|
|
|
4763
|
+
typedef unsigned long long mdb_hash_t;
|
|
4764
|
+
#define MDB_HASH_INIT ((mdb_hash_t)0xcbf29ce484222325ULL)
|
|
4765
|
+
|
|
5210
4766
|
/** perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer
|
|
5211
4767
|
* @param[in] val value to hash
|
|
5212
|
-
* @param[in]
|
|
4768
|
+
* @param[in] hval initial value for hash
|
|
5213
4769
|
* @return 64 bit hash
|
|
4770
|
+
*
|
|
4771
|
+
* NOTE: To use the recommended 64 bit FNV-1a hash, use MDB_HASH_INIT as the
|
|
4772
|
+
* hval arg on the first call.
|
|
5214
4773
|
*/
|
|
5215
4774
|
static mdb_hash_t
|
|
5216
|
-
|
|
4775
|
+
mdb_hash_val(MDB_val *val, mdb_hash_t hval)
|
|
5217
4776
|
{
|
|
5218
|
-
|
|
5219
|
-
|
|
4777
|
+
unsigned char *s = (unsigned char *)val->mv_data; /* unsigned string */
|
|
4778
|
+
unsigned char *end = s + val->mv_size;
|
|
5220
4779
|
/*
|
|
5221
|
-
* FNV-1a hash each octet of the
|
|
4780
|
+
* FNV-1a hash each octet of the string
|
|
5222
4781
|
*/
|
|
5223
4782
|
while (s < end) {
|
|
5224
|
-
|
|
4783
|
+
/* xor the bottom with the current octet */
|
|
4784
|
+
hval ^= (mdb_hash_t)*s++;
|
|
4785
|
+
|
|
4786
|
+
/* multiply by the 64 bit FNV magic prime mod 2^64 */
|
|
4787
|
+
hval += (hval << 1) + (hval << 4) + (hval << 5) +
|
|
4788
|
+
(hval << 7) + (hval << 8) + (hval << 40);
|
|
5225
4789
|
}
|
|
5226
4790
|
/* return our new hash value */
|
|
5227
4791
|
return hval;
|
|
@@ -5238,33 +4802,25 @@ mdb_hash(const void *val, size_t len)
|
|
|
5238
4802
|
static const char mdb_a85[]= "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
|
|
5239
4803
|
|
|
5240
4804
|
static void ESECT
|
|
5241
|
-
mdb_pack85(unsigned long
|
|
4805
|
+
mdb_pack85(unsigned long l, char *out)
|
|
5242
4806
|
{
|
|
5243
4807
|
int i;
|
|
5244
4808
|
|
|
5245
|
-
for (i=0; i<
|
|
4809
|
+
for (i=0; i<5; i++) {
|
|
5246
4810
|
*out++ = mdb_a85[l % 85];
|
|
5247
4811
|
l /= 85;
|
|
5248
4812
|
}
|
|
5249
|
-
*out = '\0';
|
|
5250
4813
|
}
|
|
5251
4814
|
|
|
5252
|
-
/** Init #MDB_env.me_mutexname[] except the char which #MUTEXNAME() will set.
|
|
5253
|
-
* Changes to this code must be reflected in #MDB_LOCK_FORMAT.
|
|
5254
|
-
*/
|
|
5255
4815
|
static void ESECT
|
|
5256
|
-
|
|
4816
|
+
mdb_hash_enc(MDB_val *val, char *encbuf)
|
|
5257
4817
|
{
|
|
5258
|
-
|
|
5259
|
-
strcpy(nm, MUTEXNAME_PREFIX);
|
|
5260
|
-
mdb_pack85(env->me_txns->mti_mutexid, nm + sizeof(MUTEXNAME_PREFIX));
|
|
5261
|
-
}
|
|
5262
|
-
|
|
5263
|
-
/** Return env->me_mutexname after filling in ch ('r'/'w') for convenience */
|
|
5264
|
-
#define MUTEXNAME(env, ch) ( \
|
|
5265
|
-
(void) ((env)->me_mutexname[sizeof(MUTEXNAME_PREFIX)-1] = (ch)), \
|
|
5266
|
-
(env)->me_mutexname)
|
|
4818
|
+
mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT);
|
|
5267
4819
|
|
|
4820
|
+
mdb_pack85(h, encbuf);
|
|
4821
|
+
mdb_pack85(h>>32, encbuf+5);
|
|
4822
|
+
encbuf[10] = '\0';
|
|
4823
|
+
}
|
|
5268
4824
|
#endif
|
|
5269
4825
|
|
|
5270
4826
|
/** Open and/or initialize the lock region for the environment.
|
|
@@ -5281,13 +4837,9 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5281
4837
|
# define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT
|
|
5282
4838
|
#else
|
|
5283
4839
|
# define MDB_ERRCODE_ROFS EROFS
|
|
5284
|
-
#endif
|
|
5285
|
-
#ifdef MDB_USE_SYSV_SEM
|
|
5286
|
-
int semid;
|
|
5287
|
-
union semun semu;
|
|
5288
4840
|
#endif
|
|
5289
4841
|
int rc;
|
|
5290
|
-
|
|
4842
|
+
off_t size, rsize;
|
|
5291
4843
|
|
|
5292
4844
|
rc = mdb_fopen(env, fname, MDB_O_LOCKS, mode, &env->me_lfd);
|
|
5293
4845
|
if (rc) {
|
|
@@ -5362,6 +4914,8 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5362
4914
|
DWORD nhigh;
|
|
5363
4915
|
DWORD nlow;
|
|
5364
4916
|
} idbuf;
|
|
4917
|
+
MDB_val val;
|
|
4918
|
+
char encbuf[11];
|
|
5365
4919
|
|
|
5366
4920
|
if (!mdb_sec_inited) {
|
|
5367
4921
|
InitializeSecurityDescriptor(&mdb_null_sd,
|
|
@@ -5376,11 +4930,14 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5376
4930
|
idbuf.volume = stbuf.dwVolumeSerialNumber;
|
|
5377
4931
|
idbuf.nhigh = stbuf.nFileIndexHigh;
|
|
5378
4932
|
idbuf.nlow = stbuf.nFileIndexLow;
|
|
5379
|
-
|
|
5380
|
-
|
|
5381
|
-
|
|
4933
|
+
val.mv_data = &idbuf;
|
|
4934
|
+
val.mv_size = sizeof(idbuf);
|
|
4935
|
+
mdb_hash_enc(&val, encbuf);
|
|
4936
|
+
sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", encbuf);
|
|
4937
|
+
sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", encbuf);
|
|
4938
|
+
env->me_rmutex = CreateMutexA(&mdb_all_sa, FALSE, env->me_txns->mti_rmname);
|
|
5382
4939
|
if (!env->me_rmutex) goto fail_errno;
|
|
5383
|
-
env->me_wmutex = CreateMutexA(&mdb_all_sa, FALSE,
|
|
4940
|
+
env->me_wmutex = CreateMutexA(&mdb_all_sa, FALSE, env->me_txns->mti_wmname);
|
|
5384
4941
|
if (!env->me_wmutex) goto fail_errno;
|
|
5385
4942
|
#elif defined(MDB_USE_POSIX_SEM)
|
|
5386
4943
|
struct stat stbuf;
|
|
@@ -5388,46 +4945,34 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5388
4945
|
dev_t dev;
|
|
5389
4946
|
ino_t ino;
|
|
5390
4947
|
} idbuf;
|
|
4948
|
+
MDB_val val;
|
|
4949
|
+
char encbuf[11];
|
|
5391
4950
|
|
|
5392
4951
|
#if defined(__NetBSD__)
|
|
5393
4952
|
#define MDB_SHORT_SEMNAMES 1 /* limited to 14 chars */
|
|
5394
4953
|
#endif
|
|
5395
4954
|
if (fstat(env->me_lfd, &stbuf)) goto fail_errno;
|
|
5396
|
-
memset(&idbuf, 0, sizeof(idbuf));
|
|
5397
4955
|
idbuf.dev = stbuf.st_dev;
|
|
5398
4956
|
idbuf.ino = stbuf.st_ino;
|
|
5399
|
-
|
|
4957
|
+
val.mv_data = &idbuf;
|
|
4958
|
+
val.mv_size = sizeof(idbuf);
|
|
4959
|
+
mdb_hash_enc(&val, encbuf);
|
|
5400
4960
|
#ifdef MDB_SHORT_SEMNAMES
|
|
5401
|
-
|
|
5402
|
-
* mdb_env_mname_init() to keep the latter portable.
|
|
5403
|
-
*/
|
|
5404
|
-
% ((mdb_hash_t)85*85*85*85*85*85*85*85*85)
|
|
4961
|
+
encbuf[9] = '\0'; /* drop name from 15 chars to 14 chars */
|
|
5405
4962
|
#endif
|
|
5406
|
-
|
|
5407
|
-
|
|
4963
|
+
sprintf(env->me_txns->mti_rmname, "/MDBr%s", encbuf);
|
|
4964
|
+
sprintf(env->me_txns->mti_wmname, "/MDBw%s", encbuf);
|
|
5408
4965
|
/* Clean up after a previous run, if needed: Try to
|
|
5409
4966
|
* remove both semaphores before doing anything else.
|
|
5410
4967
|
*/
|
|
5411
|
-
sem_unlink(
|
|
5412
|
-
sem_unlink(
|
|
5413
|
-
env->me_rmutex = sem_open(
|
|
4968
|
+
sem_unlink(env->me_txns->mti_rmname);
|
|
4969
|
+
sem_unlink(env->me_txns->mti_wmname);
|
|
4970
|
+
env->me_rmutex = sem_open(env->me_txns->mti_rmname,
|
|
4971
|
+
O_CREAT|O_EXCL, mode, 1);
|
|
5414
4972
|
if (env->me_rmutex == SEM_FAILED) goto fail_errno;
|
|
5415
|
-
env->me_wmutex = sem_open(
|
|
4973
|
+
env->me_wmutex = sem_open(env->me_txns->mti_wmname,
|
|
4974
|
+
O_CREAT|O_EXCL, mode, 1);
|
|
5416
4975
|
if (env->me_wmutex == SEM_FAILED) goto fail_errno;
|
|
5417
|
-
#elif defined(MDB_USE_SYSV_SEM)
|
|
5418
|
-
unsigned short vals[2] = {1, 1};
|
|
5419
|
-
key_t key = ftok(fname->mn_val, 'M'); /* fname is lockfile path now */
|
|
5420
|
-
if (key == -1)
|
|
5421
|
-
goto fail_errno;
|
|
5422
|
-
semid = semget(key, 2, (mode & 0777) | IPC_CREAT);
|
|
5423
|
-
if (semid < 0)
|
|
5424
|
-
goto fail_errno;
|
|
5425
|
-
semu.array = vals;
|
|
5426
|
-
if (semctl(semid, 0, SETALL, semu) < 0)
|
|
5427
|
-
goto fail_errno;
|
|
5428
|
-
env->me_txns->mti_semid = semid;
|
|
5429
|
-
env->me_txns->mti_rlocked = 0;
|
|
5430
|
-
env->me_txns->mti_wlocked = 0;
|
|
5431
4976
|
#else /* MDB_USE_POSIX_MUTEX: */
|
|
5432
4977
|
pthread_mutexattr_t mattr;
|
|
5433
4978
|
|
|
@@ -5438,8 +4983,9 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5438
4983
|
memset(env->me_txns->mti_rmutex, 0, sizeof(*env->me_txns->mti_rmutex));
|
|
5439
4984
|
memset(env->me_txns->mti_wmutex, 0, sizeof(*env->me_txns->mti_wmutex));
|
|
5440
4985
|
|
|
5441
|
-
if ((rc = pthread_mutexattr_init(&mattr))
|
|
4986
|
+
if ((rc = pthread_mutexattr_init(&mattr)))
|
|
5442
4987
|
goto fail;
|
|
4988
|
+
|
|
5443
4989
|
rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
|
|
5444
4990
|
#ifdef MDB_ROBUST_SUPPORTED
|
|
5445
4991
|
if (!rc) rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST);
|
|
@@ -5449,7 +4995,7 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5449
4995
|
pthread_mutexattr_destroy(&mattr);
|
|
5450
4996
|
if (rc)
|
|
5451
4997
|
goto fail;
|
|
5452
|
-
#endif /* _WIN32 ||
|
|
4998
|
+
#endif /* _WIN32 || MDB_USE_POSIX_SEM */
|
|
5453
4999
|
|
|
5454
5000
|
env->me_txns->mti_magic = MDB_MAGIC;
|
|
5455
5001
|
env->me_txns->mti_format = MDB_LOCK_FORMAT;
|
|
@@ -5457,9 +5003,6 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5457
5003
|
env->me_txns->mti_numreaders = 0;
|
|
5458
5004
|
|
|
5459
5005
|
} else {
|
|
5460
|
-
#ifdef MDB_USE_SYSV_SEM
|
|
5461
|
-
struct semid_ds buf;
|
|
5462
|
-
#endif
|
|
5463
5006
|
if (env->me_txns->mti_magic != MDB_MAGIC) {
|
|
5464
5007
|
DPUTS("lock region has invalid magic");
|
|
5465
5008
|
rc = MDB_INVALID;
|
|
@@ -5476,37 +5019,17 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
|
|
|
5476
5019
|
goto fail;
|
|
5477
5020
|
}
|
|
5478
5021
|
#ifdef _WIN32
|
|
5479
|
-
|
|
5480
|
-
env->me_rmutex = OpenMutexA(SYNCHRONIZE, FALSE, MUTEXNAME(env, 'r'));
|
|
5022
|
+
env->me_rmutex = OpenMutexA(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname);
|
|
5481
5023
|
if (!env->me_rmutex) goto fail_errno;
|
|
5482
|
-
env->me_wmutex = OpenMutexA(SYNCHRONIZE, FALSE,
|
|
5024
|
+
env->me_wmutex = OpenMutexA(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname);
|
|
5483
5025
|
if (!env->me_wmutex) goto fail_errno;
|
|
5484
5026
|
#elif defined(MDB_USE_POSIX_SEM)
|
|
5485
|
-
|
|
5486
|
-
env->me_rmutex = sem_open(MUTEXNAME(env, 'r'), 0);
|
|
5027
|
+
env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0);
|
|
5487
5028
|
if (env->me_rmutex == SEM_FAILED) goto fail_errno;
|
|
5488
|
-
env->me_wmutex = sem_open(
|
|
5029
|
+
env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0);
|
|
5489
5030
|
if (env->me_wmutex == SEM_FAILED) goto fail_errno;
|
|
5490
|
-
#elif defined(MDB_USE_SYSV_SEM)
|
|
5491
|
-
semid = env->me_txns->mti_semid;
|
|
5492
|
-
semu.buf = &buf;
|
|
5493
|
-
/* check for read access */
|
|
5494
|
-
if (semctl(semid, 0, IPC_STAT, semu) < 0)
|
|
5495
|
-
goto fail_errno;
|
|
5496
|
-
/* check for write access */
|
|
5497
|
-
if (semctl(semid, 0, IPC_SET, semu) < 0)
|
|
5498
|
-
goto fail_errno;
|
|
5499
5031
|
#endif
|
|
5500
5032
|
}
|
|
5501
|
-
#ifdef MDB_USE_SYSV_SEM
|
|
5502
|
-
env->me_rmutex->semid = semid;
|
|
5503
|
-
env->me_wmutex->semid = semid;
|
|
5504
|
-
env->me_rmutex->semnum = 0;
|
|
5505
|
-
env->me_wmutex->semnum = 1;
|
|
5506
|
-
env->me_rmutex->locked = &env->me_txns->mti_rlocked;
|
|
5507
|
-
env->me_wmutex->locked = &env->me_txns->mti_wlocked;
|
|
5508
|
-
#endif
|
|
5509
|
-
|
|
5510
5033
|
return MDB_SUCCESS;
|
|
5511
5034
|
|
|
5512
5035
|
fail_errno:
|
|
@@ -5521,7 +5044,7 @@ fail:
|
|
|
5521
5044
|
*/
|
|
5522
5045
|
#define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
|
|
5523
5046
|
#define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \
|
|
5524
|
-
MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD
|
|
5047
|
+
MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
|
|
5525
5048
|
|
|
5526
5049
|
#if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
|
|
5527
5050
|
# error "Persistent DB flags & env flags overlap, but both go in mm_flags"
|
|
@@ -5536,37 +5059,12 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
5536
5059
|
if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
|
|
5537
5060
|
return EINVAL;
|
|
5538
5061
|
|
|
5539
|
-
#ifdef MDB_VL32
|
|
5540
|
-
if (flags & MDB_WRITEMAP) {
|
|
5541
|
-
/* silently ignore WRITEMAP in 32 bit mode */
|
|
5542
|
-
flags ^= MDB_WRITEMAP;
|
|
5543
|
-
}
|
|
5544
|
-
if (flags & MDB_FIXEDMAP) {
|
|
5545
|
-
/* cannot support FIXEDMAP */
|
|
5546
|
-
return EINVAL;
|
|
5547
|
-
}
|
|
5548
|
-
#endif
|
|
5549
5062
|
flags |= env->me_flags;
|
|
5550
5063
|
|
|
5551
5064
|
rc = mdb_fname_init(path, flags, &fname);
|
|
5552
5065
|
if (rc)
|
|
5553
5066
|
return rc;
|
|
5554
5067
|
|
|
5555
|
-
#ifdef MDB_VL32
|
|
5556
|
-
#ifdef _WIN32
|
|
5557
|
-
env->me_rpmutex = CreateMutex(NULL, FALSE, NULL);
|
|
5558
|
-
if (!env->me_rpmutex) {
|
|
5559
|
-
rc = ErrCode();
|
|
5560
|
-
goto leave;
|
|
5561
|
-
}
|
|
5562
|
-
#else
|
|
5563
|
-
rc = pthread_mutex_init(&env->me_rpmutex, NULL);
|
|
5564
|
-
if (rc)
|
|
5565
|
-
goto leave;
|
|
5566
|
-
#endif
|
|
5567
|
-
#endif
|
|
5568
|
-
flags |= MDB_ENV_ACTIVE; /* tell mdb_env_close0() to clean up */
|
|
5569
|
-
|
|
5570
5068
|
if (flags & MDB_RDONLY) {
|
|
5571
5069
|
/* silently ignore WRITEMAP when we're only getting read access */
|
|
5572
5070
|
flags &= ~MDB_WRITEMAP;
|
|
@@ -5575,23 +5073,10 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
5575
5073
|
(env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2)))))
|
|
5576
5074
|
rc = ENOMEM;
|
|
5577
5075
|
}
|
|
5578
|
-
|
|
5579
|
-
env->me_flags = flags;
|
|
5076
|
+
env->me_flags = flags |= MDB_ENV_ACTIVE;
|
|
5580
5077
|
if (rc)
|
|
5581
5078
|
goto leave;
|
|
5582
5079
|
|
|
5583
|
-
#ifdef MDB_VL32
|
|
5584
|
-
{
|
|
5585
|
-
env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3));
|
|
5586
|
-
if (!env->me_rpages) {
|
|
5587
|
-
rc = ENOMEM;
|
|
5588
|
-
goto leave;
|
|
5589
|
-
}
|
|
5590
|
-
env->me_rpages[0].mid = 0;
|
|
5591
|
-
env->me_rpcheck = MDB_ERPAGE_SIZE/2;
|
|
5592
|
-
}
|
|
5593
|
-
#endif
|
|
5594
|
-
|
|
5595
5080
|
env->me_path = strdup(path);
|
|
5596
5081
|
env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
|
|
5597
5082
|
env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t));
|
|
@@ -5607,10 +5092,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
5607
5092
|
rc = mdb_env_setup_locks(env, &fname, mode, &excl);
|
|
5608
5093
|
if (rc)
|
|
5609
5094
|
goto leave;
|
|
5610
|
-
if ((flags & MDB_PREVSNAPSHOT) && !excl) {
|
|
5611
|
-
rc = EAGAIN;
|
|
5612
|
-
goto leave;
|
|
5613
|
-
}
|
|
5614
5095
|
}
|
|
5615
5096
|
|
|
5616
5097
|
rc = mdb_fopen(env, &fname,
|
|
@@ -5618,11 +5099,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
5618
5099
|
mode, &env->me_fd);
|
|
5619
5100
|
if (rc)
|
|
5620
5101
|
goto leave;
|
|
5621
|
-
#ifdef _WIN32
|
|
5622
|
-
rc = mdb_fopen(env, &fname, MDB_O_OVERLAPPED, mode, &env->me_ovfd);
|
|
5623
|
-
if (rc)
|
|
5624
|
-
goto leave;
|
|
5625
|
-
#endif
|
|
5626
5102
|
|
|
5627
5103
|
if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) {
|
|
5628
5104
|
rc = mdb_env_setup_locks(env, &fname, mode, &excl);
|
|
@@ -5630,16 +5106,17 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
5630
5106
|
goto leave;
|
|
5631
5107
|
}
|
|
5632
5108
|
|
|
5633
|
-
if ((rc = mdb_env_open2(env
|
|
5634
|
-
|
|
5635
|
-
|
|
5636
|
-
|
|
5637
|
-
|
|
5638
|
-
|
|
5639
|
-
|
|
5640
|
-
|
|
5109
|
+
if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) {
|
|
5110
|
+
if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) {
|
|
5111
|
+
/* Synchronous fd for meta writes. Needed even with
|
|
5112
|
+
* MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset.
|
|
5113
|
+
*/
|
|
5114
|
+
rc = mdb_fopen(env, &fname, MDB_O_META, mode, &env->me_mfd);
|
|
5115
|
+
if (rc)
|
|
5116
|
+
goto leave;
|
|
5117
|
+
}
|
|
5641
5118
|
DPRINTF(("opened dbenv %p", (void *) env));
|
|
5642
|
-
if (excl > 0
|
|
5119
|
+
if (excl > 0) {
|
|
5643
5120
|
rc = mdb_env_share_locks(env, &excl);
|
|
5644
5121
|
if (rc)
|
|
5645
5122
|
goto leave;
|
|
@@ -5656,16 +5133,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
5656
5133
|
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
|
|
5657
5134
|
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
|
|
5658
5135
|
txn->mt_env = env;
|
|
5659
|
-
#ifdef MDB_VL32
|
|
5660
|
-
txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
|
|
5661
|
-
if (!txn->mt_rpages) {
|
|
5662
|
-
free(txn);
|
|
5663
|
-
rc = ENOMEM;
|
|
5664
|
-
goto leave;
|
|
5665
|
-
}
|
|
5666
|
-
txn->mt_rpages[0].mid = 0;
|
|
5667
|
-
txn->mt_rpcheck = MDB_TRPAGE_SIZE/2;
|
|
5668
|
-
#endif
|
|
5669
5136
|
txn->mt_dbxs = env->me_dbxs;
|
|
5670
5137
|
txn->mt_flags = MDB_TXN_FINISHED;
|
|
5671
5138
|
env->me_txn0 = txn;
|
|
@@ -5676,6 +5143,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
5676
5143
|
}
|
|
5677
5144
|
|
|
5678
5145
|
leave:
|
|
5146
|
+
MDB_TRACE(("%p, %s, %u, %04o", env, path, flags & (CHANGEABLE|CHANGELESS), mode));
|
|
5679
5147
|
if (rc) {
|
|
5680
5148
|
mdb_env_close0(env, excl);
|
|
5681
5149
|
}
|
|
@@ -5704,17 +5172,6 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
|
5704
5172
|
free(env->me_dbflags);
|
|
5705
5173
|
free(env->me_path);
|
|
5706
5174
|
free(env->me_dirty_list);
|
|
5707
|
-
#ifdef MDB_VL32
|
|
5708
|
-
if (env->me_txn0 && env->me_txn0->mt_rpages)
|
|
5709
|
-
free(env->me_txn0->mt_rpages);
|
|
5710
|
-
if (env->me_rpages) {
|
|
5711
|
-
MDB_ID3L el = env->me_rpages;
|
|
5712
|
-
unsigned int x;
|
|
5713
|
-
for (x=1; x<=el[0].mid; x++)
|
|
5714
|
-
munmap(el[x].mptr, el[x].mcnt * env->me_psize);
|
|
5715
|
-
free(el);
|
|
5716
|
-
}
|
|
5717
|
-
#endif
|
|
5718
5175
|
free(env->me_txn0);
|
|
5719
5176
|
mdb_midl_free(env->me_free_pgs);
|
|
5720
5177
|
|
|
@@ -5732,24 +5189,10 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
|
5732
5189
|
}
|
|
5733
5190
|
|
|
5734
5191
|
if (env->me_map) {
|
|
5735
|
-
#ifdef MDB_VL32
|
|
5736
|
-
munmap(env->me_map, NUM_METAS*env->me_psize);
|
|
5737
|
-
#else
|
|
5738
5192
|
munmap(env->me_map, env->me_mapsize);
|
|
5739
|
-
#endif
|
|
5740
5193
|
}
|
|
5741
5194
|
if (env->me_mfd != INVALID_HANDLE_VALUE)
|
|
5742
5195
|
(void) close(env->me_mfd);
|
|
5743
|
-
#ifdef _WIN32
|
|
5744
|
-
if (env->ovs > 0) {
|
|
5745
|
-
for (i = 0; i < env->ovs; i++) {
|
|
5746
|
-
CloseHandle(env->ov[i].hEvent);
|
|
5747
|
-
}
|
|
5748
|
-
free(env->ov);
|
|
5749
|
-
}
|
|
5750
|
-
if (env->me_ovfd != INVALID_HANDLE_VALUE)
|
|
5751
|
-
(void) close(env->me_ovfd);
|
|
5752
|
-
#endif
|
|
5753
5196
|
if (env->me_fd != INVALID_HANDLE_VALUE)
|
|
5754
5197
|
(void) close(env->me_fd);
|
|
5755
5198
|
if (env->me_txns) {
|
|
@@ -5783,31 +5226,10 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
|
5783
5226
|
if (excl == 0)
|
|
5784
5227
|
mdb_env_excl_lock(env, &excl);
|
|
5785
5228
|
if (excl > 0) {
|
|
5786
|
-
sem_unlink(
|
|
5787
|
-
sem_unlink(
|
|
5229
|
+
sem_unlink(env->me_txns->mti_rmname);
|
|
5230
|
+
sem_unlink(env->me_txns->mti_wmname);
|
|
5788
5231
|
}
|
|
5789
5232
|
}
|
|
5790
|
-
#elif defined(MDB_USE_SYSV_SEM)
|
|
5791
|
-
if (env->me_rmutex->semid != -1) {
|
|
5792
|
-
/* If we have the filelock: If we are the
|
|
5793
|
-
* only remaining user, clean up semaphores.
|
|
5794
|
-
*/
|
|
5795
|
-
if (excl == 0)
|
|
5796
|
-
mdb_env_excl_lock(env, &excl);
|
|
5797
|
-
if (excl > 0)
|
|
5798
|
-
semctl(env->me_rmutex->semid, 0, IPC_RMID);
|
|
5799
|
-
}
|
|
5800
|
-
#elif defined(MDB_ROBUST_SUPPORTED)
|
|
5801
|
-
/* If we have the filelock: If we are the
|
|
5802
|
-
* only remaining user, clean up robust
|
|
5803
|
-
* mutexes.
|
|
5804
|
-
*/
|
|
5805
|
-
if (excl == 0)
|
|
5806
|
-
mdb_env_excl_lock(env, &excl);
|
|
5807
|
-
if (excl > 0) {
|
|
5808
|
-
pthread_mutex_destroy(env->me_txns->mti_rmutex);
|
|
5809
|
-
pthread_mutex_destroy(env->me_txns->mti_wmutex);
|
|
5810
|
-
}
|
|
5811
5233
|
#endif
|
|
5812
5234
|
munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
|
|
5813
5235
|
}
|
|
@@ -5822,14 +5244,6 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
|
5822
5244
|
#endif
|
|
5823
5245
|
(void) close(env->me_lfd);
|
|
5824
5246
|
}
|
|
5825
|
-
#ifdef MDB_VL32
|
|
5826
|
-
#ifdef _WIN32
|
|
5827
|
-
if (env->me_fmh) CloseHandle(env->me_fmh);
|
|
5828
|
-
if (env->me_rpmutex) CloseHandle(env->me_rpmutex);
|
|
5829
|
-
#else
|
|
5830
|
-
pthread_mutex_destroy(&env->me_rpmutex);
|
|
5831
|
-
#endif
|
|
5832
|
-
#endif
|
|
5833
5247
|
|
|
5834
5248
|
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
|
|
5835
5249
|
}
|
|
@@ -5842,6 +5256,7 @@ mdb_env_close(MDB_env *env)
|
|
|
5842
5256
|
if (env == NULL)
|
|
5843
5257
|
return;
|
|
5844
5258
|
|
|
5259
|
+
MDB_TRACE(("%p", env));
|
|
5845
5260
|
VGMEMP_DESTROY(env);
|
|
5846
5261
|
while ((dp = env->me_dpages) != NULL) {
|
|
5847
5262
|
VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
|
|
@@ -5853,18 +5268,18 @@ mdb_env_close(MDB_env *env)
|
|
|
5853
5268
|
free(env);
|
|
5854
5269
|
}
|
|
5855
5270
|
|
|
5856
|
-
/** Compare two items pointing at aligned
|
|
5271
|
+
/** Compare two items pointing at aligned size_t's */
|
|
5857
5272
|
static int
|
|
5858
5273
|
mdb_cmp_long(const MDB_val *a, const MDB_val *b)
|
|
5859
5274
|
{
|
|
5860
|
-
return (*(
|
|
5861
|
-
*(
|
|
5275
|
+
return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 :
|
|
5276
|
+
*(size_t *)a->mv_data > *(size_t *)b->mv_data;
|
|
5862
5277
|
}
|
|
5863
5278
|
|
|
5864
5279
|
/** Compare two items pointing at aligned unsigned int's.
|
|
5865
5280
|
*
|
|
5866
5281
|
* This is also set as #MDB_INTEGERDUP|#MDB_DUPFIXED's #MDB_dbx.%md_dcmp,
|
|
5867
|
-
* but #mdb_cmp_clong() is called instead if the data type is
|
|
5282
|
+
* but #mdb_cmp_clong() is called instead if the data type is size_t.
|
|
5868
5283
|
*/
|
|
5869
5284
|
static int
|
|
5870
5285
|
mdb_cmp_int(const MDB_val *a, const MDB_val *b)
|
|
@@ -5969,7 +5384,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
|
|
|
5969
5384
|
|
|
5970
5385
|
nkeys = NUMKEYS(mp);
|
|
5971
5386
|
|
|
5972
|
-
DPRINTF(("searching %u keys in %s %spage %"
|
|
5387
|
+
DPRINTF(("searching %u keys in %s %spage %"Z"u",
|
|
5973
5388
|
nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "",
|
|
5974
5389
|
mdb_dbg_pgno(mp)));
|
|
5975
5390
|
|
|
@@ -5981,7 +5396,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
|
|
|
5981
5396
|
* alignment is guaranteed. Use faster mdb_cmp_int.
|
|
5982
5397
|
*/
|
|
5983
5398
|
if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) {
|
|
5984
|
-
if (NODEPTR(mp, 1)->mn_ksize == sizeof(
|
|
5399
|
+
if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t))
|
|
5985
5400
|
cmp = mdb_cmp_long;
|
|
5986
5401
|
else
|
|
5987
5402
|
cmp = mdb_cmp_int;
|
|
@@ -6017,7 +5432,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
|
|
|
6017
5432
|
DPRINTF(("found leaf index %u [%s], rc = %i",
|
|
6018
5433
|
i, DKEY(&nodekey), rc));
|
|
6019
5434
|
else
|
|
6020
|
-
DPRINTF(("found branch index %u [%s -> %"
|
|
5435
|
+
DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i",
|
|
6021
5436
|
i, DKEY(&nodekey), NODEPGNO(node), rc));
|
|
6022
5437
|
#endif
|
|
6023
5438
|
if (rc == 0)
|
|
@@ -6065,7 +5480,7 @@ static void
|
|
|
6065
5480
|
mdb_cursor_pop(MDB_cursor *mc)
|
|
6066
5481
|
{
|
|
6067
5482
|
if (mc->mc_snum) {
|
|
6068
|
-
DPRINTF(("popping page %"
|
|
5483
|
+
DPRINTF(("popping page %"Z"u off db %d cursor %p",
|
|
6069
5484
|
mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *) mc));
|
|
6070
5485
|
|
|
6071
5486
|
mc->mc_snum--;
|
|
@@ -6083,7 +5498,7 @@ mdb_cursor_pop(MDB_cursor *mc)
|
|
|
6083
5498
|
static int
|
|
6084
5499
|
mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
|
|
6085
5500
|
{
|
|
6086
|
-
DPRINTF(("pushing page %"
|
|
5501
|
+
DPRINTF(("pushing page %"Z"u on db %d cursor %p", mp->mp_pgno,
|
|
6087
5502
|
DDBI(mc), (void *) mc));
|
|
6088
5503
|
|
|
6089
5504
|
if (mc->mc_snum >= CURSOR_STACK) {
|
|
@@ -6098,294 +5513,6 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
|
|
|
6098
5513
|
return MDB_SUCCESS;
|
|
6099
5514
|
}
|
|
6100
5515
|
|
|
6101
|
-
#ifdef MDB_VL32
|
|
6102
|
-
/** Map a read-only page.
|
|
6103
|
-
* There are two levels of tracking in use, a per-txn list and a per-env list.
|
|
6104
|
-
* ref'ing and unref'ing the per-txn list is faster since it requires no
|
|
6105
|
-
* locking. Pages are cached in the per-env list for global reuse, and a lock
|
|
6106
|
-
* is required. Pages are not immediately unmapped when their refcnt goes to
|
|
6107
|
-
* zero; they hang around in case they will be reused again soon.
|
|
6108
|
-
*
|
|
6109
|
-
* When the per-txn list gets full, all pages with refcnt=0 are purged from the
|
|
6110
|
-
* list and their refcnts in the per-env list are decremented.
|
|
6111
|
-
*
|
|
6112
|
-
* When the per-env list gets full, all pages with refcnt=0 are purged from the
|
|
6113
|
-
* list and their pages are unmapped.
|
|
6114
|
-
*
|
|
6115
|
-
* @note "full" means the list has reached its respective rpcheck threshold.
|
|
6116
|
-
* This threshold slowly raises if no pages could be purged on a given check,
|
|
6117
|
-
* and returns to its original value when enough pages were purged.
|
|
6118
|
-
*
|
|
6119
|
-
* If purging doesn't free any slots, filling the per-txn list will return
|
|
6120
|
-
* MDB_TXN_FULL, and filling the per-env list returns MDB_MAP_FULL.
|
|
6121
|
-
*
|
|
6122
|
-
* Reference tracking in a txn is imperfect, pages can linger with non-zero
|
|
6123
|
-
* refcnt even without active references. It was deemed to be too invasive
|
|
6124
|
-
* to add unrefs in every required location. However, all pages are unref'd
|
|
6125
|
-
* at the end of the transaction. This guarantees that no stale references
|
|
6126
|
-
* linger in the per-env list.
|
|
6127
|
-
*
|
|
6128
|
-
* Usually we map chunks of 16 pages at a time, but if an overflow page begins
|
|
6129
|
-
* at the tail of the chunk we extend the chunk to include the entire overflow
|
|
6130
|
-
* page. Unfortunately, pages can be turned into overflow pages after their
|
|
6131
|
-
* chunk was already mapped. In that case we must remap the chunk if the
|
|
6132
|
-
* overflow page is referenced. If the chunk's refcnt is 0 we can just remap
|
|
6133
|
-
* it, otherwise we temporarily map a new chunk just for the overflow page.
|
|
6134
|
-
*
|
|
6135
|
-
* @note this chunk handling means we cannot guarantee that a data item
|
|
6136
|
-
* returned from the DB will stay alive for the duration of the transaction:
|
|
6137
|
-
* We unref pages as soon as a cursor moves away from the page
|
|
6138
|
-
* A subsequent op may cause a purge, which may unmap any unref'd chunks
|
|
6139
|
-
* The caller must copy the data if it must be used later in the same txn.
|
|
6140
|
-
*
|
|
6141
|
-
* Also - our reference counting revolves around cursors, but overflow pages
|
|
6142
|
-
* aren't pointed to by a cursor's page stack. We have to remember them
|
|
6143
|
-
* explicitly, in the added mc_ovpg field. A single cursor can only hold a
|
|
6144
|
-
* reference to one overflow page at a time.
|
|
6145
|
-
*
|
|
6146
|
-
* @param[in] txn the transaction for this access.
|
|
6147
|
-
* @param[in] pgno the page number for the page to retrieve.
|
|
6148
|
-
* @param[out] ret address of a pointer where the page's address will be stored.
|
|
6149
|
-
* @return 0 on success, non-zero on failure.
|
|
6150
|
-
*/
|
|
6151
|
-
static int
|
|
6152
|
-
mdb_rpage_get(MDB_txn *txn, pgno_t pg0, MDB_page **ret)
|
|
6153
|
-
{
|
|
6154
|
-
MDB_env *env = txn->mt_env;
|
|
6155
|
-
MDB_page *p;
|
|
6156
|
-
MDB_ID3L tl = txn->mt_rpages;
|
|
6157
|
-
MDB_ID3L el = env->me_rpages;
|
|
6158
|
-
MDB_ID3 id3;
|
|
6159
|
-
unsigned x, rem;
|
|
6160
|
-
pgno_t pgno;
|
|
6161
|
-
int rc, retries = 1;
|
|
6162
|
-
#ifdef _WIN32
|
|
6163
|
-
LARGE_INTEGER off;
|
|
6164
|
-
SIZE_T len;
|
|
6165
|
-
#define SET_OFF(off,val) off.QuadPart = val
|
|
6166
|
-
#define MAP(rc,env,addr,len,off) \
|
|
6167
|
-
addr = NULL; \
|
|
6168
|
-
rc = NtMapViewOfSection(env->me_fmh, GetCurrentProcess(), &addr, 0, \
|
|
6169
|
-
len, &off, &len, ViewUnmap, (env->me_flags & MDB_RDONLY) ? 0 : MEM_RESERVE, PAGE_READONLY); \
|
|
6170
|
-
if (rc) rc = mdb_nt2win32(rc)
|
|
6171
|
-
#else
|
|
6172
|
-
off_t off;
|
|
6173
|
-
size_t len;
|
|
6174
|
-
#define SET_OFF(off,val) off = val
|
|
6175
|
-
#define MAP(rc,env,addr,len,off) \
|
|
6176
|
-
addr = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); \
|
|
6177
|
-
rc = (addr == MAP_FAILED) ? errno : 0
|
|
6178
|
-
#endif
|
|
6179
|
-
|
|
6180
|
-
/* remember the offset of the actual page number, so we can
|
|
6181
|
-
* return the correct pointer at the end.
|
|
6182
|
-
*/
|
|
6183
|
-
rem = pg0 & (MDB_RPAGE_CHUNK-1);
|
|
6184
|
-
pgno = pg0 ^ rem;
|
|
6185
|
-
|
|
6186
|
-
id3.mid = 0;
|
|
6187
|
-
x = mdb_mid3l_search(tl, pgno);
|
|
6188
|
-
if (x <= tl[0].mid && tl[x].mid == pgno) {
|
|
6189
|
-
if (x != tl[0].mid && tl[x+1].mid == pg0)
|
|
6190
|
-
x++;
|
|
6191
|
-
/* check for overflow size */
|
|
6192
|
-
p = (MDB_page *)((char *)tl[x].mptr + rem * env->me_psize);
|
|
6193
|
-
if (IS_OVERFLOW(p) && p->mp_pages + rem > tl[x].mcnt) {
|
|
6194
|
-
id3.mcnt = p->mp_pages + rem;
|
|
6195
|
-
len = id3.mcnt * env->me_psize;
|
|
6196
|
-
SET_OFF(off, pgno * env->me_psize);
|
|
6197
|
-
MAP(rc, env, id3.mptr, len, off);
|
|
6198
|
-
if (rc)
|
|
6199
|
-
return rc;
|
|
6200
|
-
/* check for local-only page */
|
|
6201
|
-
if (rem) {
|
|
6202
|
-
mdb_tassert(txn, tl[x].mid != pg0);
|
|
6203
|
-
/* hope there's room to insert this locally.
|
|
6204
|
-
* setting mid here tells later code to just insert
|
|
6205
|
-
* this id3 instead of searching for a match.
|
|
6206
|
-
*/
|
|
6207
|
-
id3.mid = pg0;
|
|
6208
|
-
goto notlocal;
|
|
6209
|
-
} else {
|
|
6210
|
-
/* ignore the mapping we got from env, use new one */
|
|
6211
|
-
tl[x].mptr = id3.mptr;
|
|
6212
|
-
tl[x].mcnt = id3.mcnt;
|
|
6213
|
-
/* if no active ref, see if we can replace in env */
|
|
6214
|
-
if (!tl[x].mref) {
|
|
6215
|
-
unsigned i;
|
|
6216
|
-
pthread_mutex_lock(&env->me_rpmutex);
|
|
6217
|
-
i = mdb_mid3l_search(el, tl[x].mid);
|
|
6218
|
-
if (el[i].mref == 1) {
|
|
6219
|
-
/* just us, replace it */
|
|
6220
|
-
munmap(el[i].mptr, el[i].mcnt * env->me_psize);
|
|
6221
|
-
el[i].mptr = tl[x].mptr;
|
|
6222
|
-
el[i].mcnt = tl[x].mcnt;
|
|
6223
|
-
} else {
|
|
6224
|
-
/* there are others, remove ourself */
|
|
6225
|
-
el[i].mref--;
|
|
6226
|
-
}
|
|
6227
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
6228
|
-
}
|
|
6229
|
-
}
|
|
6230
|
-
}
|
|
6231
|
-
id3.mptr = tl[x].mptr;
|
|
6232
|
-
id3.mcnt = tl[x].mcnt;
|
|
6233
|
-
tl[x].mref++;
|
|
6234
|
-
goto ok;
|
|
6235
|
-
}
|
|
6236
|
-
|
|
6237
|
-
notlocal:
|
|
6238
|
-
if (tl[0].mid >= MDB_TRPAGE_MAX - txn->mt_rpcheck) {
|
|
6239
|
-
unsigned i, y;
|
|
6240
|
-
/* purge unref'd pages from our list and unref in env */
|
|
6241
|
-
pthread_mutex_lock(&env->me_rpmutex);
|
|
6242
|
-
retry:
|
|
6243
|
-
y = 0;
|
|
6244
|
-
for (i=1; i<=tl[0].mid; i++) {
|
|
6245
|
-
if (!tl[i].mref) {
|
|
6246
|
-
if (!y) y = i;
|
|
6247
|
-
/* tmp overflow pages don't go to env */
|
|
6248
|
-
if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
|
|
6249
|
-
munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
|
|
6250
|
-
continue;
|
|
6251
|
-
}
|
|
6252
|
-
x = mdb_mid3l_search(el, tl[i].mid);
|
|
6253
|
-
el[x].mref--;
|
|
6254
|
-
}
|
|
6255
|
-
}
|
|
6256
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
6257
|
-
if (!y) {
|
|
6258
|
-
/* we didn't find any unref'd chunks.
|
|
6259
|
-
* if we're out of room, fail.
|
|
6260
|
-
*/
|
|
6261
|
-
if (tl[0].mid >= MDB_TRPAGE_MAX)
|
|
6262
|
-
return MDB_TXN_FULL;
|
|
6263
|
-
/* otherwise, raise threshold for next time around
|
|
6264
|
-
* and let this go.
|
|
6265
|
-
*/
|
|
6266
|
-
txn->mt_rpcheck /= 2;
|
|
6267
|
-
} else {
|
|
6268
|
-
/* we found some unused; consolidate the list */
|
|
6269
|
-
for (i=y+1; i<= tl[0].mid; i++)
|
|
6270
|
-
if (tl[i].mref)
|
|
6271
|
-
tl[y++] = tl[i];
|
|
6272
|
-
tl[0].mid = y-1;
|
|
6273
|
-
/* decrease the check threshold toward its original value */
|
|
6274
|
-
if (!txn->mt_rpcheck)
|
|
6275
|
-
txn->mt_rpcheck = 1;
|
|
6276
|
-
while (txn->mt_rpcheck < tl[0].mid && txn->mt_rpcheck < MDB_TRPAGE_SIZE/2)
|
|
6277
|
-
txn->mt_rpcheck *= 2;
|
|
6278
|
-
}
|
|
6279
|
-
}
|
|
6280
|
-
if (tl[0].mid < MDB_TRPAGE_SIZE) {
|
|
6281
|
-
id3.mref = 1;
|
|
6282
|
-
if (id3.mid)
|
|
6283
|
-
goto found;
|
|
6284
|
-
/* don't map past last written page in read-only envs */
|
|
6285
|
-
if ((env->me_flags & MDB_RDONLY) && pgno + MDB_RPAGE_CHUNK-1 > txn->mt_last_pgno)
|
|
6286
|
-
id3.mcnt = txn->mt_last_pgno + 1 - pgno;
|
|
6287
|
-
else
|
|
6288
|
-
id3.mcnt = MDB_RPAGE_CHUNK;
|
|
6289
|
-
len = id3.mcnt * env->me_psize;
|
|
6290
|
-
id3.mid = pgno;
|
|
6291
|
-
|
|
6292
|
-
/* search for page in env */
|
|
6293
|
-
pthread_mutex_lock(&env->me_rpmutex);
|
|
6294
|
-
x = mdb_mid3l_search(el, pgno);
|
|
6295
|
-
if (x <= el[0].mid && el[x].mid == pgno) {
|
|
6296
|
-
id3.mptr = el[x].mptr;
|
|
6297
|
-
id3.mcnt = el[x].mcnt;
|
|
6298
|
-
/* check for overflow size */
|
|
6299
|
-
p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
|
|
6300
|
-
if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) {
|
|
6301
|
-
id3.mcnt = p->mp_pages + rem;
|
|
6302
|
-
len = id3.mcnt * env->me_psize;
|
|
6303
|
-
SET_OFF(off, pgno * env->me_psize);
|
|
6304
|
-
MAP(rc, env, id3.mptr, len, off);
|
|
6305
|
-
if (rc)
|
|
6306
|
-
goto fail;
|
|
6307
|
-
if (!el[x].mref) {
|
|
6308
|
-
munmap(el[x].mptr, env->me_psize * el[x].mcnt);
|
|
6309
|
-
el[x].mptr = id3.mptr;
|
|
6310
|
-
el[x].mcnt = id3.mcnt;
|
|
6311
|
-
} else {
|
|
6312
|
-
id3.mid = pg0;
|
|
6313
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
6314
|
-
goto found;
|
|
6315
|
-
}
|
|
6316
|
-
}
|
|
6317
|
-
el[x].mref++;
|
|
6318
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
6319
|
-
goto found;
|
|
6320
|
-
}
|
|
6321
|
-
if (el[0].mid >= MDB_ERPAGE_MAX - env->me_rpcheck) {
|
|
6322
|
-
/* purge unref'd pages */
|
|
6323
|
-
unsigned i, y = 0;
|
|
6324
|
-
for (i=1; i<=el[0].mid; i++) {
|
|
6325
|
-
if (!el[i].mref) {
|
|
6326
|
-
if (!y) y = i;
|
|
6327
|
-
munmap(el[i].mptr, env->me_psize * el[i].mcnt);
|
|
6328
|
-
}
|
|
6329
|
-
}
|
|
6330
|
-
if (!y) {
|
|
6331
|
-
if (retries) {
|
|
6332
|
-
/* see if we can unref some local pages */
|
|
6333
|
-
retries--;
|
|
6334
|
-
id3.mid = 0;
|
|
6335
|
-
goto retry;
|
|
6336
|
-
}
|
|
6337
|
-
if (el[0].mid >= MDB_ERPAGE_MAX) {
|
|
6338
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
6339
|
-
return MDB_MAP_FULL;
|
|
6340
|
-
}
|
|
6341
|
-
env->me_rpcheck /= 2;
|
|
6342
|
-
} else {
|
|
6343
|
-
for (i=y+1; i<= el[0].mid; i++)
|
|
6344
|
-
if (el[i].mref)
|
|
6345
|
-
el[y++] = el[i];
|
|
6346
|
-
el[0].mid = y-1;
|
|
6347
|
-
if (!env->me_rpcheck)
|
|
6348
|
-
env->me_rpcheck = 1;
|
|
6349
|
-
while (env->me_rpcheck < el[0].mid && env->me_rpcheck < MDB_ERPAGE_SIZE/2)
|
|
6350
|
-
env->me_rpcheck *= 2;
|
|
6351
|
-
}
|
|
6352
|
-
}
|
|
6353
|
-
SET_OFF(off, pgno * env->me_psize);
|
|
6354
|
-
MAP(rc, env, id3.mptr, len, off);
|
|
6355
|
-
if (rc) {
|
|
6356
|
-
fail:
|
|
6357
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
6358
|
-
return rc;
|
|
6359
|
-
}
|
|
6360
|
-
/* check for overflow size */
|
|
6361
|
-
p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
|
|
6362
|
-
if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) {
|
|
6363
|
-
id3.mcnt = p->mp_pages + rem;
|
|
6364
|
-
munmap(id3.mptr, len);
|
|
6365
|
-
len = id3.mcnt * env->me_psize;
|
|
6366
|
-
MAP(rc, env, id3.mptr, len, off);
|
|
6367
|
-
if (rc)
|
|
6368
|
-
goto fail;
|
|
6369
|
-
}
|
|
6370
|
-
mdb_mid3l_insert(el, &id3);
|
|
6371
|
-
pthread_mutex_unlock(&env->me_rpmutex);
|
|
6372
|
-
found:
|
|
6373
|
-
mdb_mid3l_insert(tl, &id3);
|
|
6374
|
-
} else {
|
|
6375
|
-
return MDB_TXN_FULL;
|
|
6376
|
-
}
|
|
6377
|
-
ok:
|
|
6378
|
-
p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
|
|
6379
|
-
#if MDB_DEBUG /* we don't need this check any more */
|
|
6380
|
-
if (IS_OVERFLOW(p)) {
|
|
6381
|
-
mdb_tassert(txn, p->mp_pages + rem <= id3.mcnt);
|
|
6382
|
-
}
|
|
6383
|
-
#endif
|
|
6384
|
-
*ret = p;
|
|
6385
|
-
return MDB_SUCCESS;
|
|
6386
|
-
}
|
|
6387
|
-
#endif
|
|
6388
|
-
|
|
6389
5516
|
/** Find the address of the page corresponding to a given page number.
|
|
6390
5517
|
* Set #MDB_TXN_ERROR on failure.
|
|
6391
5518
|
* @param[in] mc the cursor accessing the page.
|
|
@@ -6398,10 +5525,11 @@ static int
|
|
|
6398
5525
|
mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl)
|
|
6399
5526
|
{
|
|
6400
5527
|
MDB_txn *txn = mc->mc_txn;
|
|
5528
|
+
MDB_env *env = txn->mt_env;
|
|
6401
5529
|
MDB_page *p = NULL;
|
|
6402
5530
|
int level;
|
|
6403
5531
|
|
|
6404
|
-
if (! (
|
|
5532
|
+
if (! (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_WRITEMAP))) {
|
|
6405
5533
|
MDB_txn *tx2 = txn;
|
|
6406
5534
|
level = 1;
|
|
6407
5535
|
do {
|
|
@@ -6416,7 +5544,8 @@ mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl)
|
|
|
6416
5544
|
MDB_ID pn = pgno << 1;
|
|
6417
5545
|
x = mdb_midl_search(tx2->mt_spill_pgs, pn);
|
|
6418
5546
|
if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
|
|
6419
|
-
|
|
5547
|
+
p = (MDB_page *)(env->me_map + env->me_psize * pgno);
|
|
5548
|
+
goto done;
|
|
6420
5549
|
}
|
|
6421
5550
|
}
|
|
6422
5551
|
if (dl[0].mid) {
|
|
@@ -6430,28 +5559,15 @@ mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl)
|
|
|
6430
5559
|
} while ((tx2 = tx2->mt_parent) != NULL);
|
|
6431
5560
|
}
|
|
6432
5561
|
|
|
6433
|
-
if (pgno
|
|
6434
|
-
|
|
5562
|
+
if (pgno < txn->mt_next_pgno) {
|
|
5563
|
+
level = 0;
|
|
5564
|
+
p = (MDB_page *)(env->me_map + env->me_psize * pgno);
|
|
5565
|
+
} else {
|
|
5566
|
+
DPRINTF(("page %"Z"u not found", pgno));
|
|
6435
5567
|
txn->mt_flags |= MDB_TXN_ERROR;
|
|
6436
5568
|
return MDB_PAGE_NOTFOUND;
|
|
6437
5569
|
}
|
|
6438
5570
|
|
|
6439
|
-
level = 0;
|
|
6440
|
-
|
|
6441
|
-
mapped:
|
|
6442
|
-
{
|
|
6443
|
-
#ifdef MDB_VL32
|
|
6444
|
-
int rc = mdb_rpage_get(txn, pgno, &p);
|
|
6445
|
-
if (rc) {
|
|
6446
|
-
txn->mt_flags |= MDB_TXN_ERROR;
|
|
6447
|
-
return rc;
|
|
6448
|
-
}
|
|
6449
|
-
#else
|
|
6450
|
-
MDB_env *env = txn->mt_env;
|
|
6451
|
-
p = (MDB_page *)(env->me_map + env->me_psize * pgno);
|
|
6452
|
-
#endif
|
|
6453
|
-
}
|
|
6454
|
-
|
|
6455
5571
|
done:
|
|
6456
5572
|
*ret = p;
|
|
6457
5573
|
if (lvl)
|
|
@@ -6473,13 +5589,13 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
|
|
|
6473
5589
|
MDB_node *node;
|
|
6474
5590
|
indx_t i;
|
|
6475
5591
|
|
|
6476
|
-
DPRINTF(("branch page %"
|
|
5592
|
+
DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp)));
|
|
6477
5593
|
/* Don't assert on branch pages in the FreeDB. We can get here
|
|
6478
5594
|
* while in the process of rebalancing a FreeDB branch page; we must
|
|
6479
5595
|
* let that proceed. ITS#8336
|
|
6480
5596
|
*/
|
|
6481
5597
|
mdb_cassert(mc, !mc->mc_dbi || NUMKEYS(mp) > 1);
|
|
6482
|
-
DPRINTF(("found index 0 to page %"
|
|
5598
|
+
DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0))));
|
|
6483
5599
|
|
|
6484
5600
|
if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) {
|
|
6485
5601
|
i = 0;
|
|
@@ -6534,7 +5650,7 @@ ready:
|
|
|
6534
5650
|
return MDB_CORRUPTED;
|
|
6535
5651
|
}
|
|
6536
5652
|
|
|
6537
|
-
DPRINTF(("found leaf page %"
|
|
5653
|
+
DPRINTF(("found leaf page %"Z"u for key [%s]", mp->mp_pgno,
|
|
6538
5654
|
key ? DKEY(key) : "null"));
|
|
6539
5655
|
mc->mc_flags |= C_INITIALIZED;
|
|
6540
5656
|
mc->mc_flags &= ~C_EOF;
|
|
@@ -6604,7 +5720,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
|
|
|
6604
5720
|
MDB_node *leaf = mdb_node_search(&mc2,
|
|
6605
5721
|
&mc->mc_dbx->md_name, &exact);
|
|
6606
5722
|
if (!exact)
|
|
6607
|
-
return
|
|
5723
|
+
return MDB_BAD_DBI;
|
|
6608
5724
|
if ((leaf->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA)
|
|
6609
5725
|
return MDB_INCOMPATIBLE; /* not a named DB */
|
|
6610
5726
|
rc = mdb_node_read(&mc2, leaf, &data);
|
|
@@ -6630,26 +5746,14 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
|
|
|
6630
5746
|
}
|
|
6631
5747
|
|
|
6632
5748
|
mdb_cassert(mc, root > 1);
|
|
6633
|
-
if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
|
|
6634
|
-
#ifdef MDB_VL32
|
|
6635
|
-
if (mc->mc_pg[0])
|
|
6636
|
-
MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[0]);
|
|
6637
|
-
#endif
|
|
5749
|
+
if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
|
|
6638
5750
|
if ((rc = mdb_page_get(mc, root, &mc->mc_pg[0], NULL)) != 0)
|
|
6639
5751
|
return rc;
|
|
6640
|
-
}
|
|
6641
5752
|
|
|
6642
|
-
#ifdef MDB_VL32
|
|
6643
|
-
{
|
|
6644
|
-
int i;
|
|
6645
|
-
for (i=1; i<mc->mc_snum; i++)
|
|
6646
|
-
MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[i]);
|
|
6647
|
-
}
|
|
6648
|
-
#endif
|
|
6649
5753
|
mc->mc_snum = 1;
|
|
6650
5754
|
mc->mc_top = 0;
|
|
6651
5755
|
|
|
6652
|
-
DPRINTF(("db %d root page %"
|
|
5756
|
+
DPRINTF(("db %d root page %"Z"u has flags 0x%X",
|
|
6653
5757
|
DDBI(mc), root, mc->mc_pg[0]->mp_flags));
|
|
6654
5758
|
|
|
6655
5759
|
if (flags & MDB_PS_MODIFY) {
|
|
@@ -6674,7 +5778,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
|
|
|
6674
5778
|
MDB_ID pn = pg << 1;
|
|
6675
5779
|
int rc;
|
|
6676
5780
|
|
|
6677
|
-
DPRINTF(("free ov page %"
|
|
5781
|
+
DPRINTF(("free ov page %"Z"u (%d)", pg, ovpages));
|
|
6678
5782
|
/* If the page is dirty or on the spill list we just acquired it,
|
|
6679
5783
|
* so we should give it back to our current free list, if any.
|
|
6680
5784
|
* Otherwise put it onto the list of pages we freed in this txn.
|
|
@@ -6715,7 +5819,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
|
|
|
6715
5819
|
j = ++(dl[0].mid);
|
|
6716
5820
|
dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */
|
|
6717
5821
|
txn->mt_flags |= MDB_TXN_ERROR;
|
|
6718
|
-
return
|
|
5822
|
+
return MDB_CORRUPTED;
|
|
6719
5823
|
}
|
|
6720
5824
|
}
|
|
6721
5825
|
txn->mt_dirty_room++;
|
|
@@ -6735,10 +5839,6 @@ release:
|
|
|
6735
5839
|
if (rc)
|
|
6736
5840
|
return rc;
|
|
6737
5841
|
}
|
|
6738
|
-
#ifdef MDB_VL32
|
|
6739
|
-
if (mc->mc_ovpg == mp)
|
|
6740
|
-
mc->mc_ovpg = NULL;
|
|
6741
|
-
#endif
|
|
6742
5842
|
mc->mc_db->md_overflow_pages -= ovpages;
|
|
6743
5843
|
return 0;
|
|
6744
5844
|
}
|
|
@@ -6756,10 +5856,6 @@ mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data)
|
|
|
6756
5856
|
pgno_t pgno;
|
|
6757
5857
|
int rc;
|
|
6758
5858
|
|
|
6759
|
-
if (MC_OVPG(mc)) {
|
|
6760
|
-
MDB_PAGE_UNREF(mc->mc_txn, MC_OVPG(mc));
|
|
6761
|
-
MC_SET_OVPG(mc, NULL);
|
|
6762
|
-
}
|
|
6763
5859
|
if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
|
|
6764
5860
|
data->mv_size = NODEDSZ(leaf);
|
|
6765
5861
|
data->mv_data = NODEDATA(leaf);
|
|
@@ -6771,11 +5867,10 @@ mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data)
|
|
|
6771
5867
|
data->mv_size = NODEDSZ(leaf);
|
|
6772
5868
|
memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
|
|
6773
5869
|
if ((rc = mdb_page_get(mc, pgno, &omp, NULL)) != 0) {
|
|
6774
|
-
DPRINTF(("read overflow page %"
|
|
5870
|
+
DPRINTF(("read overflow page %"Z"u failed", pgno));
|
|
6775
5871
|
return rc;
|
|
6776
5872
|
}
|
|
6777
5873
|
data->mv_data = METADATA(omp);
|
|
6778
|
-
MC_SET_OVPG(mc, omp);
|
|
6779
5874
|
|
|
6780
5875
|
return MDB_SUCCESS;
|
|
6781
5876
|
}
|
|
@@ -6786,7 +5881,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi,
|
|
|
6786
5881
|
{
|
|
6787
5882
|
MDB_cursor mc;
|
|
6788
5883
|
MDB_xcursor mx;
|
|
6789
|
-
int exact = 0
|
|
5884
|
+
int exact = 0;
|
|
6790
5885
|
DKBUF;
|
|
6791
5886
|
|
|
6792
5887
|
DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
|
|
@@ -6798,12 +5893,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi,
|
|
|
6798
5893
|
return MDB_BAD_TXN;
|
|
6799
5894
|
|
|
6800
5895
|
mdb_cursor_init(&mc, txn, dbi, &mx);
|
|
6801
|
-
|
|
6802
|
-
/* unref all the pages when MDB_VL32 - caller must copy the data
|
|
6803
|
-
* before doing anything else
|
|
6804
|
-
*/
|
|
6805
|
-
MDB_CURSOR_UNREF(&mc, 1);
|
|
6806
|
-
return rc;
|
|
5896
|
+
return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
|
|
6807
5897
|
}
|
|
6808
5898
|
|
|
6809
5899
|
/** Find a sibling for a page.
|
|
@@ -6820,19 +5910,13 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right)
|
|
|
6820
5910
|
int rc;
|
|
6821
5911
|
MDB_node *indx;
|
|
6822
5912
|
MDB_page *mp;
|
|
6823
|
-
#ifdef MDB_VL32
|
|
6824
|
-
MDB_page *op;
|
|
6825
|
-
#endif
|
|
6826
5913
|
|
|
6827
5914
|
if (mc->mc_snum < 2) {
|
|
6828
5915
|
return MDB_NOTFOUND; /* root has no siblings */
|
|
6829
5916
|
}
|
|
6830
5917
|
|
|
6831
|
-
#ifdef MDB_VL32
|
|
6832
|
-
op = mc->mc_pg[mc->mc_top];
|
|
6833
|
-
#endif
|
|
6834
5918
|
mdb_cursor_pop(mc);
|
|
6835
|
-
DPRINTF(("parent page is page %"
|
|
5919
|
+
DPRINTF(("parent page is page %"Z"u, index %u",
|
|
6836
5920
|
mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]));
|
|
6837
5921
|
|
|
6838
5922
|
if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top]))
|
|
@@ -6855,8 +5939,6 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right)
|
|
|
6855
5939
|
}
|
|
6856
5940
|
mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
|
|
6857
5941
|
|
|
6858
|
-
MDB_PAGE_UNREF(mc->mc_txn, op);
|
|
6859
|
-
|
|
6860
5942
|
indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
|
6861
5943
|
if ((rc = mdb_page_get(mc, NODEPGNO(indx), &mp, NULL)) != 0) {
|
|
6862
5944
|
/* mc will be inconsistent if caller does mc_snum++ as above */
|
|
@@ -6904,9 +5986,6 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
|
6904
5986
|
return rc;
|
|
6905
5987
|
}
|
|
6906
5988
|
}
|
|
6907
|
-
else {
|
|
6908
|
-
MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
|
|
6909
|
-
}
|
|
6910
5989
|
} else {
|
|
6911
5990
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
6912
5991
|
if (op == MDB_NEXT_DUP)
|
|
@@ -6914,7 +5993,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
|
6914
5993
|
}
|
|
6915
5994
|
}
|
|
6916
5995
|
|
|
6917
|
-
DPRINTF(("cursor_next: top page is %"
|
|
5996
|
+
DPRINTF(("cursor_next: top page is %"Z"u in cursor %p",
|
|
6918
5997
|
mdb_dbg_pgno(mp), (void *) mc));
|
|
6919
5998
|
if (mc->mc_flags & C_DEL) {
|
|
6920
5999
|
mc->mc_flags ^= C_DEL;
|
|
@@ -6928,12 +6007,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
|
6928
6007
|
return rc;
|
|
6929
6008
|
}
|
|
6930
6009
|
mp = mc->mc_pg[mc->mc_top];
|
|
6931
|
-
DPRINTF(("next page is %"
|
|
6010
|
+
DPRINTF(("next page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]));
|
|
6932
6011
|
} else
|
|
6933
6012
|
mc->mc_ki[mc->mc_top]++;
|
|
6934
6013
|
|
|
6935
6014
|
skip:
|
|
6936
|
-
DPRINTF(("==> cursor points to page %"
|
|
6015
|
+
DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
|
|
6937
6016
|
mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
|
|
6938
6017
|
|
|
6939
6018
|
if (IS_LEAF2(mp)) {
|
|
@@ -6990,9 +6069,6 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
|
6990
6069
|
return rc;
|
|
6991
6070
|
}
|
|
6992
6071
|
}
|
|
6993
|
-
else {
|
|
6994
|
-
MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
|
|
6995
|
-
}
|
|
6996
6072
|
} else {
|
|
6997
6073
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
6998
6074
|
if (op == MDB_PREV_DUP)
|
|
@@ -7000,7 +6076,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
|
7000
6076
|
}
|
|
7001
6077
|
}
|
|
7002
6078
|
|
|
7003
|
-
DPRINTF(("cursor_prev: top page is %"
|
|
6079
|
+
DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p",
|
|
7004
6080
|
mdb_dbg_pgno(mp), (void *) mc));
|
|
7005
6081
|
|
|
7006
6082
|
mc->mc_flags &= ~(C_EOF|C_DEL);
|
|
@@ -7012,11 +6088,11 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
|
7012
6088
|
}
|
|
7013
6089
|
mp = mc->mc_pg[mc->mc_top];
|
|
7014
6090
|
mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1;
|
|
7015
|
-
DPRINTF(("prev page is %"
|
|
6091
|
+
DPRINTF(("prev page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]));
|
|
7016
6092
|
} else
|
|
7017
6093
|
mc->mc_ki[mc->mc_top]--;
|
|
7018
6094
|
|
|
7019
|
-
DPRINTF(("==> cursor points to page %"
|
|
6095
|
+
DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
|
|
7020
6096
|
mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
|
|
7021
6097
|
|
|
7022
6098
|
if (!IS_LEAF(mp))
|
|
@@ -7057,10 +6133,8 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
|
7057
6133
|
if (key->mv_size == 0)
|
|
7058
6134
|
return MDB_BAD_VALSIZE;
|
|
7059
6135
|
|
|
7060
|
-
if (mc->mc_xcursor)
|
|
7061
|
-
MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
|
|
6136
|
+
if (mc->mc_xcursor)
|
|
7062
6137
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
7063
|
-
}
|
|
7064
6138
|
|
|
7065
6139
|
/* See if we're already on the right page */
|
|
7066
6140
|
if (mc->mc_flags & C_INITIALIZED) {
|
|
@@ -7071,7 +6145,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
|
7071
6145
|
mc->mc_ki[mc->mc_top] = 0;
|
|
7072
6146
|
return MDB_NOTFOUND;
|
|
7073
6147
|
}
|
|
7074
|
-
if (mp
|
|
6148
|
+
if (MP_FLAGS(mp) & P_LEAF2) {
|
|
7075
6149
|
nodekey.mv_size = mc->mc_db->md_pad;
|
|
7076
6150
|
nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size);
|
|
7077
6151
|
} else {
|
|
@@ -7092,7 +6166,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
|
7092
6166
|
unsigned int i;
|
|
7093
6167
|
unsigned int nkeys = NUMKEYS(mp);
|
|
7094
6168
|
if (nkeys > 1) {
|
|
7095
|
-
if (mp
|
|
6169
|
+
if (MP_FLAGS(mp) & P_LEAF2) {
|
|
7096
6170
|
nodekey.mv_data = LEAF2KEY(mp,
|
|
7097
6171
|
nkeys-1, nodekey.mv_size);
|
|
7098
6172
|
} else {
|
|
@@ -7110,7 +6184,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
|
7110
6184
|
if (rc < 0) {
|
|
7111
6185
|
if (mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) {
|
|
7112
6186
|
/* This is definitely the right page, skip search_page */
|
|
7113
|
-
if (mp
|
|
6187
|
+
if (MP_FLAGS(mp) & P_LEAF2) {
|
|
7114
6188
|
nodekey.mv_data = LEAF2KEY(mp,
|
|
7115
6189
|
mc->mc_ki[mc->mc_top], nodekey.mv_size);
|
|
7116
6190
|
} else {
|
|
@@ -7216,8 +6290,10 @@ set1:
|
|
|
7216
6290
|
if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS)
|
|
7217
6291
|
return rc;
|
|
7218
6292
|
dcmp = mc->mc_dbx->md_dcmp;
|
|
7219
|
-
|
|
6293
|
+
#if UINT_MAX < SIZE_MAX
|
|
6294
|
+
if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
|
|
7220
6295
|
dcmp = mdb_cmp_clong;
|
|
6296
|
+
#endif
|
|
7221
6297
|
rc = dcmp(data, &olddata);
|
|
7222
6298
|
if (rc) {
|
|
7223
6299
|
if (op == MDB_GET_BOTH || rc > 0)
|
|
@@ -7249,10 +6325,8 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data)
|
|
|
7249
6325
|
int rc;
|
|
7250
6326
|
MDB_node *leaf;
|
|
7251
6327
|
|
|
7252
|
-
if (mc->mc_xcursor)
|
|
7253
|
-
MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
|
|
6328
|
+
if (mc->mc_xcursor)
|
|
7254
6329
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
7255
|
-
}
|
|
7256
6330
|
|
|
7257
6331
|
if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
|
|
7258
6332
|
rc = mdb_page_search(mc, NULL, MDB_PS_FIRST);
|
|
@@ -7296,10 +6370,8 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data)
|
|
|
7296
6370
|
int rc;
|
|
7297
6371
|
MDB_node *leaf;
|
|
7298
6372
|
|
|
7299
|
-
if (mc->mc_xcursor)
|
|
7300
|
-
MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
|
|
6373
|
+
if (mc->mc_xcursor)
|
|
7301
6374
|
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
7302
|
-
}
|
|
7303
6375
|
|
|
7304
6376
|
if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
|
|
7305
6377
|
rc = mdb_page_search(mc, NULL, MDB_PS_LAST);
|
|
@@ -7489,6 +6561,7 @@ fetchm:
|
|
|
7489
6561
|
rc = MDB_NOTFOUND;
|
|
7490
6562
|
break;
|
|
7491
6563
|
}
|
|
6564
|
+
mc->mc_flags &= ~C_EOF;
|
|
7492
6565
|
{
|
|
7493
6566
|
MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
|
7494
6567
|
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
@@ -7555,8 +6628,8 @@ mdb_cursor_touch(MDB_cursor *mc)
|
|
|
7555
6628
|
/** Do not spill pages to disk if txn is getting full, may fail instead */
|
|
7556
6629
|
#define MDB_NOSPILL 0x8000
|
|
7557
6630
|
|
|
7558
|
-
int
|
|
7559
|
-
|
|
6631
|
+
static int
|
|
6632
|
+
_mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
7560
6633
|
unsigned int flags)
|
|
7561
6634
|
{
|
|
7562
6635
|
MDB_env *env;
|
|
@@ -7675,7 +6748,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
|
7675
6748
|
*mc->mc_dbflag |= DB_DIRTY;
|
|
7676
6749
|
if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED))
|
|
7677
6750
|
== MDB_DUPFIXED)
|
|
7678
|
-
np
|
|
6751
|
+
MP_FLAGS(np) |= P_LEAF2;
|
|
7679
6752
|
mc->mc_flags |= C_INITIALIZED;
|
|
7680
6753
|
} else {
|
|
7681
6754
|
/* make sure all cursor pages are writable */
|
|
@@ -7697,7 +6770,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
|
7697
6770
|
fp_flags = P_LEAF|P_DIRTY;
|
|
7698
6771
|
fp = env->me_pbuf;
|
|
7699
6772
|
fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
|
|
7700
|
-
fp
|
|
6773
|
+
MP_LOWER(fp) = MP_UPPER(fp) = (PAGEHDRSZ-PAGEBASE);
|
|
7701
6774
|
olddata.mv_size = PAGEHDRSZ;
|
|
7702
6775
|
goto prep_subDB;
|
|
7703
6776
|
}
|
|
@@ -7756,8 +6829,10 @@ more:
|
|
|
7756
6829
|
if (flags == MDB_CURRENT)
|
|
7757
6830
|
goto current;
|
|
7758
6831
|
dcmp = mc->mc_dbx->md_dcmp;
|
|
7759
|
-
|
|
6832
|
+
#if UINT_MAX < SIZE_MAX
|
|
6833
|
+
if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
|
|
7760
6834
|
dcmp = mdb_cmp_clong;
|
|
6835
|
+
#endif
|
|
7761
6836
|
/* does data match? */
|
|
7762
6837
|
if (!dcmp(data, &olddata)) {
|
|
7763
6838
|
if (flags & (MDB_NODUPDATA|MDB_APPENDDUP))
|
|
@@ -7771,18 +6846,18 @@ more:
|
|
|
7771
6846
|
dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size);
|
|
7772
6847
|
|
|
7773
6848
|
/* Make sub-page header for the dup items, with dummy body */
|
|
7774
|
-
fp
|
|
7775
|
-
fp
|
|
6849
|
+
MP_FLAGS(fp) = P_LEAF|P_DIRTY|P_SUBP;
|
|
6850
|
+
MP_LOWER(fp) = (PAGEHDRSZ-PAGEBASE);
|
|
7776
6851
|
xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
|
|
7777
6852
|
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
|
7778
|
-
fp
|
|
6853
|
+
MP_FLAGS(fp) |= P_LEAF2;
|
|
7779
6854
|
fp->mp_pad = data->mv_size;
|
|
7780
6855
|
xdata.mv_size += 2 * data->mv_size; /* leave space for 2 more */
|
|
7781
6856
|
} else {
|
|
7782
6857
|
xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) +
|
|
7783
6858
|
(dkey.mv_size & 1) + (data->mv_size & 1);
|
|
7784
6859
|
}
|
|
7785
|
-
fp
|
|
6860
|
+
MP_UPPER(fp) = xdata.mv_size - PAGEBASE;
|
|
7786
6861
|
olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */
|
|
7787
6862
|
} else if (leaf->mn_flags & F_SUBDATA) {
|
|
7788
6863
|
/* Data is on sub-DB, just store it */
|
|
@@ -7805,8 +6880,8 @@ more:
|
|
|
7805
6880
|
}
|
|
7806
6881
|
/* FALLTHRU */ /* Big enough MDB_DUPFIXED sub-page */
|
|
7807
6882
|
case MDB_CURRENT:
|
|
7808
|
-
fp
|
|
7809
|
-
COPY_PGNO(fp
|
|
6883
|
+
MP_FLAGS(fp) |= P_DIRTY;
|
|
6884
|
+
COPY_PGNO(MP_PGNO(fp), MP_PGNO(mp));
|
|
7810
6885
|
mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
|
|
7811
6886
|
flags |= F_DUPDATA;
|
|
7812
6887
|
goto put_sub;
|
|
@@ -7814,7 +6889,7 @@ more:
|
|
|
7814
6889
|
xdata.mv_size = olddata.mv_size + offset;
|
|
7815
6890
|
}
|
|
7816
6891
|
|
|
7817
|
-
fp_flags = fp
|
|
6892
|
+
fp_flags = MP_FLAGS(fp);
|
|
7818
6893
|
if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) {
|
|
7819
6894
|
/* Too big for a sub-page, convert to sub-DB */
|
|
7820
6895
|
fp_flags &= ~P_SUBP;
|
|
@@ -7844,16 +6919,16 @@ prep_subDB:
|
|
|
7844
6919
|
sub_root = mp;
|
|
7845
6920
|
}
|
|
7846
6921
|
if (mp != fp) {
|
|
7847
|
-
mp
|
|
7848
|
-
mp
|
|
7849
|
-
mp
|
|
7850
|
-
mp
|
|
6922
|
+
MP_FLAGS(mp) = fp_flags | P_DIRTY;
|
|
6923
|
+
MP_PAD(mp) = MP_PAD(fp);
|
|
6924
|
+
MP_LOWER(mp) = MP_LOWER(fp);
|
|
6925
|
+
MP_UPPER(mp) = MP_UPPER(fp) + offset;
|
|
7851
6926
|
if (fp_flags & P_LEAF2) {
|
|
7852
6927
|
memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
|
|
7853
6928
|
} else {
|
|
7854
|
-
memcpy((char *)mp + mp
|
|
7855
|
-
olddata.mv_size - fp
|
|
7856
|
-
memcpy((char *)(
|
|
6929
|
+
memcpy((char *)mp + MP_UPPER(mp) + PAGEBASE, (char *)fp + MP_UPPER(fp) + PAGEBASE,
|
|
6930
|
+
olddata.mv_size - MP_UPPER(fp) - PAGEBASE);
|
|
6931
|
+
memcpy((char *)MP_PTRS(mp), (char *)MP_PTRS(fp), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0]));
|
|
7857
6932
|
for (i=0; i<NUMKEYS(fp); i++)
|
|
7858
6933
|
mp->mp_ptrs[i] += offset;
|
|
7859
6934
|
}
|
|
@@ -7918,7 +6993,7 @@ current:
|
|
|
7918
6993
|
* Copy end of page, adjusting alignment so
|
|
7919
6994
|
* compiler may copy words instead of bytes.
|
|
7920
6995
|
*/
|
|
7921
|
-
off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t);
|
|
6996
|
+
off = (PAGEHDRSZ + data->mv_size) & -(int)sizeof(size_t);
|
|
7922
6997
|
memcpy((size_t *)((char *)np + off),
|
|
7923
6998
|
(size_t *)((char *)omp + off), sz - off);
|
|
7924
6999
|
sz = PAGEHDRSZ;
|
|
@@ -7946,11 +7021,14 @@ current:
|
|
|
7946
7021
|
else if (!(mc->mc_flags & C_SUB))
|
|
7947
7022
|
memcpy(olddata.mv_data, data->mv_data, data->mv_size);
|
|
7948
7023
|
else {
|
|
7024
|
+
if (key->mv_size != NODEKSZ(leaf))
|
|
7025
|
+
goto new_ksize;
|
|
7949
7026
|
memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
|
|
7950
7027
|
goto fix_parent;
|
|
7951
7028
|
}
|
|
7952
7029
|
return MDB_SUCCESS;
|
|
7953
7030
|
}
|
|
7031
|
+
new_ksize:
|
|
7954
7032
|
mdb_node_del(mc, 0);
|
|
7955
7033
|
}
|
|
7956
7034
|
|
|
@@ -7997,7 +7075,7 @@ new_sub:
|
|
|
7997
7075
|
*/
|
|
7998
7076
|
if (do_sub) {
|
|
7999
7077
|
int xflags, new_dupdata;
|
|
8000
|
-
|
|
7078
|
+
size_t ecount;
|
|
8001
7079
|
put_sub:
|
|
8002
7080
|
xdata.mv_size = 0;
|
|
8003
7081
|
xdata.mv_data = "";
|
|
@@ -8014,7 +7092,7 @@ put_sub:
|
|
|
8014
7092
|
new_dupdata = (int)dkey.mv_size;
|
|
8015
7093
|
/* converted, write the original data first */
|
|
8016
7094
|
if (dkey.mv_size) {
|
|
8017
|
-
rc =
|
|
7095
|
+
rc = _mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
|
|
8018
7096
|
if (rc)
|
|
8019
7097
|
goto bad_sub;
|
|
8020
7098
|
/* we've done our job */
|
|
@@ -8042,7 +7120,7 @@ put_sub:
|
|
|
8042
7120
|
ecount = mc->mc_xcursor->mx_db.md_entries;
|
|
8043
7121
|
if (flags & MDB_APPENDDUP)
|
|
8044
7122
|
xflags |= MDB_APPEND;
|
|
8045
|
-
rc =
|
|
7123
|
+
rc = _mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
|
|
8046
7124
|
if (flags & F_SUBDATA) {
|
|
8047
7125
|
void *db = NODEDATA(leaf);
|
|
8048
7126
|
memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
|
|
@@ -8076,14 +7154,27 @@ put_sub:
|
|
|
8076
7154
|
return rc;
|
|
8077
7155
|
bad_sub:
|
|
8078
7156
|
if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */
|
|
8079
|
-
rc =
|
|
7157
|
+
rc = MDB_CORRUPTED;
|
|
8080
7158
|
}
|
|
8081
7159
|
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
|
8082
7160
|
return rc;
|
|
8083
7161
|
}
|
|
8084
7162
|
|
|
8085
7163
|
int
|
|
8086
|
-
|
|
7164
|
+
mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
7165
|
+
unsigned int flags)
|
|
7166
|
+
{
|
|
7167
|
+
DKBUF;
|
|
7168
|
+
DDBUF;
|
|
7169
|
+
int rc = _mdb_cursor_put(mc, key, data, flags);
|
|
7170
|
+
MDB_TRACE(("%p, %"Z"u[%s], %"Z"u%s, %u",
|
|
7171
|
+
mc, key ? key->mv_size:0, DKEY(key), data ? data->mv_size:0,
|
|
7172
|
+
data ? mdb_dval(mc->mc_txn, mc->mc_dbi, data, dbuf):"", flags));
|
|
7173
|
+
return rc;
|
|
7174
|
+
}
|
|
7175
|
+
|
|
7176
|
+
static int
|
|
7177
|
+
_mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
|
|
8087
7178
|
{
|
|
8088
7179
|
MDB_node *leaf;
|
|
8089
7180
|
MDB_page *mp;
|
|
@@ -8121,7 +7212,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
|
|
|
8121
7212
|
if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
|
|
8122
7213
|
mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
|
|
8123
7214
|
}
|
|
8124
|
-
rc =
|
|
7215
|
+
rc = _mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
|
|
8125
7216
|
if (rc)
|
|
8126
7217
|
return rc;
|
|
8127
7218
|
/* If sub-DB still has entries, we're done */
|
|
@@ -8185,6 +7276,14 @@ fail:
|
|
|
8185
7276
|
return rc;
|
|
8186
7277
|
}
|
|
8187
7278
|
|
|
7279
|
+
int
|
|
7280
|
+
mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
|
|
7281
|
+
{
|
|
7282
|
+
MDB_TRACE(("%p, %u",
|
|
7283
|
+
mc, flags));
|
|
7284
|
+
return _mdb_cursor_del(mc, flags);
|
|
7285
|
+
}
|
|
7286
|
+
|
|
8188
7287
|
/** Allocate and initialize new pages for a database.
|
|
8189
7288
|
* Set #MDB_TXN_ERROR on failure.
|
|
8190
7289
|
* @param[in] mc a cursor on the database being added to.
|
|
@@ -8202,7 +7301,7 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp)
|
|
|
8202
7301
|
|
|
8203
7302
|
if ((rc = mdb_page_alloc(mc, num, &np)))
|
|
8204
7303
|
return rc;
|
|
8205
|
-
DPRINTF(("allocated new mpage %"
|
|
7304
|
+
DPRINTF(("allocated new mpage %"Z"u, page size %u",
|
|
8206
7305
|
np->mp_pgno, mc->mc_txn->mt_env->me_psize));
|
|
8207
7306
|
np->mp_flags = flags | P_DIRTY;
|
|
8208
7307
|
np->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
|
@@ -8301,9 +7400,9 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
|
|
|
8301
7400
|
void *ndata;
|
|
8302
7401
|
DKBUF;
|
|
8303
7402
|
|
|
8304
|
-
mdb_cassert(mc, mp
|
|
7403
|
+
mdb_cassert(mc, MP_UPPER(mp) >= MP_LOWER(mp));
|
|
8305
7404
|
|
|
8306
|
-
DPRINTF(("add to %s %spage %"
|
|
7405
|
+
DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]",
|
|
8307
7406
|
IS_LEAF(mp) ? "leaf" : "branch",
|
|
8308
7407
|
IS_SUBP(mp) ? "sub-" : "",
|
|
8309
7408
|
mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0,
|
|
@@ -8320,8 +7419,8 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
|
|
|
8320
7419
|
memcpy(ptr, key->mv_data, ksize);
|
|
8321
7420
|
|
|
8322
7421
|
/* Just using these for counting */
|
|
8323
|
-
mp
|
|
8324
|
-
mp
|
|
7422
|
+
MP_LOWER(mp) += sizeof(indx_t);
|
|
7423
|
+
MP_UPPER(mp) -= ksize - sizeof(indx_t);
|
|
8325
7424
|
return MDB_SUCCESS;
|
|
8326
7425
|
}
|
|
8327
7426
|
|
|
@@ -8344,7 +7443,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
|
|
|
8344
7443
|
goto full;
|
|
8345
7444
|
if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
|
|
8346
7445
|
return rc;
|
|
8347
|
-
DPRINTF(("allocated overflow page %"
|
|
7446
|
+
DPRINTF(("allocated overflow page %"Z"u", ofp->mp_pgno));
|
|
8348
7447
|
flags |= F_BIGDATA;
|
|
8349
7448
|
goto update;
|
|
8350
7449
|
} else {
|
|
@@ -8358,14 +7457,14 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
|
|
|
8358
7457
|
update:
|
|
8359
7458
|
/* Move higher pointers up one slot. */
|
|
8360
7459
|
for (i = NUMKEYS(mp); i > indx; i--)
|
|
8361
|
-
mp
|
|
7460
|
+
MP_PTRS(mp)[i] = MP_PTRS(mp)[i - 1];
|
|
8362
7461
|
|
|
8363
7462
|
/* Adjust free space offsets. */
|
|
8364
|
-
ofs = mp
|
|
8365
|
-
mdb_cassert(mc, ofs >= mp
|
|
8366
|
-
mp
|
|
8367
|
-
mp
|
|
8368
|
-
mp
|
|
7463
|
+
ofs = MP_UPPER(mp) - node_size;
|
|
7464
|
+
mdb_cassert(mc, ofs >= MP_LOWER(mp) + sizeof(indx_t));
|
|
7465
|
+
MP_PTRS(mp)[indx] = ofs;
|
|
7466
|
+
MP_UPPER(mp) = ofs;
|
|
7467
|
+
MP_LOWER(mp) += sizeof(indx_t);
|
|
8369
7468
|
|
|
8370
7469
|
/* Write the node data. */
|
|
8371
7470
|
node = NODEPTR(mp, indx);
|
|
@@ -8401,9 +7500,9 @@ update:
|
|
|
8401
7500
|
return MDB_SUCCESS;
|
|
8402
7501
|
|
|
8403
7502
|
full:
|
|
8404
|
-
DPRINTF(("not enough room in page %"
|
|
7503
|
+
DPRINTF(("not enough room in page %"Z"u, got %u ptrs",
|
|
8405
7504
|
mdb_dbg_pgno(mp), NUMKEYS(mp)));
|
|
8406
|
-
DPRINTF(("upper-lower = %u - %u = %"Z"d", mp
|
|
7505
|
+
DPRINTF(("upper-lower = %u - %u = %"Z"d", MP_UPPER(mp),MP_LOWER(mp),room));
|
|
8407
7506
|
DPRINTF(("node size = %"Z"u", node_size));
|
|
8408
7507
|
mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
|
|
8409
7508
|
return MDB_PAGE_FULL;
|
|
@@ -8424,7 +7523,7 @@ mdb_node_del(MDB_cursor *mc, int ksize)
|
|
|
8424
7523
|
MDB_node *node;
|
|
8425
7524
|
char *base;
|
|
8426
7525
|
|
|
8427
|
-
DPRINTF(("delete node %u on %s page %"
|
|
7526
|
+
DPRINTF(("delete node %u on %s page %"Z"u", indx,
|
|
8428
7527
|
IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp)));
|
|
8429
7528
|
numkeys = NUMKEYS(mp);
|
|
8430
7529
|
mdb_cassert(mc, indx < numkeys);
|
|
@@ -8434,8 +7533,8 @@ mdb_node_del(MDB_cursor *mc, int ksize)
|
|
|
8434
7533
|
base = LEAF2KEY(mp, indx, ksize);
|
|
8435
7534
|
if (x)
|
|
8436
7535
|
memmove(base, base + ksize, x * ksize);
|
|
8437
|
-
mp
|
|
8438
|
-
mp
|
|
7536
|
+
MP_LOWER(mp) -= sizeof(indx_t);
|
|
7537
|
+
MP_UPPER(mp) += ksize - sizeof(indx_t);
|
|
8439
7538
|
return;
|
|
8440
7539
|
}
|
|
8441
7540
|
|
|
@@ -8449,21 +7548,21 @@ mdb_node_del(MDB_cursor *mc, int ksize)
|
|
|
8449
7548
|
}
|
|
8450
7549
|
sz = EVEN(sz);
|
|
8451
7550
|
|
|
8452
|
-
ptr = mp
|
|
7551
|
+
ptr = MP_PTRS(mp)[indx];
|
|
8453
7552
|
for (i = j = 0; i < numkeys; i++) {
|
|
8454
7553
|
if (i != indx) {
|
|
8455
|
-
mp
|
|
8456
|
-
if (mp
|
|
8457
|
-
mp
|
|
7554
|
+
MP_PTRS(mp)[j] = MP_PTRS(mp)[i];
|
|
7555
|
+
if (MP_PTRS(mp)[i] < ptr)
|
|
7556
|
+
MP_PTRS(mp)[j] += sz;
|
|
8458
7557
|
j++;
|
|
8459
7558
|
}
|
|
8460
7559
|
}
|
|
8461
7560
|
|
|
8462
|
-
base = (char *)mp + mp
|
|
8463
|
-
memmove(base + sz, base, ptr - mp
|
|
7561
|
+
base = (char *)mp + MP_UPPER(mp) + PAGEBASE;
|
|
7562
|
+
memmove(base + sz, base, ptr - MP_UPPER(mp));
|
|
8464
7563
|
|
|
8465
|
-
mp
|
|
8466
|
-
mp
|
|
7564
|
+
MP_LOWER(mp) -= sizeof(indx_t);
|
|
7565
|
+
MP_UPPER(mp) += sz;
|
|
8467
7566
|
}
|
|
8468
7567
|
|
|
8469
7568
|
/** Compact the main page after deleting a node on a subpage.
|
|
@@ -8492,11 +7591,11 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
|
|
|
8492
7591
|
} else {
|
|
8493
7592
|
xp = (MDB_page *)((char *)sp + delta); /* destination subpage */
|
|
8494
7593
|
for (i = NUMKEYS(sp); --i >= 0; )
|
|
8495
|
-
xp
|
|
7594
|
+
MP_PTRS(xp)[i] = MP_PTRS(sp)[i] - delta;
|
|
8496
7595
|
len = PAGEHDRSZ;
|
|
8497
7596
|
}
|
|
8498
|
-
sp
|
|
8499
|
-
COPY_PGNO(sp
|
|
7597
|
+
MP_UPPER(sp) = MP_LOWER(sp);
|
|
7598
|
+
COPY_PGNO(MP_PGNO(sp), mp->mp_pgno);
|
|
8500
7599
|
SETDSZ(node, nsize);
|
|
8501
7600
|
|
|
8502
7601
|
/* Shift <lower nodes...initial part of subpage> upward */
|
|
@@ -8533,8 +7632,7 @@ mdb_xcursor_init0(MDB_cursor *mc)
|
|
|
8533
7632
|
mx->mx_cursor.mc_dbflag = &mx->mx_dbflag;
|
|
8534
7633
|
mx->mx_cursor.mc_snum = 0;
|
|
8535
7634
|
mx->mx_cursor.mc_top = 0;
|
|
8536
|
-
|
|
8537
|
-
mx->mx_cursor.mc_flags = C_SUB | (mc->mc_flags & (C_ORIG_RDONLY|C_WRITEMAP));
|
|
7635
|
+
mx->mx_cursor.mc_flags = C_SUB;
|
|
8538
7636
|
mx->mx_dbx.md_name.mv_size = 0;
|
|
8539
7637
|
mx->mx_dbx.md_name.mv_data = NULL;
|
|
8540
7638
|
mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp;
|
|
@@ -8553,12 +7651,12 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
|
|
|
8553
7651
|
{
|
|
8554
7652
|
MDB_xcursor *mx = mc->mc_xcursor;
|
|
8555
7653
|
|
|
8556
|
-
mx->mx_cursor.mc_flags &= C_SUB|C_ORIG_RDONLY|C_WRITEMAP;
|
|
8557
7654
|
if (node->mn_flags & F_SUBDATA) {
|
|
8558
7655
|
memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db));
|
|
8559
7656
|
mx->mx_cursor.mc_pg[0] = 0;
|
|
8560
7657
|
mx->mx_cursor.mc_snum = 0;
|
|
8561
7658
|
mx->mx_cursor.mc_top = 0;
|
|
7659
|
+
mx->mx_cursor.mc_flags = C_SUB;
|
|
8562
7660
|
} else {
|
|
8563
7661
|
MDB_page *fp = NODEDATA(node);
|
|
8564
7662
|
mx->mx_db.md_pad = 0;
|
|
@@ -8568,10 +7666,10 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
|
|
|
8568
7666
|
mx->mx_db.md_leaf_pages = 1;
|
|
8569
7667
|
mx->mx_db.md_overflow_pages = 0;
|
|
8570
7668
|
mx->mx_db.md_entries = NUMKEYS(fp);
|
|
8571
|
-
COPY_PGNO(mx->mx_db.md_root, fp
|
|
7669
|
+
COPY_PGNO(mx->mx_db.md_root, MP_PGNO(fp));
|
|
8572
7670
|
mx->mx_cursor.mc_snum = 1;
|
|
8573
7671
|
mx->mx_cursor.mc_top = 0;
|
|
8574
|
-
mx->mx_cursor.mc_flags
|
|
7672
|
+
mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB;
|
|
8575
7673
|
mx->mx_cursor.mc_pg[0] = fp;
|
|
8576
7674
|
mx->mx_cursor.mc_ki[0] = 0;
|
|
8577
7675
|
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
|
@@ -8581,11 +7679,13 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
|
|
|
8581
7679
|
mx->mx_db.md_flags |= MDB_INTEGERKEY;
|
|
8582
7680
|
}
|
|
8583
7681
|
}
|
|
8584
|
-
DPRINTF(("Sub-db -%u root page %"
|
|
7682
|
+
DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi,
|
|
8585
7683
|
mx->mx_db.md_root));
|
|
8586
7684
|
mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA;
|
|
8587
|
-
|
|
7685
|
+
#if UINT_MAX < SIZE_MAX
|
|
7686
|
+
if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t))
|
|
8588
7687
|
mx->mx_dbx.md_cmp = mdb_cmp_clong;
|
|
7688
|
+
#endif
|
|
8589
7689
|
}
|
|
8590
7690
|
|
|
8591
7691
|
|
|
@@ -8608,7 +7708,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata)
|
|
|
8608
7708
|
mx->mx_cursor.mc_flags |= C_INITIALIZED;
|
|
8609
7709
|
mx->mx_cursor.mc_ki[0] = 0;
|
|
8610
7710
|
mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA;
|
|
8611
|
-
#if UINT_MAX <
|
|
7711
|
+
#if UINT_MAX < SIZE_MAX
|
|
8612
7712
|
mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp;
|
|
8613
7713
|
#endif
|
|
8614
7714
|
} else if (!(mx->mx_cursor.mc_flags & C_INITIALIZED)) {
|
|
@@ -8616,7 +7716,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata)
|
|
|
8616
7716
|
}
|
|
8617
7717
|
mx->mx_db = src_mx->mx_db;
|
|
8618
7718
|
mx->mx_cursor.mc_pg[0] = src_mx->mx_cursor.mc_pg[0];
|
|
8619
|
-
DPRINTF(("Sub-db -%u root page %"
|
|
7719
|
+
DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi,
|
|
8620
7720
|
mx->mx_db.md_root));
|
|
8621
7721
|
}
|
|
8622
7722
|
|
|
@@ -8635,8 +7735,7 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
|
|
|
8635
7735
|
mc->mc_top = 0;
|
|
8636
7736
|
mc->mc_pg[0] = 0;
|
|
8637
7737
|
mc->mc_ki[0] = 0;
|
|
8638
|
-
|
|
8639
|
-
mc->mc_flags = txn->mt_flags & (C_ORIG_RDONLY|C_WRITEMAP);
|
|
7738
|
+
mc->mc_flags = 0;
|
|
8640
7739
|
if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
|
|
8641
7740
|
mdb_tassert(txn, mx != NULL);
|
|
8642
7741
|
mc->mc_xcursor = mx;
|
|
@@ -8678,6 +7777,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
|
|
|
8678
7777
|
return ENOMEM;
|
|
8679
7778
|
}
|
|
8680
7779
|
|
|
7780
|
+
MDB_TRACE(("%p, %u = %p", txn, dbi, mc));
|
|
8681
7781
|
*ret = mc;
|
|
8682
7782
|
|
|
8683
7783
|
return MDB_SUCCESS;
|
|
@@ -8701,7 +7801,7 @@ mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc)
|
|
|
8701
7801
|
|
|
8702
7802
|
/* Return the count of duplicate data items for the current key */
|
|
8703
7803
|
int
|
|
8704
|
-
mdb_cursor_count(MDB_cursor *mc,
|
|
7804
|
+
mdb_cursor_count(MDB_cursor *mc, size_t *countp)
|
|
8705
7805
|
{
|
|
8706
7806
|
MDB_node *leaf;
|
|
8707
7807
|
|
|
@@ -8741,14 +7841,9 @@ mdb_cursor_count(MDB_cursor *mc, mdb_size_t *countp)
|
|
|
8741
7841
|
void
|
|
8742
7842
|
mdb_cursor_close(MDB_cursor *mc)
|
|
8743
7843
|
{
|
|
8744
|
-
|
|
8745
|
-
MDB_CURSOR_UNREF(mc, 0);
|
|
8746
|
-
}
|
|
7844
|
+
MDB_TRACE(("%p", mc));
|
|
8747
7845
|
if (mc && !mc->mc_backup) {
|
|
8748
|
-
/*
|
|
8749
|
-
* A read-only txn (!C_UNTRACK) may have been freed already,
|
|
8750
|
-
* so do not peek inside it. Only write txns track cursors.
|
|
8751
|
-
*/
|
|
7846
|
+
/* remove from txn, if tracked */
|
|
8752
7847
|
if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) {
|
|
8753
7848
|
MDB_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi];
|
|
8754
7849
|
while (*prev && *prev != mc) prev = &(*prev)->mc_next;
|
|
@@ -8799,7 +7894,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
|
|
|
8799
7894
|
char kbuf2[DKBUF_MAXKEYSIZE*2+1];
|
|
8800
7895
|
k2.mv_data = NODEKEY(node);
|
|
8801
7896
|
k2.mv_size = node->mn_ksize;
|
|
8802
|
-
DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"
|
|
7897
|
+
DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u",
|
|
8803
7898
|
indx, ptr,
|
|
8804
7899
|
mdb_dkey(&k2, kbuf2),
|
|
8805
7900
|
DKEY(key),
|
|
@@ -8947,7 +8042,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
|
|
|
8947
8042
|
return rc;
|
|
8948
8043
|
}
|
|
8949
8044
|
|
|
8950
|
-
DPRINTF(("moving %s node %u [%s] on page %"
|
|
8045
|
+
DPRINTF(("moving %s node %u [%s] on page %"Z"u to node %u on page %"Z"u",
|
|
8951
8046
|
IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch",
|
|
8952
8047
|
csrc->mc_ki[csrc->mc_top],
|
|
8953
8048
|
DKEY(&key),
|
|
@@ -9033,7 +8128,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
|
|
|
9033
8128
|
key.mv_size = NODEKSZ(srcnode);
|
|
9034
8129
|
key.mv_data = NODEKEY(srcnode);
|
|
9035
8130
|
}
|
|
9036
|
-
DPRINTF(("update separator for source page %"
|
|
8131
|
+
DPRINTF(("update separator for source page %"Z"u to [%s]",
|
|
9037
8132
|
csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key)));
|
|
9038
8133
|
mdb_cursor_copy(csrc, &mn);
|
|
9039
8134
|
mn.mc_snum--;
|
|
@@ -9064,7 +8159,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
|
|
|
9064
8159
|
key.mv_size = NODEKSZ(srcnode);
|
|
9065
8160
|
key.mv_data = NODEKEY(srcnode);
|
|
9066
8161
|
}
|
|
9067
|
-
DPRINTF(("update separator for destination page %"
|
|
8162
|
+
DPRINTF(("update separator for destination page %"Z"u to [%s]",
|
|
9068
8163
|
cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key)));
|
|
9069
8164
|
mdb_cursor_copy(cdst, &mn);
|
|
9070
8165
|
mn.mc_snum--;
|
|
@@ -9110,7 +8205,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
9110
8205
|
psrc = csrc->mc_pg[csrc->mc_top];
|
|
9111
8206
|
pdst = cdst->mc_pg[cdst->mc_top];
|
|
9112
8207
|
|
|
9113
|
-
DPRINTF(("merging page %"
|
|
8208
|
+
DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno));
|
|
9114
8209
|
|
|
9115
8210
|
mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
|
|
9116
8211
|
mdb_cassert(csrc, cdst->mc_snum > 1);
|
|
@@ -9167,7 +8262,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
9167
8262
|
}
|
|
9168
8263
|
}
|
|
9169
8264
|
|
|
9170
|
-
DPRINTF(("dst page %"
|
|
8265
|
+
DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)",
|
|
9171
8266
|
pdst->mp_pgno, NUMKEYS(pdst),
|
|
9172
8267
|
(float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10));
|
|
9173
8268
|
|
|
@@ -9251,7 +8346,6 @@ mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
9251
8346
|
cdst->mc_snum = csrc->mc_snum;
|
|
9252
8347
|
cdst->mc_top = csrc->mc_top;
|
|
9253
8348
|
cdst->mc_flags = csrc->mc_flags;
|
|
9254
|
-
MC_SET_OVPG(cdst, MC_OVPG(csrc));
|
|
9255
8349
|
|
|
9256
8350
|
for (i=0; i<csrc->mc_snum; i++) {
|
|
9257
8351
|
cdst->mc_pg[i] = csrc->mc_pg[i];
|
|
@@ -9280,14 +8374,14 @@ mdb_rebalance(MDB_cursor *mc)
|
|
|
9280
8374
|
minkeys = 1;
|
|
9281
8375
|
thresh = FILL_THRESHOLD;
|
|
9282
8376
|
}
|
|
9283
|
-
DPRINTF(("rebalancing %s page %"
|
|
8377
|
+
DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)",
|
|
9284
8378
|
IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
|
|
9285
8379
|
mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]),
|
|
9286
8380
|
(float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10));
|
|
9287
8381
|
|
|
9288
8382
|
if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= thresh &&
|
|
9289
8383
|
NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
|
|
9290
|
-
DPRINTF(("no need to rebalance page %"
|
|
8384
|
+
DPRINTF(("no need to rebalance page %"Z"u, above fill threshold",
|
|
9291
8385
|
mdb_dbg_pgno(mc->mc_pg[mc->mc_top])));
|
|
9292
8386
|
return MDB_SUCCESS;
|
|
9293
8387
|
}
|
|
@@ -9416,7 +8510,7 @@ mdb_rebalance(MDB_cursor *mc)
|
|
|
9416
8510
|
fromleft = 1;
|
|
9417
8511
|
}
|
|
9418
8512
|
|
|
9419
|
-
DPRINTF(("found neighbor page %"
|
|
8513
|
+
DPRINTF(("found neighbor page %"Z"u (%u keys, %.1f%% full)",
|
|
9420
8514
|
mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]),
|
|
9421
8515
|
(float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10));
|
|
9422
8516
|
|
|
@@ -9522,7 +8616,7 @@ mdb_cursor_del0(MDB_cursor *mc)
|
|
|
9522
8616
|
goto fail;
|
|
9523
8617
|
}
|
|
9524
8618
|
if (m3->mc_xcursor && !(m3->mc_flags & C_EOF)) {
|
|
9525
|
-
MDB_node *node = NODEPTR(m3->mc_pg[
|
|
8619
|
+
MDB_node *node = NODEPTR(m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]);
|
|
9526
8620
|
/* If this node has dupdata, it may need to be reinited
|
|
9527
8621
|
* because its data has moved.
|
|
9528
8622
|
* If the xcursor was not initd it must be reinited.
|
|
@@ -9557,6 +8651,8 @@ int
|
|
|
9557
8651
|
mdb_del(MDB_txn *txn, MDB_dbi dbi,
|
|
9558
8652
|
MDB_val *key, MDB_val *data)
|
|
9559
8653
|
{
|
|
8654
|
+
DKBUF;
|
|
8655
|
+
DDBUF;
|
|
9560
8656
|
if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID))
|
|
9561
8657
|
return EINVAL;
|
|
9562
8658
|
|
|
@@ -9568,6 +8664,9 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi,
|
|
|
9568
8664
|
data = NULL;
|
|
9569
8665
|
}
|
|
9570
8666
|
|
|
8667
|
+
MDB_TRACE(("%p, %u, %"Z"u[%s], %"Z"u%s",
|
|
8668
|
+
txn, dbi, key ? key->mv_size:0, DKEY(key), data ? data->mv_size:0,
|
|
8669
|
+
data ? mdb_dval(txn, dbi, data, dbuf):""));
|
|
9571
8670
|
return mdb_del0(txn, dbi, key, data, 0);
|
|
9572
8671
|
}
|
|
9573
8672
|
|
|
@@ -9605,9 +8704,10 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi,
|
|
|
9605
8704
|
* run out of space, triggering a split. We need this
|
|
9606
8705
|
* cursor to be consistent until the end of the rebalance.
|
|
9607
8706
|
*/
|
|
8707
|
+
mc.mc_flags |= C_UNTRACK;
|
|
9608
8708
|
mc.mc_next = txn->mt_cursors[dbi];
|
|
9609
8709
|
txn->mt_cursors[dbi] = &mc;
|
|
9610
|
-
rc =
|
|
8710
|
+
rc = _mdb_cursor_del(&mc, flags);
|
|
9611
8711
|
txn->mt_cursors[dbi] = mc.mc_next;
|
|
9612
8712
|
}
|
|
9613
8713
|
return rc;
|
|
@@ -9646,7 +8746,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9646
8746
|
newindx = mc->mc_ki[mc->mc_top];
|
|
9647
8747
|
nkeys = NUMKEYS(mp);
|
|
9648
8748
|
|
|
9649
|
-
DPRINTF(("-----> splitting %s page %"
|
|
8749
|
+
DPRINTF(("-----> splitting %s page %"Z"u and adding [%s] at index %i/%i",
|
|
9650
8750
|
IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno,
|
|
9651
8751
|
DKEY(newkey), mc->mc_ki[mc->mc_top], nkeys));
|
|
9652
8752
|
|
|
@@ -9654,7 +8754,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9654
8754
|
if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp)))
|
|
9655
8755
|
return rc;
|
|
9656
8756
|
rp->mp_pad = mp->mp_pad;
|
|
9657
|
-
DPRINTF(("new right sibling: page %"
|
|
8757
|
+
DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno));
|
|
9658
8758
|
|
|
9659
8759
|
/* Usually when splitting the root page, the cursor
|
|
9660
8760
|
* height is 1. But when called from mdb_update_key,
|
|
@@ -9672,7 +8772,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9672
8772
|
mc->mc_pg[0] = pp;
|
|
9673
8773
|
mc->mc_ki[0] = 0;
|
|
9674
8774
|
mc->mc_db->md_root = pp->mp_pgno;
|
|
9675
|
-
DPRINTF(("root split! new root = %"
|
|
8775
|
+
DPRINTF(("root split! new root = %"Z"u", pp->mp_pgno));
|
|
9676
8776
|
new_root = mc->mc_db->md_depth++;
|
|
9677
8777
|
|
|
9678
8778
|
/* Add left (implicit) pointer. */
|
|
@@ -9689,7 +8789,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9689
8789
|
ptop = 0;
|
|
9690
8790
|
} else {
|
|
9691
8791
|
ptop = mc->mc_top-1;
|
|
9692
|
-
DPRINTF(("parent branch page is %"
|
|
8792
|
+
DPRINTF(("parent branch page is %"Z"u", mc->mc_pg[ptop]->mp_pgno));
|
|
9693
8793
|
}
|
|
9694
8794
|
|
|
9695
8795
|
mdb_cursor_copy(mc, &mn);
|
|
@@ -9745,9 +8845,13 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9745
8845
|
mc->mc_ki[mc->mc_top] = x;
|
|
9746
8846
|
}
|
|
9747
8847
|
} else {
|
|
9748
|
-
int psize, nsize, k;
|
|
8848
|
+
int psize, nsize, k, keythresh;
|
|
8849
|
+
|
|
9749
8850
|
/* Maximum free space in an empty page */
|
|
9750
8851
|
pmax = env->me_psize - PAGEHDRSZ;
|
|
8852
|
+
/* Threshold number of keys considered "small" */
|
|
8853
|
+
keythresh = env->me_psize >> 7;
|
|
8854
|
+
|
|
9751
8855
|
if (IS_LEAF(mp))
|
|
9752
8856
|
nsize = mdb_leaf_size(env, newkey, newdata);
|
|
9753
8857
|
else
|
|
@@ -9788,7 +8892,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9788
8892
|
* the split so the new page is emptier than the old page.
|
|
9789
8893
|
* This yields better packing during sequential inserts.
|
|
9790
8894
|
*/
|
|
9791
|
-
if (nkeys <
|
|
8895
|
+
if (nkeys < keythresh || nsize > pmax/16 || newindx >= nkeys) {
|
|
9792
8896
|
/* Find split point */
|
|
9793
8897
|
psize = 0;
|
|
9794
8898
|
if (newindx <= split_indx || newindx >= nkeys) {
|
|
@@ -9864,7 +8968,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9864
8968
|
} else {
|
|
9865
8969
|
/* find right page's left sibling */
|
|
9866
8970
|
mc->mc_ki[ptop] = mn.mc_ki[ptop];
|
|
9867
|
-
|
|
8971
|
+
mdb_cursor_sibling(mc, 0);
|
|
9868
8972
|
}
|
|
9869
8973
|
}
|
|
9870
8974
|
} else {
|
|
@@ -9873,8 +8977,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
9873
8977
|
mn.mc_top++;
|
|
9874
8978
|
}
|
|
9875
8979
|
if (rc != MDB_SUCCESS) {
|
|
9876
|
-
if (rc == MDB_NOTFOUND) /* improper mdb_cursor_sibling() result */
|
|
9877
|
-
rc = MDB_PROBLEM;
|
|
9878
8980
|
goto done;
|
|
9879
8981
|
}
|
|
9880
8982
|
if (nflags & MDB_APPEND) {
|
|
@@ -10047,6 +9149,8 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
|
|
|
10047
9149
|
MDB_cursor mc;
|
|
10048
9150
|
MDB_xcursor mx;
|
|
10049
9151
|
int rc;
|
|
9152
|
+
DKBUF;
|
|
9153
|
+
DDBUF;
|
|
10050
9154
|
|
|
10051
9155
|
if (!key || !data || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID))
|
|
10052
9156
|
return EINVAL;
|
|
@@ -10057,10 +9161,12 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
|
|
|
10057
9161
|
if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))
|
|
10058
9162
|
return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
|
|
10059
9163
|
|
|
9164
|
+
MDB_TRACE(("%p, %u, %"Z"u[%s], %"Z"u%s, %u",
|
|
9165
|
+
txn, dbi, key ? key->mv_size:0, DKEY(key), data->mv_size, mdb_dval(txn, dbi, data, dbuf), flags));
|
|
10060
9166
|
mdb_cursor_init(&mc, txn, dbi, &mx);
|
|
10061
9167
|
mc.mc_next = txn->mt_cursors[dbi];
|
|
10062
9168
|
txn->mt_cursors[dbi] = &mc;
|
|
10063
|
-
rc =
|
|
9169
|
+
rc = _mdb_cursor_put(&mc, key, data, flags);
|
|
10064
9170
|
txn->mt_cursors[dbi] = mc.mc_next;
|
|
10065
9171
|
return rc;
|
|
10066
9172
|
}
|
|
@@ -10078,8 +9184,8 @@ typedef struct mdb_copy {
|
|
|
10078
9184
|
pthread_cond_t mc_cond; /**< Condition variable for #mc_new */
|
|
10079
9185
|
char *mc_wbuf[2];
|
|
10080
9186
|
char *mc_over[2];
|
|
10081
|
-
|
|
10082
|
-
|
|
9187
|
+
size_t mc_wlen[2];
|
|
9188
|
+
size_t mc_olen[2];
|
|
10083
9189
|
pgno_t mc_next_pgno;
|
|
10084
9190
|
HANDLE mc_fd;
|
|
10085
9191
|
int mc_toggle; /**< Buffer number in provider */
|
|
@@ -10096,7 +9202,8 @@ mdb_env_copythr(void *arg)
|
|
|
10096
9202
|
{
|
|
10097
9203
|
mdb_copy *my = arg;
|
|
10098
9204
|
char *ptr;
|
|
10099
|
-
int toggle = 0,
|
|
9205
|
+
int toggle = 0, rc;
|
|
9206
|
+
size_t wsize;
|
|
10100
9207
|
#ifdef _WIN32
|
|
10101
9208
|
DWORD len;
|
|
10102
9209
|
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
|
@@ -10209,7 +9316,6 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags)
|
|
|
10209
9316
|
|
|
10210
9317
|
mc.mc_snum = 1;
|
|
10211
9318
|
mc.mc_txn = my->mc_txn;
|
|
10212
|
-
mc.mc_flags = my->mc_txn->mt_flags & (C_ORIG_RDONLY|C_WRITEMAP);
|
|
10213
9319
|
|
|
10214
9320
|
rc = mdb_page_get(&mc, *pg, &mc.mc_pg[0], NULL);
|
|
10215
9321
|
if (rc)
|
|
@@ -10464,7 +9570,7 @@ finish:
|
|
|
10464
9570
|
my.mc_error = rc;
|
|
10465
9571
|
mdb_env_cthr_toggle(&my, 1 | MDB_EOF);
|
|
10466
9572
|
rc = THREAD_FINISH(thr);
|
|
10467
|
-
|
|
9573
|
+
_mdb_txn_abort(txn);
|
|
10468
9574
|
|
|
10469
9575
|
done:
|
|
10470
9576
|
#ifdef _WIN32
|
|
@@ -10487,7 +9593,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
|
|
|
10487
9593
|
MDB_txn *txn = NULL;
|
|
10488
9594
|
mdb_mutexref_t wmutex = NULL;
|
|
10489
9595
|
int rc;
|
|
10490
|
-
|
|
9596
|
+
size_t wsize, w3;
|
|
10491
9597
|
char *ptr;
|
|
10492
9598
|
#ifdef _WIN32
|
|
10493
9599
|
DWORD len, w2;
|
|
@@ -10548,7 +9654,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
|
|
|
10548
9654
|
|
|
10549
9655
|
w3 = txn->mt_next_pgno * env->me_psize;
|
|
10550
9656
|
{
|
|
10551
|
-
|
|
9657
|
+
size_t fsize = 0;
|
|
10552
9658
|
if ((rc = mdb_fsize(env->me_fd, &fsize)))
|
|
10553
9659
|
goto leave;
|
|
10554
9660
|
if (w3 > fsize)
|
|
@@ -10576,7 +9682,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
|
|
|
10576
9682
|
}
|
|
10577
9683
|
|
|
10578
9684
|
leave:
|
|
10579
|
-
|
|
9685
|
+
_mdb_txn_abort(txn);
|
|
10580
9686
|
return rc;
|
|
10581
9687
|
}
|
|
10582
9688
|
|
|
@@ -10791,6 +9897,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
|
10791
9897
|
}
|
|
10792
9898
|
}
|
|
10793
9899
|
mdb_default_cmp(txn, MAIN_DBI);
|
|
9900
|
+
MDB_TRACE(("%p, (null), %u = %u", txn, flags, MAIN_DBI));
|
|
10794
9901
|
return MDB_SUCCESS;
|
|
10795
9902
|
}
|
|
10796
9903
|
|
|
@@ -10852,7 +9959,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
|
10852
9959
|
dummy.md_root = P_INVALID;
|
|
10853
9960
|
dummy.md_flags = flags & PERSISTENT_FLAGS;
|
|
10854
9961
|
WITH_CURSOR_TRACKING(mc,
|
|
10855
|
-
rc =
|
|
9962
|
+
rc = _mdb_cursor_put(&mc, &key, &data, F_SUBDATA));
|
|
10856
9963
|
dbflag |= DB_DIRTY;
|
|
10857
9964
|
}
|
|
10858
9965
|
|
|
@@ -10877,6 +9984,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
|
10877
9984
|
if (!unused) {
|
|
10878
9985
|
txn->mt_numdbs++;
|
|
10879
9986
|
}
|
|
9987
|
+
MDB_TRACE(("%p, %s, %u = %u", txn, name, flags, slot));
|
|
10880
9988
|
}
|
|
10881
9989
|
|
|
10882
9990
|
return rc;
|
|
@@ -10908,6 +10016,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
|
|
|
10908
10016
|
ptr = env->me_dbxs[dbi].md_name.mv_data;
|
|
10909
10017
|
/* If there was no name, this was already closed */
|
|
10910
10018
|
if (ptr) {
|
|
10019
|
+
MDB_TRACE(("%p, %u", env, dbi));
|
|
10911
10020
|
env->me_dbxs[dbi].md_name.mv_data = NULL;
|
|
10912
10021
|
env->me_dbxs[dbi].md_name.mv_size = 0;
|
|
10913
10022
|
env->me_dbflags[dbi] = 0;
|
|
@@ -10952,11 +10061,6 @@ mdb_drop0(MDB_cursor *mc, int subs)
|
|
|
10952
10061
|
mdb_cursor_pop(mc);
|
|
10953
10062
|
|
|
10954
10063
|
mdb_cursor_copy(mc, &mx);
|
|
10955
|
-
#ifdef MDB_VL32
|
|
10956
|
-
/* bump refcount for mx's pages */
|
|
10957
|
-
for (i=0; i<mc->mc_snum; i++)
|
|
10958
|
-
mdb_page_get(&mx, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL);
|
|
10959
|
-
#endif
|
|
10960
10064
|
while (mc->mc_snum > 0) {
|
|
10961
10065
|
MDB_page *mp = mc->mc_pg[mc->mc_top];
|
|
10962
10066
|
unsigned n = NUMKEYS(mp);
|
|
@@ -11022,8 +10126,6 @@ pop:
|
|
|
11022
10126
|
done:
|
|
11023
10127
|
if (rc)
|
|
11024
10128
|
txn->mt_flags |= MDB_TXN_ERROR;
|
|
11025
|
-
/* drop refcount for mx's pages */
|
|
11026
|
-
MDB_CURSOR_UNREF(&mx, 0);
|
|
11027
10129
|
} else if (rc == MDB_NOTFOUND) {
|
|
11028
10130
|
rc = MDB_SUCCESS;
|
|
11029
10131
|
}
|
|
@@ -11049,6 +10151,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
|
|
|
11049
10151
|
if (rc)
|
|
11050
10152
|
return rc;
|
|
11051
10153
|
|
|
10154
|
+
MDB_TRACE(("%u, %d", dbi, del));
|
|
11052
10155
|
rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
|
|
11053
10156
|
/* Invalidate the dropped DB's cursors */
|
|
11054
10157
|
for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
|
|
@@ -11143,7 +10246,7 @@ mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
|
|
11143
10246
|
if (mr[i].mr_pid) {
|
|
11144
10247
|
txnid_t txnid = mr[i].mr_txnid;
|
|
11145
10248
|
sprintf(buf, txnid == (txnid_t)-1 ?
|
|
11146
|
-
"%10d %"Z"x -\n" : "%10d %"Z"x %"
|
|
10249
|
+
"%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n",
|
|
11147
10250
|
(int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid);
|
|
11148
10251
|
if (first) {
|
|
11149
10252
|
first = 0;
|
|
@@ -11248,7 +10351,7 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
|
|
|
11248
10351
|
}
|
|
11249
10352
|
for (; j<rdrs; j++)
|
|
11250
10353
|
if (mr[j].mr_pid == pid) {
|
|
11251
|
-
DPRINTF(("clear stale reader pid %u txn %"
|
|
10354
|
+
DPRINTF(("clear stale reader pid %u txn %"Z"d",
|
|
11252
10355
|
(unsigned) pid, mr[j].mr_txnid));
|
|
11253
10356
|
mr[j].mr_pid = 0;
|
|
11254
10357
|
count++;
|