lmdb 0.7.3 → 0.7.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -4
  3. data/Rakefile +48 -0
  4. data/ext/lmdb_ext/extconf.rb +27 -10
  5. data/ext/lmdb_ext/lmdb_ext.c +22 -8
  6. data/lib/lmdb/database.rb +2 -1
  7. data/lib/lmdb/version.rb +1 -1
  8. data/lmdb.gemspec +4 -1
  9. data/spec/lmdb_spec.rb +9 -0
  10. data/vendor/liblmdb/VERSION +1 -0
  11. data/vendor/{libraries/liblmdb → liblmdb}/lmdb.h +18 -63
  12. data/vendor/{libraries/liblmdb → liblmdb}/mdb.c +583 -1480
  13. data/vendor/{libraries/liblmdb → liblmdb}/midl.c +0 -62
  14. data/vendor/{libraries/liblmdb → liblmdb}/midl.h +4 -16
  15. metadata +7 -34
  16. data/CONTRIBUTORS +0 -8
  17. data/behaviour.org +0 -35
  18. data/ext/lmdb_ext/prototypes.sh +0 -4
  19. data/vendor/libraries/liblmdb/.gitignore +0 -24
  20. data/vendor/libraries/liblmdb/COPYRIGHT +0 -20
  21. data/vendor/libraries/liblmdb/Doxyfile +0 -1631
  22. data/vendor/libraries/liblmdb/LICENSE +0 -47
  23. data/vendor/libraries/liblmdb/Makefile +0 -118
  24. data/vendor/libraries/liblmdb/intro.doc +0 -192
  25. data/vendor/libraries/liblmdb/mdb_copy.1 +0 -61
  26. data/vendor/libraries/liblmdb/mdb_copy.c +0 -84
  27. data/vendor/libraries/liblmdb/mdb_drop.1 +0 -40
  28. data/vendor/libraries/liblmdb/mdb_drop.c +0 -135
  29. data/vendor/libraries/liblmdb/mdb_dump.1 +0 -81
  30. data/vendor/libraries/liblmdb/mdb_dump.c +0 -319
  31. data/vendor/libraries/liblmdb/mdb_load.1 +0 -84
  32. data/vendor/libraries/liblmdb/mdb_load.c +0 -492
  33. data/vendor/libraries/liblmdb/mdb_stat.1 +0 -70
  34. data/vendor/libraries/liblmdb/mdb_stat.c +0 -264
  35. data/vendor/libraries/liblmdb/mtest.c +0 -177
  36. data/vendor/libraries/liblmdb/mtest2.c +0 -124
  37. data/vendor/libraries/liblmdb/mtest3.c +0 -133
  38. data/vendor/libraries/liblmdb/mtest4.c +0 -168
  39. data/vendor/libraries/liblmdb/mtest5.c +0 -135
  40. data/vendor/libraries/liblmdb/mtest6.c +0 -141
  41. data/vendor/libraries/liblmdb/sample-bdb.txt +0 -73
  42. data/vendor/libraries/liblmdb/sample-mdb.txt +0 -62
  43. data/vendor/libraries/liblmdb/tooltag +0 -27
@@ -35,7 +35,7 @@
35
35
  #ifndef _GNU_SOURCE
36
36
  #define _GNU_SOURCE 1
37
37
  #endif
38
- #if defined(MDB_VL32) || defined(__WIN64__)
38
+ #if defined(__WIN64__)
39
39
  #define _FILE_OFFSET_BITS 64
40
40
  #endif
41
41
  #ifdef _WIN32
@@ -43,41 +43,6 @@
43
43
  #include <windows.h>
44
44
  #include <wchar.h> /* get wcscpy() */
45
45
 
46
- /* We use native NT APIs to setup the memory map, so that we can
47
- * let the DB file grow incrementally instead of always preallocating
48
- * the full size. These APIs are defined in <wdm.h> and <ntifs.h>
49
- * but those headers are meant for driver-level development and
50
- * conflict with the regular user-level headers, so we explicitly
51
- * declare them here. We get pointers to these functions from
52
- * NTDLL.DLL at runtime, to avoid buildtime dependencies on any
53
- * NTDLL import libraries.
54
- */
55
- typedef NTSTATUS (WINAPI NtCreateSectionFunc)
56
- (OUT PHANDLE sh, IN ACCESS_MASK acc,
57
- IN void * oa OPTIONAL,
58
- IN PLARGE_INTEGER ms OPTIONAL,
59
- IN ULONG pp, IN ULONG aa, IN HANDLE fh OPTIONAL);
60
-
61
- static NtCreateSectionFunc *NtCreateSection;
62
-
63
- typedef enum _SECTION_INHERIT {
64
- ViewShare = 1,
65
- ViewUnmap = 2
66
- } SECTION_INHERIT;
67
-
68
- typedef NTSTATUS (WINAPI NtMapViewOfSectionFunc)
69
- (IN PHANDLE sh, IN HANDLE ph,
70
- IN OUT PVOID *addr, IN ULONG_PTR zbits,
71
- IN SIZE_T cs, IN OUT PLARGE_INTEGER off OPTIONAL,
72
- IN OUT PSIZE_T vs, IN SECTION_INHERIT ih,
73
- IN ULONG at, IN ULONG pp);
74
-
75
- static NtMapViewOfSectionFunc *NtMapViewOfSection;
76
-
77
- typedef NTSTATUS (WINAPI NtCloseFunc)(HANDLE h);
78
-
79
- static NtCloseFunc *NtClose;
80
-
81
46
  /** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it
82
47
  * as int64 which is wrong. MSVC doesn't define it at all, so just
83
48
  * don't use it.
@@ -96,7 +61,6 @@ static NtCloseFunc *NtClose;
96
61
  # define SSIZE_MAX INT_MAX
97
62
  # endif
98
63
  #endif
99
- #define MDB_OFF_T int64_t
100
64
  #else
101
65
  #include <sys/types.h>
102
66
  #include <sys/stat.h>
@@ -109,13 +73,11 @@ static NtCloseFunc *NtClose;
109
73
  #include <sys/file.h>
110
74
  #endif
111
75
  #include <fcntl.h>
112
- #define MDB_OFF_T off_t
113
76
  #endif
114
77
 
115
78
  #if defined(__mips) && defined(__linux)
116
79
  /* MIPS has cache coherency issues, requires explicit cache control */
117
- #include <asm/cachectl.h>
118
- extern int cacheflush(char *addr, int nbytes, int cache);
80
+ #include <sys/cachectl.h>
119
81
  #define CACHEFLUSH(addr, bytes, cache) cacheflush(addr, bytes, cache)
120
82
  #else
121
83
  #define CACHEFLUSH(addr, bytes, cache)
@@ -146,7 +108,7 @@ typedef SSIZE_T ssize_t;
146
108
  #include <unistd.h>
147
109
  #endif
148
110
 
149
- #if defined(__sun) || defined(__ANDROID__)
111
+ #if defined(__sun) || defined(ANDROID)
150
112
  /* Most platforms have posix_memalign, older may only have memalign */
151
113
  #define HAVE_MEMALIGN 1
152
114
  #include <malloc.h>
@@ -165,36 +127,36 @@ typedef SSIZE_T ssize_t;
165
127
  # define MDB_USE_POSIX_MUTEX 1
166
128
  # define MDB_USE_ROBUST 1
167
129
  #elif defined(__APPLE__) || defined (BSD) || defined(__FreeBSD_kernel__)
168
- # if !(defined(MDB_USE_POSIX_MUTEX) || defined(MDB_USE_POSIX_SEM))
169
- # define MDB_USE_SYSV_SEM 1
130
+ # define MDB_USE_POSIX_SEM 1
131
+ # if defined(__APPLE__)
132
+ # define MDB_FDATASYNC(fd) fcntl(fd, F_FULLFSYNC)
133
+ # else
134
+ # define MDB_FDATASYNC fsync
170
135
  # endif
136
+ #elif defined(ANDROID)
171
137
  # define MDB_FDATASYNC fsync
172
- #elif defined(__ANDROID__)
138
+ #elif defined(__HAIKU__)
139
+ # define MDB_USE_POSIX_SEM 1
173
140
  # define MDB_FDATASYNC fsync
174
141
  #endif
175
142
 
143
+ /* NetBSD does not define union semun in sys/sem.h */
144
+ #if defined(__NetBSD__) && !defined(_SEM_SEMUN_UNDEFINED)
145
+ # define _SEM_SEMUN_UNDEFINED 1
146
+ #endif
147
+
176
148
  #ifndef _WIN32
177
149
  #include <pthread.h>
178
150
  #include <signal.h>
179
151
  #ifdef MDB_USE_POSIX_SEM
180
152
  # define MDB_USE_HASH 1
181
153
  #include <semaphore.h>
182
- #elif defined(MDB_USE_SYSV_SEM)
183
- #include <sys/ipc.h>
184
- #include <sys/sem.h>
185
- #ifdef _SEM_SEMUN_UNDEFINED
186
- union semun {
187
- int val;
188
- struct semid_ds *buf;
189
- unsigned short *array;
190
- };
191
- #endif /* _SEM_SEMUN_UNDEFINED */
192
154
  #else
193
155
  #define MDB_USE_POSIX_MUTEX 1
194
- #endif /* MDB_USE_POSIX_SEM */
195
- #endif /* !_WIN32 */
156
+ #endif
157
+ #endif
196
158
 
197
- #if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) + defined(MDB_USE_SYSV_SEM) \
159
+ #if defined(_WIN32) + defined(MDB_USE_POSIX_SEM) \
198
160
  + defined(MDB_USE_POSIX_MUTEX) != 1
199
161
  # error "Ambiguous shared-lock implementation"
200
162
  #endif
@@ -245,19 +207,25 @@ union semun {
245
207
 
246
208
  #if (BYTE_ORDER == LITTLE_ENDIAN) == (BYTE_ORDER == BIG_ENDIAN)
247
209
  # error "Unknown or unsupported endianness (BYTE_ORDER)"
248
- #elif (-6 & 5) || CHAR_BIT!=8 || UINT_MAX!=0xffffffff || MDB_SIZE_MAX%UINT_MAX
210
+ #elif (-6 & 5) || CHAR_BIT != 8 || UINT_MAX < 0xffffffff || ULONG_MAX % 0xFFFF
249
211
  # error "Two's complement, reasonably sized integer types, please"
250
212
  #endif
251
213
 
252
- #ifdef __GNUC__
253
- /** Put infrequently used env functions in separate section */
254
- # ifdef __APPLE__
255
- # define ESECT __attribute__ ((section("__TEXT,text_env")))
214
+ #if (((__clang_major__ << 8) | __clang_minor__) >= 0x0302) || (((__GNUC__ << 8) | __GNUC_MINOR__) >= 0x0403)
215
+ /** Mark infrequently used env functions as cold. This puts them in a separate
216
+ * section, and optimizes them for size */
217
+ #define ESECT __attribute__ ((cold))
218
+ #else
219
+ /* On older compilers, use a separate section */
220
+ # ifdef __GNUC__
221
+ # ifdef __APPLE__
222
+ # define ESECT __attribute__ ((section("__TEXT,text_env")))
223
+ # else
224
+ # define ESECT __attribute__ ((section("text_env")))
225
+ # endif
256
226
  # else
257
- # define ESECT __attribute__ ((section("text_env")))
227
+ # define ESECT
258
228
  # endif
259
- #else
260
- #define ESECT
261
229
  #endif
262
230
 
263
231
  #ifdef _WIN32
@@ -296,8 +264,6 @@ union semun {
296
264
  #define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10)
297
265
  #ifdef _WIN32
298
266
  #define MDB_OWNERDEAD ((int) WAIT_ABANDONED)
299
- #elif defined MDB_USE_SYSV_SEM
300
- #define MDB_OWNERDEAD (MDB_LAST_ERRCODE + 11)
301
267
  #elif defined(MDB_USE_POSIX_MUTEX) && defined(EOWNERDEAD)
302
268
  #define MDB_OWNERDEAD EOWNERDEAD /**< #LOCK_MUTEX0() result if dead owner */
303
269
  #endif
@@ -308,14 +274,13 @@ union semun {
308
274
  /** Some platforms define the EOWNERDEAD error code
309
275
  * even though they don't support Robust Mutexes.
310
276
  * Compile with -DMDB_USE_ROBUST=0, or use some other
311
- * mechanism like -DMDB_USE_SYSV_SEM instead of
312
- * -DMDB_USE_POSIX_MUTEX. (SysV semaphores are
313
- * also Robust, but some systems don't support them
314
- * either.)
277
+ * mechanism like -DMDB_USE_POSIX_SEM instead of
278
+ * -DMDB_USE_POSIX_MUTEX.
279
+ * (Posix semaphores are not robust.)
315
280
  */
316
281
  #ifndef MDB_USE_ROBUST
317
282
  /* Android currently lacks Robust Mutex support. So does glibc < 2.4. */
318
- # if defined(MDB_USE_POSIX_MUTEX) && (defined(__ANDROID__) || \
283
+ # if defined(MDB_USE_POSIX_MUTEX) && (defined(ANDROID) || \
319
284
  (defined(__GLIBC__) && GLIBC_VER < 0x020004))
320
285
  # define MDB_USE_ROBUST 0
321
286
  # else
@@ -375,10 +340,12 @@ typedef HANDLE mdb_mutex_t, mdb_mutexref_t;
375
340
  #else
376
341
  #define MDB_PROCESS_QUERY_LIMITED_INFORMATION 0x1000
377
342
  #endif
343
+ #define Z "I"
378
344
  #else
379
345
  #define THREAD_RET void *
380
346
  #define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg)
381
347
  #define THREAD_FINISH(thr) pthread_join(thr,NULL)
348
+ #define Z "z" /**< printf format modifier for size_t */
382
349
 
383
350
  /** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */
384
351
  #define MDB_PIDLOCK 1
@@ -397,40 +364,6 @@ mdb_sem_wait(sem_t *sem)
397
364
  return rc;
398
365
  }
399
366
 
400
- #elif defined MDB_USE_SYSV_SEM
401
-
402
- typedef struct mdb_mutex {
403
- int semid;
404
- int semnum;
405
- int *locked;
406
- } mdb_mutex_t[1], *mdb_mutexref_t;
407
-
408
- #define LOCK_MUTEX0(mutex) mdb_sem_wait(mutex)
409
- #define UNLOCK_MUTEX(mutex) do { \
410
- struct sembuf sb = { 0, 1, SEM_UNDO }; \
411
- sb.sem_num = (mutex)->semnum; \
412
- *(mutex)->locked = 0; \
413
- semop((mutex)->semid, &sb, 1); \
414
- } while(0)
415
-
416
- static int
417
- mdb_sem_wait(mdb_mutexref_t sem)
418
- {
419
- int rc, *locked = sem->locked;
420
- struct sembuf sb = { 0, -1, SEM_UNDO };
421
- sb.sem_num = sem->semnum;
422
- do {
423
- if (!semop(sem->semid, &sb, 1)) {
424
- rc = *locked ? MDB_OWNERDEAD : MDB_SUCCESS;
425
- *locked = 1;
426
- break;
427
- }
428
- } while ((rc = errno) == EINTR);
429
- return rc;
430
- }
431
-
432
- #define mdb_mutex_consistent(mutex) 0
433
-
434
367
  #else /* MDB_USE_POSIX_MUTEX: */
435
368
  /** Shared mutex/semaphore as the original is stored.
436
369
  *
@@ -451,7 +384,7 @@ typedef pthread_mutex_t *mdb_mutexref_t;
451
384
  /** Mark mutex-protected data as repaired, after death of previous owner.
452
385
  */
453
386
  #define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex)
454
- #endif /* MDB_USE_POSIX_SEM || MDB_USE_SYSV_SEM */
387
+ #endif /* MDB_USE_POSIX_SEM */
455
388
 
456
389
  /** Get the error code for the last failed system function.
457
390
  */
@@ -476,25 +409,12 @@ typedef pthread_mutex_t *mdb_mutexref_t;
476
409
  #define GET_PAGESIZE(x) ((x) = sysconf(_SC_PAGE_SIZE))
477
410
  #endif
478
411
 
479
- #define Z MDB_FMT_Z /**< printf/scanf format modifier for size_t */
480
- #define Yu MDB_PRIy(u) /**< printf format for #mdb_size_t */
481
- #define Yd MDB_PRIy(d) /**< printf format for 'signed #mdb_size_t' */
482
-
483
- #ifdef MDB_USE_SYSV_SEM
484
- #define MNAME_LEN (sizeof(int))
412
+ #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
413
+ #define MNAME_LEN 32
485
414
  #else
486
415
  #define MNAME_LEN (sizeof(pthread_mutex_t))
487
416
  #endif
488
417
 
489
- /** Initial part of #MDB_env.me_mutexname[].
490
- * Changes to this code must be reflected in #MDB_LOCK_FORMAT.
491
- */
492
- #ifdef _WIN32
493
- #define MUTEXNAME_PREFIX "Global\\MDB"
494
- #elif defined MDB_USE_POSIX_SEM
495
- #define MUTEXNAME_PREFIX "/MDB"
496
- #endif
497
-
498
418
  /** @} */
499
419
 
500
420
  #ifdef MDB_ROBUST_SUPPORTED
@@ -573,18 +493,26 @@ typedef MDB_ID txnid_t;
573
493
  #define MDB_DEBUG 0
574
494
  #endif
575
495
 
496
+ #define MDB_DBG_INFO 1
497
+ #define MDB_DBG_TRACE 2
498
+
576
499
  #if MDB_DEBUG
577
- static int mdb_debug;
500
+ static int mdb_debug = MDB_DBG_TRACE;
578
501
  static txnid_t mdb_debug_start;
579
502
 
580
503
  /** Print a debug message with printf formatting.
581
504
  * Requires double parenthesis around 2 or more args.
582
505
  */
583
- # define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args))
506
+ # define DPRINTF(args) ((void) ((mdb_debug & MDB_DBG_INFO) && DPRINTF0 args))
584
507
  # define DPRINTF0(fmt, ...) \
585
508
  fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__)
509
+ /** Trace info for replaying */
510
+ # define MDB_TRACE(args) ((void) ((mdb_debug & MDB_DBG_TRACE) && DPRINTF1 args))
511
+ # define DPRINTF1(fmt, ...) \
512
+ fprintf(stderr, ">%d:%s: " fmt "\n", getpid(), mdb_func_, __VA_ARGS__)
586
513
  #else
587
514
  # define DPRINTF(args) ((void) 0)
515
+ # define MDB_TRACE(args) ((void) 0)
588
516
  #endif
589
517
  /** Print a debug string.
590
518
  * The string is printed literally, with no format processing.
@@ -636,11 +564,7 @@ static txnid_t mdb_debug_start;
636
564
  /** The version number for a database's datafile format. */
637
565
  #define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1)
638
566
  /** The version number for a database's lockfile format. */
639
- #define MDB_LOCK_VERSION ((MDB_DEVEL) ? 999 : 2)
640
- /** Number of bits representing #MDB_LOCK_VERSION in #MDB_LOCK_FORMAT.
641
- * The remaining bits must leave room for #MDB_lock_desc.
642
- */
643
- #define MDB_LOCK_VERSION_BITS 12
567
+ #define MDB_LOCK_VERSION 1
644
568
 
645
569
  /** @brief The max size of a key we can write, or 0 for computed max.
646
570
  *
@@ -685,6 +609,11 @@ static txnid_t mdb_debug_start;
685
609
  * This is used for printing a hex dump of a key's contents.
686
610
  */
687
611
  #define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1]
612
+ /** A data value buffer.
613
+ * @ingroup debug
614
+ * This is used for printing a hex dump of a #MDB_DUPSORT value's contents.
615
+ */
616
+ #define DDBUF char dbuf[DKBUF_MAXKEYSIZE*2+1+2]
688
617
  /** Display a key in hex.
689
618
  * @ingroup debug
690
619
  * Invoke a function to display a key in hex.
@@ -692,6 +621,7 @@ static txnid_t mdb_debug_start;
692
621
  #define DKEY(x) mdb_dkey(x, kbuf)
693
622
  #else
694
623
  #define DKBUF
624
+ #define DDBUF
695
625
  #define DKEY(x) 0
696
626
  #endif
697
627
 
@@ -706,27 +636,12 @@ static txnid_t mdb_debug_start;
706
636
  /** Round \b n up to an even number. */
707
637
  #define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */
708
638
 
709
- /** Least significant 1-bit of \b n. n must be of an unsigned type. */
710
- #define LOW_BIT(n) ((n) & (-(n)))
711
-
712
- /** (log2(\b p2) % \b n), for p2 = power of 2 and 0 < n < 8. */
713
- #define LOG2_MOD(p2, n) (7 - 86 / ((p2) % ((1U<<(n))-1) + 11))
714
- /* Explanation: Let p2 = 2**(n*y + x), x<n and M = (1U<<n)-1. Now p2 =
715
- * (M+1)**y * 2**x = 2**x (mod M). Finally "/" "happens" to return 7-x.
716
- */
717
-
718
- /** Should be alignment of \b type. Ensure it is a power of 2. */
719
- #define ALIGNOF2(type) \
720
- LOW_BIT(offsetof(struct { char ch_; type align_; }, align_))
721
-
722
639
  /** Used for offsets within a single page.
723
640
  * Since memory pages are typically 4 or 8KB in size, 12-13 bits,
724
641
  * this is plenty.
725
642
  */
726
643
  typedef uint16_t indx_t;
727
644
 
728
- typedef unsigned long long mdb_hash_t;
729
-
730
645
  /** Default size of memory map.
731
646
  * This is certainly too small for any actual applications. Apps should always set
732
647
  * the size explicitly using #mdb_env_set_mapsize().
@@ -844,6 +759,14 @@ typedef struct MDB_txbody {
844
759
  uint32_t mtb_magic;
845
760
  /** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */
846
761
  uint32_t mtb_format;
762
+ #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
763
+ char mtb_rmname[MNAME_LEN];
764
+ #else
765
+ /** Mutex protecting access to this table.
766
+ * This is the reader table lock used with LOCK_MUTEX().
767
+ */
768
+ mdb_mutex_t mtb_rmutex;
769
+ #endif
847
770
  /** The ID of the last transaction committed to the database.
848
771
  * This is recorded here only for convenience; the value can always
849
772
  * be determined by reading the main database meta pages.
@@ -854,18 +777,6 @@ typedef struct MDB_txbody {
854
777
  * when readers release their slots.
855
778
  */
856
779
  volatile unsigned mtb_numreaders;
857
- #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
858
- /** Binary form of names of the reader/writer locks */
859
- mdb_hash_t mtb_mutexid;
860
- #elif defined(MDB_USE_SYSV_SEM)
861
- int mtb_semid;
862
- int mtb_rlocked;
863
- #else
864
- /** Mutex protecting access to this table.
865
- * This is the reader table lock used with LOCK_MUTEX().
866
- */
867
- mdb_mutex_t mtb_rmutex;
868
- #endif
869
780
  } MDB_txbody;
870
781
 
871
782
  /** The actual reader table definition. */
@@ -875,80 +786,30 @@ typedef struct MDB_txninfo {
875
786
  #define mti_magic mt1.mtb.mtb_magic
876
787
  #define mti_format mt1.mtb.mtb_format
877
788
  #define mti_rmutex mt1.mtb.mtb_rmutex
789
+ #define mti_rmname mt1.mtb.mtb_rmname
878
790
  #define mti_txnid mt1.mtb.mtb_txnid
879
791
  #define mti_numreaders mt1.mtb.mtb_numreaders
880
- #define mti_mutexid mt1.mtb.mtb_mutexid
881
- #ifdef MDB_USE_SYSV_SEM
882
- #define mti_semid mt1.mtb.mtb_semid
883
- #define mti_rlocked mt1.mtb.mtb_rlocked
884
- #endif
885
792
  char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)];
886
793
  } mt1;
887
- #if !(defined(_WIN32) || defined(MDB_USE_POSIX_SEM))
888
794
  union {
889
- #ifdef MDB_USE_SYSV_SEM
890
- int mt2_wlocked;
891
- #define mti_wlocked mt2.mt2_wlocked
795
+ #if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
796
+ char mt2_wmname[MNAME_LEN];
797
+ #define mti_wmname mt2.mt2_wmname
892
798
  #else
893
799
  mdb_mutex_t mt2_wmutex;
894
800
  #define mti_wmutex mt2.mt2_wmutex
895
801
  #endif
896
802
  char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)];
897
803
  } mt2;
898
- #endif
899
804
  MDB_reader mti_readers[1];
900
805
  } MDB_txninfo;
901
806
 
902
807
  /** Lockfile format signature: version, features and field layout */
903
808
  #define MDB_LOCK_FORMAT \
904
- ((uint32_t) \
905
- (((MDB_LOCK_VERSION) % (1U << MDB_LOCK_VERSION_BITS)) \
906
- + MDB_lock_desc * (1U << MDB_LOCK_VERSION_BITS)))
907
-
908
- /** Lock type and layout. Values 0-119. _WIN32 implies #MDB_PIDLOCK.
909
- * Some low values are reserved for future tweaks.
910
- */
911
- #ifdef _WIN32
912
- # define MDB_LOCK_TYPE (0 + ALIGNOF2(mdb_hash_t)/8 % 2)
913
- #elif defined MDB_USE_POSIX_SEM
914
- # define MDB_LOCK_TYPE (4 + ALIGNOF2(mdb_hash_t)/8 % 2)
915
- #elif defined MDB_USE_SYSV_SEM
916
- # define MDB_LOCK_TYPE (8)
917
- #elif defined MDB_USE_POSIX_MUTEX
918
- /* We do not know the inside of a POSIX mutex and how to check if mutexes
919
- * used by two executables are compatible. Just check alignment and size.
920
- */
921
- # define MDB_LOCK_TYPE (10 + \
922
- LOG2_MOD(ALIGNOF2(pthread_mutex_t), 5) + \
923
- sizeof(pthread_mutex_t) / 4U % 22 * 5)
924
- #endif
925
-
926
- enum {
927
- /** Magic number for lockfile layout and features.
928
- *
929
- * This *attempts* to stop liblmdb variants compiled with conflicting
930
- * options from using the lockfile at the same time and thus breaking
931
- * it. It describes locking types, and sizes and sometimes alignment
932
- * of the various lockfile items.
933
- *
934
- * The detected ranges are mostly guesswork, or based simply on how
935
- * big they could be without using more bits. So we can tweak them
936
- * in good conscience when updating #MDB_LOCK_VERSION.
937
- */
938
- MDB_lock_desc =
939
- /* Default CACHELINE=64 vs. other values (have seen mention of 32-256) */
940
- (CACHELINE==64 ? 0 : 1 + LOG2_MOD(CACHELINE >> (CACHELINE>64), 5))
941
- + 6 * (sizeof(MDB_PID_T)/4 % 3) /* legacy(2) to word(4/8)? */
942
- + 18 * (sizeof(pthread_t)/4 % 5) /* can be struct{id, active data} */
943
- + 90 * (sizeof(MDB_txbody) / CACHELINE % 3)
944
- + 270 * (MDB_LOCK_TYPE % 120)
945
- /* The above is < 270*120 < 2**15 */
946
- + ((sizeof(txnid_t) == 8) << 15) /* 32bit/64bit */
947
- + ((sizeof(MDB_reader) > CACHELINE) << 16)
948
- /* Not really needed - implied by MDB_LOCK_TYPE != (_WIN32 locking) */
949
- + (((MDB_PIDLOCK) != 0) << 17)
950
- /* 18 bits total: Must be <= (32 - MDB_LOCK_VERSION_BITS). */
951
- };
809
+ ((uint32_t) \
810
+ ((MDB_LOCK_VERSION) \
811
+ /* Flags which describe functionality */ \
812
+ + (((MDB_PIDLOCK) != 0) << 16)))
952
813
  /** @} */
953
814
 
954
815
  /** Common header for all page types. The page type depends on #mp_flags.
@@ -1003,9 +864,26 @@ typedef struct MDB_page {
1003
864
  } pb;
1004
865
  uint32_t pb_pages; /**< number of overflow pages */
1005
866
  } mp_pb;
1006
- indx_t mp_ptrs[1]; /**< dynamic size */
867
+ indx_t mp_ptrs[0]; /**< dynamic size */
1007
868
  } MDB_page;
1008
869
 
870
+ /** Alternate page header, for 2-byte aligned access */
871
+ typedef struct MDB_page2 {
872
+ uint16_t mp2_p[sizeof(pgno_t)/2];
873
+ uint16_t mp2_pad;
874
+ uint16_t mp2_flags;
875
+ indx_t mp2_lower;
876
+ indx_t mp2_upper;
877
+ indx_t mp2_ptrs[0];
878
+ } MDB_page2;
879
+
880
+ #define MP_PGNO(p) (((MDB_page2 *)(void *)(p))->mp2_p)
881
+ #define MP_PAD(p) (((MDB_page2 *)(void *)(p))->mp2_pad)
882
+ #define MP_FLAGS(p) (((MDB_page2 *)(void *)(p))->mp2_flags)
883
+ #define MP_LOWER(p) (((MDB_page2 *)(void *)(p))->mp2_lower)
884
+ #define MP_UPPER(p) (((MDB_page2 *)(void *)(p))->mp2_upper)
885
+ #define MP_PTRS(p) (((MDB_page2 *)(void *)(p))->mp2_ptrs)
886
+
1009
887
  /** Size of the page header, excluding dynamic data at the end */
1010
888
  #define PAGEHDRSZ ((unsigned) offsetof(MDB_page, mp_ptrs))
1011
889
 
@@ -1016,10 +894,10 @@ typedef struct MDB_page {
1016
894
  #define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0)
1017
895
 
1018
896
  /** Number of nodes on a page */
1019
- #define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1)
897
+ #define NUMKEYS(p) ((MP_LOWER(p) - (PAGEHDRSZ-PAGEBASE)) >> 1)
1020
898
 
1021
899
  /** The amount of space remaining in the page */
1022
- #define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower)
900
+ #define SIZELEFT(p) (indx_t)(MP_UPPER(p) - MP_LOWER(p))
1023
901
 
1024
902
  /** The percentage of space used in the page, in tenths of a percent. */
1025
903
  #define PAGEFILL(env, p) (1000L * ((env)->me_psize - PAGEHDRSZ - SIZELEFT(p)) / \
@@ -1030,15 +908,15 @@ typedef struct MDB_page {
1030
908
  #define FILL_THRESHOLD 250
1031
909
 
1032
910
  /** Test if a page is a leaf page */
1033
- #define IS_LEAF(p) F_ISSET((p)->mp_flags, P_LEAF)
911
+ #define IS_LEAF(p) F_ISSET(MP_FLAGS(p), P_LEAF)
1034
912
  /** Test if a page is a LEAF2 page */
1035
- #define IS_LEAF2(p) F_ISSET((p)->mp_flags, P_LEAF2)
913
+ #define IS_LEAF2(p) F_ISSET(MP_FLAGS(p), P_LEAF2)
1036
914
  /** Test if a page is a branch page */
1037
- #define IS_BRANCH(p) F_ISSET((p)->mp_flags, P_BRANCH)
915
+ #define IS_BRANCH(p) F_ISSET(MP_FLAGS(p), P_BRANCH)
1038
916
  /** Test if a page is an overflow page */
1039
- #define IS_OVERFLOW(p) F_ISSET((p)->mp_flags, P_OVERFLOW)
917
+ #define IS_OVERFLOW(p) F_ISSET(MP_FLAGS(p), P_OVERFLOW)
1040
918
  /** Test if a page is a sub page */
1041
- #define IS_SUBP(p) F_ISSET((p)->mp_flags, P_SUBP)
919
+ #define IS_SUBP(p) F_ISSET(MP_FLAGS(p), P_SUBP)
1042
920
 
1043
921
  /** The number of overflow pages needed to store the given size. */
1044
922
  #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
@@ -1106,7 +984,7 @@ typedef struct MDB_node {
1106
984
  #define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size)
1107
985
 
1108
986
  /** Address of node \b i in page \b p */
1109
- #define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE))
987
+ #define NODEPTR(p, i) ((MDB_node *)((char *)(p) + MP_PTRS(p)[i] + PAGEBASE))
1110
988
 
1111
989
  /** Address of the key for the node */
1112
990
  #define NODEKEY(node) (void *)((node)->mn_data)
@@ -1134,8 +1012,10 @@ typedef struct MDB_node {
1134
1012
  /** Copy a page number from src to dst */
1135
1013
  #ifdef MISALIGNED_OK
1136
1014
  #define COPY_PGNO(dst,src) dst = src
1015
+ #undef MP_PGNO
1016
+ #define MP_PGNO(p) ((p)->mp_pgno)
1137
1017
  #else
1138
- #if MDB_SIZE_MAX > 0xffffffffU
1018
+ #if SIZE_MAX > 4294967295UL
1139
1019
  #define COPY_PGNO(dst,src) do { \
1140
1020
  unsigned short *s, *d; \
1141
1021
  s = (unsigned short *)&(src); \
@@ -1176,7 +1056,7 @@ typedef struct MDB_db {
1176
1056
  pgno_t md_branch_pages; /**< number of internal pages */
1177
1057
  pgno_t md_leaf_pages; /**< number of leaf pages */
1178
1058
  pgno_t md_overflow_pages; /**< number of overflow pages */
1179
- mdb_size_t md_entries; /**< number of data items */
1059
+ size_t md_entries; /**< number of data items */
1180
1060
  pgno_t md_root; /**< the root page of this tree */
1181
1061
  } MDB_db;
1182
1062
 
@@ -1206,16 +1086,8 @@ typedef struct MDB_meta {
1206
1086
  uint32_t mm_magic;
1207
1087
  /** Version number of this file. Must be set to #MDB_DATA_VERSION. */
1208
1088
  uint32_t mm_version;
1209
- #ifdef MDB_VL32
1210
- union { /* always zero since we don't support fixed mapping in MDB_VL32 */
1211
- MDB_ID mmun_ull;
1212
- void *mmun_address;
1213
- } mm_un;
1214
- #define mm_address mm_un.mmun_address
1215
- #else
1216
1089
  void *mm_address; /**< address for fixed mapping */
1217
- #endif
1218
- mdb_size_t mm_mapsize; /**< size of mmap region */
1090
+ size_t mm_mapsize; /**< size of mmap region */
1219
1091
  MDB_db mm_dbs[CORE_DBS]; /**< first is free space, 2nd is main db */
1220
1092
  /** The size of pages used in this DB */
1221
1093
  #define mm_psize mm_dbs[FREE_DBI].md_pad
@@ -1261,9 +1133,6 @@ struct MDB_txn {
1261
1133
  /** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */
1262
1134
  MDB_txn *mt_child;
1263
1135
  pgno_t mt_next_pgno; /**< next unallocated page */
1264
- #ifdef MDB_VL32
1265
- pgno_t mt_last_pgno; /**< last written page */
1266
- #endif
1267
1136
  /** The ID of this transaction. IDs are integers incrementing from 1.
1268
1137
  * Only committed write transactions increment the ID. If a transaction
1269
1138
  * aborts, the ID may be re-used by the next writer.
@@ -1311,19 +1180,6 @@ struct MDB_txn {
1311
1180
  MDB_cursor **mt_cursors;
1312
1181
  /** Array of flags for each DB */
1313
1182
  unsigned char *mt_dbflags;
1314
- #ifdef MDB_VL32
1315
- /** List of read-only pages (actually chunks) */
1316
- MDB_ID3L mt_rpages;
1317
- /** We map chunks of 16 pages. Even though Windows uses 4KB pages, all
1318
- * mappings must begin on 64KB boundaries. So we round off all pgnos to
1319
- * a chunk boundary. We do the same on Linux for symmetry, and also to
1320
- * reduce the frequency of mmap/munmap calls.
1321
- */
1322
- #define MDB_RPAGE_CHUNK 16
1323
- #define MDB_TRPAGE_SIZE 4096 /**< size of #mt_rpages array of chunks */
1324
- #define MDB_TRPAGE_MAX (MDB_TRPAGE_SIZE-1) /**< maximum chunk index */
1325
- unsigned int mt_rpcheck; /**< threshold for reclaiming unref'd chunks */
1326
- #endif
1327
1183
  /** Number of DB records in use, or 0 when the txn is finished.
1328
1184
  * This number only ever increments until the txn finishes; we
1329
1185
  * don't decrement it when individual DB handles are closed.
@@ -1335,9 +1191,7 @@ struct MDB_txn {
1335
1191
  * @{
1336
1192
  */
1337
1193
  /** #mdb_txn_begin() flags */
1338
- #define MDB_TXN_BEGIN_FLAGS (MDB_NOMETASYNC|MDB_NOSYNC|MDB_RDONLY)
1339
- #define MDB_TXN_NOMETASYNC MDB_NOMETASYNC /**< don't sync meta for this txn on commit */
1340
- #define MDB_TXN_NOSYNC MDB_NOSYNC /**< don't sync this txn on commit */
1194
+ #define MDB_TXN_BEGIN_FLAGS MDB_RDONLY
1341
1195
  #define MDB_TXN_RDONLY MDB_RDONLY /**< read-only transaction */
1342
1196
  /* internal txn flags */
1343
1197
  #define MDB_TXN_WRITEMAP MDB_WRITEMAP /**< copy of #MDB_env flag in writers */
@@ -1403,24 +1257,10 @@ struct MDB_cursor {
1403
1257
  #define C_SUB 0x04 /**< Cursor is a sub-cursor */
1404
1258
  #define C_DEL 0x08 /**< last op was a cursor_del */
1405
1259
  #define C_UNTRACK 0x40 /**< Un-track cursor when closing */
1406
- #define C_WRITEMAP MDB_TXN_WRITEMAP /**< Copy of txn flag */
1407
- /** Read-only cursor into the txn's original snapshot in the map.
1408
- * Set for read-only txns, and in #mdb_page_alloc() for #FREE_DBI when
1409
- * #MDB_DEVEL & 2. Only implements code which is necessary for this.
1410
- */
1411
- #define C_ORIG_RDONLY MDB_TXN_RDONLY
1412
1260
  /** @} */
1413
1261
  unsigned int mc_flags; /**< @ref mdb_cursor */
1414
1262
  MDB_page *mc_pg[CURSOR_STACK]; /**< stack of pushed pages */
1415
1263
  indx_t mc_ki[CURSOR_STACK]; /**< stack of page indices */
1416
- #ifdef MDB_VL32
1417
- MDB_page *mc_ovpg; /**< a referenced overflow page */
1418
- # define MC_OVPG(mc) ((mc)->mc_ovpg)
1419
- # define MC_SET_OVPG(mc, pg) ((mc)->mc_ovpg = (pg))
1420
- #else
1421
- # define MC_OVPG(mc) ((MDB_page *)0)
1422
- # define MC_SET_OVPG(mc, pg) ((void)0)
1423
- #endif
1424
1264
  };
1425
1265
 
1426
1266
  /** Context for sorted-dup records.
@@ -1467,12 +1307,6 @@ struct MDB_env {
1467
1307
  HANDLE me_fd; /**< The main data file */
1468
1308
  HANDLE me_lfd; /**< The lock file */
1469
1309
  HANDLE me_mfd; /**< For writing and syncing the meta pages */
1470
- #ifdef _WIN32
1471
- #ifdef MDB_VL32
1472
- HANDLE me_fmh; /**< File Mapping handle */
1473
- #endif /* MDB_VL32 */
1474
- HANDLE me_ovfd; /**< Overlapped/async with write-through file handle */
1475
- #endif /* _WIN32 */
1476
1310
  /** Failed to update the meta page. Probably an I/O error. */
1477
1311
  #define MDB_FATAL_ERROR 0x80000000U
1478
1312
  /** Some fields are initialized. */
@@ -1497,8 +1331,8 @@ struct MDB_env {
1497
1331
  void *me_pbuf; /**< scratch area for DUPSORT put() */
1498
1332
  MDB_txn *me_txn; /**< current write transaction */
1499
1333
  MDB_txn *me_txn0; /**< prealloc'd write transaction */
1500
- mdb_size_t me_mapsize; /**< size of the data memory map */
1501
- MDB_OFF_T me_size; /**< current file size */
1334
+ size_t me_mapsize; /**< size of the data memory map */
1335
+ off_t me_size; /**< current file size */
1502
1336
  pgno_t me_maxpg; /**< me_mapsize / me_psize */
1503
1337
  MDB_dbx *me_dbxs; /**< array of static DB info */
1504
1338
  uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
@@ -1523,8 +1357,6 @@ struct MDB_env {
1523
1357
  int me_live_reader; /**< have liveness lock in reader table */
1524
1358
  #ifdef _WIN32
1525
1359
  int me_pidquery; /**< Used in OpenProcess */
1526
- OVERLAPPED *ov; /**< Used for for overlapping I/O requests */
1527
- int ovs; /**< Count of OVERLAPPEDs */
1528
1360
  #endif
1529
1361
  #ifdef MDB_USE_POSIX_MUTEX /* Posix mutexes reside in shared mem */
1530
1362
  # define me_rmutex me_txns->mti_rmutex /**< Shared reader lock */
@@ -1532,17 +1364,6 @@ struct MDB_env {
1532
1364
  #else
1533
1365
  mdb_mutex_t me_rmutex;
1534
1366
  mdb_mutex_t me_wmutex;
1535
- # if defined(_WIN32) || defined(MDB_USE_POSIX_SEM)
1536
- /** Half-initialized name of mutexes, to be completed by #MUTEXNAME() */
1537
- char me_mutexname[sizeof(MUTEXNAME_PREFIX) + 11];
1538
- # endif
1539
- #endif
1540
- #ifdef MDB_VL32
1541
- MDB_ID3L me_rpages; /**< like #mt_rpages, but global to env */
1542
- pthread_mutex_t me_rpmutex; /**< control access to #me_rpages */
1543
- #define MDB_ERPAGE_SIZE 16384
1544
- #define MDB_ERPAGE_MAX (MDB_ERPAGE_SIZE-1)
1545
- unsigned int me_rpcheck;
1546
1367
  #endif
1547
1368
  void *me_userctx; /**< User-settable context */
1548
1369
  MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
@@ -1604,7 +1425,7 @@ static int mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst);
1604
1425
  static int mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata,
1605
1426
  pgno_t newpgno, unsigned int nflags);
1606
1427
 
1607
- static int mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta);
1428
+ static int mdb_env_read_header(MDB_env *env, MDB_meta *meta);
1608
1429
  static MDB_meta *mdb_env_pick_meta(const MDB_env *env);
1609
1430
  static int mdb_env_write_meta(MDB_txn *txn);
1610
1431
  #if defined(MDB_USE_POSIX_MUTEX) && !defined(MDB_ROBUST_SUPPORTED) /* Drop unused excl arg */
@@ -1628,6 +1449,9 @@ static int mdb_update_key(MDB_cursor *mc, MDB_val *key);
1628
1449
  static void mdb_cursor_pop(MDB_cursor *mc);
1629
1450
  static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp);
1630
1451
 
1452
+ static int _mdb_cursor_del(MDB_cursor *mc, unsigned int flags);
1453
+ static int _mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data, unsigned int flags);
1454
+
1631
1455
  static int mdb_cursor_del0(MDB_cursor *mc);
1632
1456
  static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags);
1633
1457
  static int mdb_cursor_sibling(MDB_cursor *mc, int move_right);
@@ -1651,18 +1475,13 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead);
1651
1475
  static MDB_cmp_func mdb_cmp_memn, mdb_cmp_memnr, mdb_cmp_int, mdb_cmp_cint, mdb_cmp_long;
1652
1476
  /** @endcond */
1653
1477
 
1654
- /** Compare two items pointing at '#mdb_size_t's of unknown alignment. */
1478
+ /** Compare two items pointing at size_t's of unknown alignment. */
1655
1479
  #ifdef MISALIGNED_OK
1656
1480
  # define mdb_cmp_clong mdb_cmp_long
1657
1481
  #else
1658
1482
  # define mdb_cmp_clong mdb_cmp_cint
1659
1483
  #endif
1660
1484
 
1661
- /** True if we need #mdb_cmp_clong() instead of \b cmp for #MDB_INTEGERDUP */
1662
- #define NEED_CMP_CLONG(cmp, ksize) \
1663
- (UINT_MAX < MDB_SIZE_MAX && \
1664
- (cmp) == mdb_cmp_int && (ksize) == sizeof(mdb_size_t))
1665
-
1666
1485
  #ifdef _WIN32
1667
1486
  static SECURITY_DESCRIPTOR mdb_null_sd;
1668
1487
  static SECURITY_ATTRIBUTES mdb_all_sa;
@@ -1704,7 +1523,6 @@ static char *const mdb_errstr[] = {
1704
1523
  "MDB_BAD_TXN: Transaction must abort, has a child, or is invalid",
1705
1524
  "MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
1706
1525
  "MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
1707
- "MDB_PROBLEM: Unexpected problem - txn should abort",
1708
1526
  };
1709
1527
 
1710
1528
  char *
@@ -1749,9 +1567,11 @@ mdb_strerror(int err)
1749
1567
  buf[0] = 0;
1750
1568
  FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM |
1751
1569
  FORMAT_MESSAGE_IGNORE_INSERTS,
1752
- NULL, err, 0, ptr, MSGSIZE, (va_list *)buf+MSGSIZE);
1570
+ NULL, err, 0, ptr, MSGSIZE, NULL);
1753
1571
  return ptr;
1754
1572
  #else
1573
+ if (err < 0)
1574
+ return "Invalid error code";
1755
1575
  return strerror(err);
1756
1576
  #endif
1757
1577
  }
@@ -1789,7 +1609,7 @@ static pgno_t
1789
1609
  mdb_dbg_pgno(MDB_page *mp)
1790
1610
  {
1791
1611
  pgno_t ret;
1792
- COPY_PGNO(ret, mp->mp_pgno);
1612
+ COPY_PGNO(ret, MP_PGNO(mp));
1793
1613
  return ret;
1794
1614
  }
1795
1615
 
@@ -1823,6 +1643,18 @@ mdb_dkey(MDB_val *key, char *buf)
1823
1643
  return buf;
1824
1644
  }
1825
1645
 
1646
+ static char *
1647
+ mdb_dval(MDB_txn *txn, MDB_dbi dbi, MDB_val *data, char *buf)
1648
+ {
1649
+ if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
1650
+ mdb_dkey(data, buf+1);
1651
+ *buf = '[';
1652
+ strcpy(buf + data->mv_size * 2 + 1, "]");
1653
+ } else
1654
+ *buf = '\0';
1655
+ return buf;
1656
+ }
1657
+
1826
1658
  static const char *
1827
1659
  mdb_leafnode_type(MDB_node *n)
1828
1660
  {
@@ -1836,33 +1668,33 @@ void
1836
1668
  mdb_page_list(MDB_page *mp)
1837
1669
  {
1838
1670
  pgno_t pgno = mdb_dbg_pgno(mp);
1839
- const char *type, *state = (mp->mp_flags & P_DIRTY) ? ", dirty" : "";
1671
+ const char *type, *state = (MP_FLAGS(mp) & P_DIRTY) ? ", dirty" : "";
1840
1672
  MDB_node *node;
1841
1673
  unsigned int i, nkeys, nsize, total = 0;
1842
1674
  MDB_val key;
1843
1675
  DKBUF;
1844
1676
 
1845
- switch (mp->mp_flags & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) {
1677
+ switch (MP_FLAGS(mp) & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) {
1846
1678
  case P_BRANCH: type = "Branch page"; break;
1847
1679
  case P_LEAF: type = "Leaf page"; break;
1848
1680
  case P_LEAF|P_SUBP: type = "Sub-page"; break;
1849
1681
  case P_LEAF|P_LEAF2: type = "LEAF2 page"; break;
1850
1682
  case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break;
1851
1683
  case P_OVERFLOW:
1852
- fprintf(stderr, "Overflow page %"Yu" pages %u%s\n",
1684
+ fprintf(stderr, "Overflow page %"Z"u pages %u%s\n",
1853
1685
  pgno, mp->mp_pages, state);
1854
1686
  return;
1855
1687
  case P_META:
1856
- fprintf(stderr, "Meta-page %"Yu" txnid %"Yu"\n",
1688
+ fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n",
1857
1689
  pgno, ((MDB_meta *)METADATA(mp))->mm_txnid);
1858
1690
  return;
1859
1691
  default:
1860
- fprintf(stderr, "Bad page %"Yu" flags 0x%X\n", pgno, mp->mp_flags);
1692
+ fprintf(stderr, "Bad page %"Z"u flags 0x%X\n", pgno, MP_FLAGS(mp));
1861
1693
  return;
1862
1694
  }
1863
1695
 
1864
1696
  nkeys = NUMKEYS(mp);
1865
- fprintf(stderr, "%s %"Yu" numkeys %d%s\n", type, pgno, nkeys, state);
1697
+ fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state);
1866
1698
 
1867
1699
  for (i=0; i<nkeys; i++) {
1868
1700
  if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */
@@ -1877,7 +1709,7 @@ mdb_page_list(MDB_page *mp)
1877
1709
  key.mv_data = node->mn_data;
1878
1710
  nsize = NODESIZE + key.mv_size;
1879
1711
  if (IS_BRANCH(mp)) {
1880
- fprintf(stderr, "key %d: page %"Yu", %s\n", i, NODEPGNO(node),
1712
+ fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node),
1881
1713
  DKEY(&key));
1882
1714
  total += nsize;
1883
1715
  } else {
@@ -1893,7 +1725,7 @@ mdb_page_list(MDB_page *mp)
1893
1725
  total = EVEN(total);
1894
1726
  }
1895
1727
  fprintf(stderr, "Total: header %d + contents %d + unused %d\n",
1896
- IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp));
1728
+ IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + MP_LOWER(mp), total, SIZELEFT(mp));
1897
1729
  }
1898
1730
 
1899
1731
  void
@@ -1973,7 +1805,7 @@ static void mdb_audit(MDB_txn *txn)
1973
1805
  }
1974
1806
  }
1975
1807
  if (freecount + count + NUM_METAS != txn->mt_next_pgno) {
1976
- fprintf(stderr, "audit: %"Yu" freecount: %"Yu" count: %"Yu" total: %"Yu" next_pgno: %"Yu"\n",
1808
+ fprintf(stderr, "audit: %"Z"u freecount: %"Z"u count: %"Z"u total: %"Z"u next_pgno: %"Z"u\n",
1977
1809
  txn->mt_txnid, freecount, count+NUM_METAS,
1978
1810
  freecount+count+NUM_METAS, txn->mt_next_pgno);
1979
1811
  }
@@ -1990,8 +1822,10 @@ int
1990
1822
  mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b)
1991
1823
  {
1992
1824
  MDB_cmp_func *dcmp = txn->mt_dbxs[dbi].md_dcmp;
1993
- if (NEED_CMP_CLONG(dcmp, a->mv_size))
1825
+ #if UINT_MAX < SIZE_MAX
1826
+ if (dcmp == mdb_cmp_int && a->mv_size == sizeof(size_t))
1994
1827
  dcmp = mdb_cmp_clong;
1828
+ #endif
1995
1829
  return dcmp(a, b);
1996
1830
  }
1997
1831
 
@@ -2072,53 +1906,6 @@ mdb_dlist_free(MDB_txn *txn)
2072
1906
  dl[0].mid = 0;
2073
1907
  }
2074
1908
 
2075
- #ifdef MDB_VL32
2076
- static void
2077
- mdb_page_unref(MDB_txn *txn, MDB_page *mp)
2078
- {
2079
- pgno_t pgno;
2080
- MDB_ID3L tl = txn->mt_rpages;
2081
- unsigned x, rem;
2082
- if (mp->mp_flags & (P_SUBP|P_DIRTY))
2083
- return;
2084
- rem = mp->mp_pgno & (MDB_RPAGE_CHUNK-1);
2085
- pgno = mp->mp_pgno ^ rem;
2086
- x = mdb_mid3l_search(tl, pgno);
2087
- if (x != tl[0].mid && tl[x+1].mid == mp->mp_pgno)
2088
- x++;
2089
- if (tl[x].mref)
2090
- tl[x].mref--;
2091
- }
2092
- #define MDB_PAGE_UNREF(txn, mp) mdb_page_unref(txn, mp)
2093
-
2094
- static void
2095
- mdb_cursor_unref(MDB_cursor *mc)
2096
- {
2097
- int i;
2098
- if (mc->mc_txn->mt_rpages[0].mid) {
2099
- if (!mc->mc_snum || !mc->mc_pg[0] || IS_SUBP(mc->mc_pg[0]))
2100
- return;
2101
- for (i=0; i<mc->mc_snum; i++)
2102
- mdb_page_unref(mc->mc_txn, mc->mc_pg[i]);
2103
- if (mc->mc_ovpg) {
2104
- mdb_page_unref(mc->mc_txn, mc->mc_ovpg);
2105
- mc->mc_ovpg = 0;
2106
- }
2107
- }
2108
- mc->mc_snum = mc->mc_top = 0;
2109
- mc->mc_pg[0] = NULL;
2110
- mc->mc_flags &= ~C_INITIALIZED;
2111
- }
2112
- #define MDB_CURSOR_UNREF(mc, force) \
2113
- (((force) || ((mc)->mc_flags & C_INITIALIZED)) \
2114
- ? mdb_cursor_unref(mc) \
2115
- : (void)0)
2116
-
2117
- #else
2118
- #define MDB_PAGE_UNREF(txn, mp)
2119
- #define MDB_CURSOR_UNREF(mc, force) ((void)0)
2120
- #endif /* MDB_VL32 */
2121
-
2122
1909
  /** Loosen or free a single page.
2123
1910
  * Saves single pages to a list for future reuse
2124
1911
  * in this same txn. It has been pulled from the freeDB
@@ -2148,7 +1935,7 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
2148
1935
  if (mp != dl[x].mptr) { /* bad cursor? */
2149
1936
  mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
2150
1937
  txn->mt_flags |= MDB_TXN_ERROR;
2151
- return MDB_PROBLEM;
1938
+ return MDB_CORRUPTED;
2152
1939
  }
2153
1940
  /* ok, it's ours */
2154
1941
  loose = 1;
@@ -2160,7 +1947,8 @@ mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
2160
1947
  }
2161
1948
  }
2162
1949
  if (loose) {
2163
- DPRINTF(("loosen db %d page %"Yu, DDBI(mc), mp->mp_pgno));
1950
+ DPRINTF(("loosen db %d page %"Z"u", DDBI(mc),
1951
+ mp->mp_pgno));
2164
1952
  NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
2165
1953
  txn->mt_loose_pgs = mp;
2166
1954
  txn->mt_loose_count++;
@@ -2193,9 +1981,13 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
2193
1981
  unsigned i, j;
2194
1982
  int rc = MDB_SUCCESS, level;
2195
1983
 
2196
- /* Mark pages seen by cursors: First m0, then tracked cursors */
2197
- for (i = txn->mt_numdbs;; ) {
2198
- if (mc->mc_flags & C_INITIALIZED) {
1984
+ /* Mark pages seen by cursors */
1985
+ if (mc->mc_flags & C_UNTRACK)
1986
+ mc = NULL; /* will find mc in mt_cursors */
1987
+ for (i = txn->mt_numdbs;; mc = txn->mt_cursors[--i]) {
1988
+ for (; mc; mc=mc->mc_next) {
1989
+ if (!(mc->mc_flags & C_INITIALIZED))
1990
+ continue;
2199
1991
  for (m3 = mc;; m3 = &mx->mx_cursor) {
2200
1992
  mp = NULL;
2201
1993
  for (j=0; j<m3->mc_snum; j++) {
@@ -2214,13 +2006,10 @@ mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
2214
2006
  break;
2215
2007
  }
2216
2008
  }
2217
- mc = mc->mc_next;
2218
- for (; !mc || mc == m0; mc = txn->mt_cursors[--i])
2219
- if (i == 0)
2220
- goto mark_done;
2009
+ if (i == 0)
2010
+ break;
2221
2011
  }
2222
2012
 
2223
- mark_done:
2224
2013
  if (all) {
2225
2014
  /* Mark dirty root pages */
2226
2015
  for (i=0; i<txn->mt_numdbs; i++) {
@@ -2396,16 +2185,12 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
2396
2185
  {
2397
2186
  MDB_ID2 mid;
2398
2187
  int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
2399
- #ifdef _WIN32 /* With Windows we always write dirty pages with WriteFile,
2400
- * so we always want them ordered */
2401
- insert = mdb_mid2l_insert;
2402
- #else /* but otherwise with writemaps, we just use msync, we
2403
- * don't need the ordering and just append */
2404
- if (txn->mt_flags & MDB_TXN_WRITEMAP)
2188
+
2189
+ if (txn->mt_flags & MDB_TXN_WRITEMAP) {
2405
2190
  insert = mdb_mid2l_append;
2406
- else
2191
+ } else {
2407
2192
  insert = mdb_mid2l_insert;
2408
- #endif
2193
+ }
2409
2194
  mid.mid = mp->mp_pgno;
2410
2195
  mid.mptr = mp;
2411
2196
  rc = insert(txn->mt_u.dirty_list, &mid);
@@ -2421,8 +2206,6 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
2421
2206
  * Do not modify the freedB, just merge freeDB records into me_pghead[]
2422
2207
  * and move me_pglast to say which records were consumed. Only this
2423
2208
  * function can create me_pghead and move me_pglast/mt_next_pgno.
2424
- * When #MDB_DEVEL & 2, it is not affected by #mdb_freelist_save(): it
2425
- * then uses the transaction's original snapshot of the freeDB.
2426
2209
  * @param[in] mc cursor A cursor handle identifying the transaction and
2427
2210
  * database for which we are allocating.
2428
2211
  * @param[in] num the number of pages to allocate.
@@ -2460,7 +2243,8 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
2460
2243
  np = txn->mt_loose_pgs;
2461
2244
  txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np);
2462
2245
  txn->mt_loose_count--;
2463
- DPRINTF(("db %d use loose page %"Yu, DDBI(mc), np->mp_pgno));
2246
+ DPRINTF(("db %d use loose page %"Z"u", DDBI(mc),
2247
+ np->mp_pgno));
2464
2248
  *mp = np;
2465
2249
  return MDB_SUCCESS;
2466
2250
  }
@@ -2497,14 +2281,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
2497
2281
  last = env->me_pglast;
2498
2282
  oldest = env->me_pgoldest;
2499
2283
  mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
2500
- #if (MDB_DEVEL) & 2 /* "& 2" so MDB_DEVEL=1 won't hide bugs breaking freeDB */
2501
- /* Use original snapshot. TODO: Should need less care in code
2502
- * which modifies the database. Maybe we can delete some code?
2503
- */
2504
- m2.mc_flags |= C_ORIG_RDONLY;
2505
- m2.mc_db = &env->me_metas[(txn->mt_txnid-1) & 1]->mm_dbs[FREE_DBI];
2506
- m2.mc_dbflag = (unsigned char *)""; /* probably unnecessary */
2507
- #endif
2508
2284
  if (last) {
2509
2285
  op = MDB_SET_RANGE;
2510
2286
  key.mv_data = &last; /* will look up last+1 */
@@ -2562,10 +2338,10 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
2562
2338
  }
2563
2339
  env->me_pglast = last;
2564
2340
  #if (MDB_DEBUG) > 1
2565
- DPRINTF(("IDL read txn %"Yu" root %"Yu" num %u",
2341
+ DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u",
2566
2342
  last, txn->mt_dbs[FREE_DBI].md_root, i));
2567
2343
  for (j = i; j; j--)
2568
- DPRINTF(("IDL %"Yu, idl[j]));
2344
+ DPRINTF(("IDL %"Z"u", idl[j]));
2569
2345
  #endif
2570
2346
  /* Merge in descending sorted order */
2571
2347
  mdb_midl_xmerge(mop, idl);
@@ -2580,20 +2356,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
2580
2356
  rc = MDB_MAP_FULL;
2581
2357
  goto fail;
2582
2358
  }
2583
- #if defined(_WIN32) && !defined(MDB_VL32)
2584
- if (!(env->me_flags & MDB_RDONLY)) {
2585
- void *p;
2586
- p = (MDB_page *)(env->me_map + env->me_psize * pgno);
2587
- p = VirtualAlloc(p, env->me_psize * num, MEM_COMMIT,
2588
- (env->me_flags & MDB_WRITEMAP) ? PAGE_READWRITE:
2589
- PAGE_READONLY);
2590
- if (!p) {
2591
- DPUTS("VirtualAlloc failed");
2592
- rc = ErrCode();
2593
- goto fail;
2594
- }
2595
- }
2596
- #endif
2597
2359
 
2598
2360
  search_done:
2599
2361
  if (env->me_flags & MDB_WRITEMAP) {
@@ -2723,7 +2485,7 @@ mdb_page_touch(MDB_cursor *mc)
2723
2485
  pgno_t pgno;
2724
2486
  int rc;
2725
2487
 
2726
- if (!F_ISSET(mp->mp_flags, P_DIRTY)) {
2488
+ if (!F_ISSET(MP_FLAGS(mp), P_DIRTY)) {
2727
2489
  if (txn->mt_flags & MDB_TXN_SPILLS) {
2728
2490
  np = NULL;
2729
2491
  rc = mdb_page_unspill(txn, mp, &np);
@@ -2736,7 +2498,7 @@ mdb_page_touch(MDB_cursor *mc)
2736
2498
  (rc = mdb_page_alloc(mc, 1, &np)))
2737
2499
  goto fail;
2738
2500
  pgno = np->mp_pgno;
2739
- DPRINTF(("touched db %d page %"Yu" -> %"Yu, DDBI(mc),
2501
+ DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc),
2740
2502
  mp->mp_pgno, pgno));
2741
2503
  mdb_cassert(mc, mp->mp_pgno != pgno);
2742
2504
  mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
@@ -2760,7 +2522,7 @@ mdb_page_touch(MDB_cursor *mc)
2760
2522
  if (mp != dl[x].mptr) { /* bad cursor? */
2761
2523
  mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
2762
2524
  txn->mt_flags |= MDB_TXN_ERROR;
2763
- return MDB_PROBLEM;
2525
+ return MDB_CORRUPTED;
2764
2526
  }
2765
2527
  return 0;
2766
2528
  }
@@ -2804,7 +2566,6 @@ done:
2804
2566
  }
2805
2567
  }
2806
2568
  }
2807
- MDB_PAGE_UNREF(mc->mc_txn, mp);
2808
2569
  return 0;
2809
2570
 
2810
2571
  fail:
@@ -2813,22 +2574,18 @@ fail:
2813
2574
  }
2814
2575
 
2815
2576
  int
2816
- mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs)
2577
+ mdb_env_sync(MDB_env *env, int force)
2817
2578
  {
2818
2579
  int rc = 0;
2819
2580
  if (env->me_flags & MDB_RDONLY)
2820
2581
  return EACCES;
2821
- if (force
2822
- #ifndef _WIN32 /* Sync is normally achieved in Windows by doing WRITE_THROUGH writes */
2823
- || !(env->me_flags & MDB_NOSYNC)
2824
- #endif
2825
- ) {
2582
+ if (force || !F_ISSET(env->me_flags, MDB_NOSYNC)) {
2826
2583
  if (env->me_flags & MDB_WRITEMAP) {
2827
2584
  int flags = ((env->me_flags & MDB_MAPASYNC) && !force)
2828
2585
  ? MS_ASYNC : MS_SYNC;
2829
- if (MDB_MSYNC(env->me_map, env->me_psize * numpgs, flags))
2586
+ if (MDB_MSYNC(env->me_map, env->me_mapsize, flags))
2830
2587
  rc = ErrCode();
2831
- #ifdef _WIN32
2588
+ #if defined(_WIN32) || defined(__APPLE__)
2832
2589
  else if (flags == MS_SYNC && MDB_FDATASYNC(env->me_fd))
2833
2590
  rc = ErrCode();
2834
2591
  #endif
@@ -2846,13 +2603,6 @@ mdb_env_sync0(MDB_env *env, int force, pgno_t numpgs)
2846
2603
  return rc;
2847
2604
  }
2848
2605
 
2849
- int
2850
- mdb_env_sync(MDB_env *env, int force)
2851
- {
2852
- MDB_meta *m = mdb_env_pick_meta(env);
2853
- return mdb_env_sync0(env, force, m->mm_last_pg+1);
2854
- }
2855
-
2856
2606
  /** Back up parent txn's cursors, then grab the originals for tracking */
2857
2607
  static int
2858
2608
  mdb_cursor_shadow(MDB_txn *src, MDB_txn *dst)
@@ -3058,9 +2808,14 @@ mdb_txn_renew0(MDB_txn *txn)
3058
2808
  do /* LY: Retry on a race, ITS#7970. */
3059
2809
  r->mr_txnid = ti->mti_txnid;
3060
2810
  while(r->mr_txnid != ti->mti_txnid);
2811
+ if (!r->mr_txnid && (env->me_flags & MDB_RDONLY)) {
2812
+ meta = mdb_env_pick_meta(env);
2813
+ r->mr_txnid = meta->mm_txnid;
2814
+ } else {
2815
+ meta = env->me_metas[r->mr_txnid & 1];
2816
+ }
3061
2817
  txn->mt_txnid = r->mr_txnid;
3062
2818
  txn->mt_u.reader = r;
3063
- meta = env->me_metas[txn->mt_txnid & 1];
3064
2819
  }
3065
2820
 
3066
2821
  } else {
@@ -3077,7 +2832,7 @@ mdb_txn_renew0(MDB_txn *txn)
3077
2832
  txn->mt_txnid++;
3078
2833
  #if MDB_DEBUG
3079
2834
  if (txn->mt_txnid == mdb_debug_start)
3080
- mdb_debug = 1;
2835
+ mdb_debug = MDB_DBG_INFO;
3081
2836
  #endif
3082
2837
  txn->mt_child = NULL;
3083
2838
  txn->mt_loose_pgs = NULL;
@@ -3097,9 +2852,6 @@ mdb_txn_renew0(MDB_txn *txn)
3097
2852
 
3098
2853
  /* Moved to here to avoid a data race in read TXNs */
3099
2854
  txn->mt_next_pgno = meta->mm_last_pg+1;
3100
- #ifdef MDB_VL32
3101
- txn->mt_last_pgno = txn->mt_next_pgno - 1;
3102
- #endif
3103
2855
 
3104
2856
  txn->mt_flags = flags;
3105
2857
 
@@ -3135,7 +2887,7 @@ mdb_txn_renew(MDB_txn *txn)
3135
2887
 
3136
2888
  rc = mdb_txn_renew0(txn);
3137
2889
  if (rc == MDB_SUCCESS) {
3138
- DPRINTF(("renew txn %"Yu"%c %p on mdbenv %p, root page %"Yu,
2890
+ DPRINTF(("renew txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
3139
2891
  txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
3140
2892
  (void *)txn, (void *)txn->mt_env, txn->mt_dbs[MAIN_DBI].md_root));
3141
2893
  }
@@ -3178,17 +2930,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
3178
2930
  DPRINTF(("calloc: %s", strerror(errno)));
3179
2931
  return ENOMEM;
3180
2932
  }
3181
- #ifdef MDB_VL32
3182
- if (!parent) {
3183
- txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
3184
- if (!txn->mt_rpages) {
3185
- free(txn);
3186
- return ENOMEM;
3187
- }
3188
- txn->mt_rpages[0].mid = 0;
3189
- txn->mt_rpcheck = MDB_TRPAGE_SIZE/2;
3190
- }
3191
- #endif
3192
2933
  txn->mt_dbxs = env->me_dbxs; /* static */
3193
2934
  txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
3194
2935
  txn->mt_dbflags = (unsigned char *)txn + size - env->me_maxdbs;
@@ -3216,9 +2957,6 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
3216
2957
  parent->mt_child = txn;
3217
2958
  txn->mt_parent = parent;
3218
2959
  txn->mt_numdbs = parent->mt_numdbs;
3219
- #ifdef MDB_VL32
3220
- txn->mt_rpages = parent->mt_rpages;
3221
- #endif
3222
2960
  memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDB_db));
3223
2961
  /* Copy parent's mt_dbflags, but clear DB_NEW */
3224
2962
  for (i=0; i<txn->mt_numdbs; i++)
@@ -3245,18 +2983,17 @@ renew:
3245
2983
  }
3246
2984
  if (rc) {
3247
2985
  if (txn != env->me_txn0) {
3248
- #ifdef MDB_VL32
3249
- free(txn->mt_rpages);
3250
- #endif
2986
+ free(txn->mt_u.dirty_list);
3251
2987
  free(txn);
3252
2988
  }
3253
2989
  } else {
3254
2990
  txn->mt_flags |= flags; /* could not change txn=me_txn0 earlier */
3255
2991
  *ret = txn;
3256
- DPRINTF(("begin txn %"Yu"%c %p on mdbenv %p, root page %"Yu,
2992
+ DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
3257
2993
  txn->mt_txnid, (flags & MDB_RDONLY) ? 'r' : 'w',
3258
2994
  (void *) txn, (void *) env, txn->mt_dbs[MAIN_DBI].md_root));
3259
2995
  }
2996
+ MDB_TRACE(("%p, %p, %u = %p", env, parent, flags, txn));
3260
2997
 
3261
2998
  return rc;
3262
2999
  }
@@ -3268,7 +3005,7 @@ mdb_txn_env(MDB_txn *txn)
3268
3005
  return txn->mt_env;
3269
3006
  }
3270
3007
 
3271
- mdb_size_t
3008
+ size_t
3272
3009
  mdb_txn_id(MDB_txn *txn)
3273
3010
  {
3274
3011
  if(!txn) return 0;
@@ -3320,7 +3057,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
3320
3057
  /* Export or close DBI handles opened in this txn */
3321
3058
  mdb_dbis_update(txn, mode & MDB_END_UPDATE);
3322
3059
 
3323
- DPRINTF(("%s txn %"Yu"%c %p on mdbenv %p, root page %"Yu,
3060
+ DPRINTF(("%s txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
3324
3061
  names[mode & MDB_END_OPMASK],
3325
3062
  txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
3326
3063
  (void *) txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root));
@@ -3349,6 +3086,7 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
3349
3086
 
3350
3087
  txn->mt_numdbs = 0;
3351
3088
  txn->mt_flags = MDB_TXN_FINISHED;
3089
+ mdb_midl_free(txn->mt_spill_pgs);
3352
3090
 
3353
3091
  if (!txn->mt_parent) {
3354
3092
  mdb_midl_shrink(&txn->mt_free_pgs);
@@ -3370,35 +3108,10 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
3370
3108
  mdb_midl_free(txn->mt_free_pgs);
3371
3109
  free(txn->mt_u.dirty_list);
3372
3110
  }
3373
- mdb_midl_free(txn->mt_spill_pgs);
3374
3111
 
3375
3112
  mdb_midl_free(pghead);
3376
3113
  }
3377
- #ifdef MDB_VL32
3378
- if (!txn->mt_parent) {
3379
- MDB_ID3L el = env->me_rpages, tl = txn->mt_rpages;
3380
- unsigned i, x, n = tl[0].mid;
3381
- pthread_mutex_lock(&env->me_rpmutex);
3382
- for (i = 1; i <= n; i++) {
3383
- if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
3384
- /* tmp overflow pages that we didn't share in env */
3385
- munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
3386
- } else {
3387
- x = mdb_mid3l_search(el, tl[i].mid);
3388
- if (tl[i].mptr == el[x].mptr) {
3389
- el[x].mref--;
3390
- } else {
3391
- /* another tmp overflow page */
3392
- munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
3393
- }
3394
- }
3395
- }
3396
- pthread_mutex_unlock(&env->me_rpmutex);
3397
- tl[0].mid = 0;
3398
- if (mode & MDB_END_FREE)
3399
- free(tl);
3400
- }
3401
- #endif
3114
+
3402
3115
  if (mode & MDB_END_FREE)
3403
3116
  free(txn);
3404
3117
  }
@@ -3416,23 +3129,27 @@ mdb_txn_reset(MDB_txn *txn)
3416
3129
  mdb_txn_end(txn, MDB_END_RESET);
3417
3130
  }
3418
3131
 
3419
- void
3420
- mdb_txn_abort(MDB_txn *txn)
3132
+ static void
3133
+ _mdb_txn_abort(MDB_txn *txn)
3421
3134
  {
3422
3135
  if (txn == NULL)
3423
3136
  return;
3424
3137
 
3425
3138
  if (txn->mt_child)
3426
- mdb_txn_abort(txn->mt_child);
3139
+ _mdb_txn_abort(txn->mt_child);
3427
3140
 
3428
3141
  mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE);
3429
3142
  }
3430
3143
 
3144
+ void
3145
+ mdb_txn_abort(MDB_txn *txn)
3146
+ {
3147
+ MDB_TRACE(("%p", txn));
3148
+ _mdb_txn_abort(txn);
3149
+ }
3150
+
3431
3151
  /** Save the freelist as of this transaction to the freeDB.
3432
3152
  * This changes the freelist. Keep trying until it stabilizes.
3433
- *
3434
- * When (MDB_DEVEL) & 2, the changes do not affect #mdb_page_alloc(),
3435
- * it then uses the transaction's original snapshot of the freeDB.
3436
3153
  */
3437
3154
  static int
3438
3155
  mdb_freelist_save(MDB_txn *txn)
@@ -3521,7 +3238,7 @@ mdb_freelist_save(MDB_txn *txn)
3521
3238
  pglast = head_id = *(txnid_t *)key.mv_data;
3522
3239
  total_room = head_room = 0;
3523
3240
  mdb_tassert(txn, pglast <= env->me_pglast);
3524
- rc = mdb_cursor_del(&mc, 0);
3241
+ rc = _mdb_cursor_del(&mc, 0);
3525
3242
  if (rc)
3526
3243
  return rc;
3527
3244
  }
@@ -3541,7 +3258,7 @@ mdb_freelist_save(MDB_txn *txn)
3541
3258
  do {
3542
3259
  freecnt = free_pgs[0];
3543
3260
  data.mv_size = MDB_IDL_SIZEOF(free_pgs);
3544
- rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
3261
+ rc = _mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
3545
3262
  if (rc)
3546
3263
  return rc;
3547
3264
  /* Retry if mt_free_pgs[] grew during the Put() */
@@ -3552,10 +3269,10 @@ mdb_freelist_save(MDB_txn *txn)
3552
3269
  #if (MDB_DEBUG) > 1
3553
3270
  {
3554
3271
  unsigned int i = free_pgs[0];
3555
- DPRINTF(("IDL write txn %"Yu" root %"Yu" num %u",
3272
+ DPRINTF(("IDL write txn %"Z"u root %"Z"u num %u",
3556
3273
  txn->mt_txnid, txn->mt_dbs[FREE_DBI].md_root, i));
3557
3274
  for (; i; i--)
3558
- DPRINTF(("IDL %"Yu, free_pgs[i]));
3275
+ DPRINTF(("IDL %"Z"u", free_pgs[i]));
3559
3276
  }
3560
3277
  #endif
3561
3278
  continue;
@@ -3590,7 +3307,7 @@ mdb_freelist_save(MDB_txn *txn)
3590
3307
  key.mv_size = sizeof(head_id);
3591
3308
  key.mv_data = &head_id;
3592
3309
  data.mv_size = (head_room + 1) * sizeof(pgno_t);
3593
- rc = mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
3310
+ rc = _mdb_cursor_put(&mc, &key, &data, MDB_RESERVE);
3594
3311
  if (rc)
3595
3312
  return rc;
3596
3313
  /* IDL is initially empty, zero out at least the length */
@@ -3645,7 +3362,7 @@ mdb_freelist_save(MDB_txn *txn)
3645
3362
  data.mv_data = mop -= len;
3646
3363
  save = mop[0];
3647
3364
  mop[0] = len;
3648
- rc = mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
3365
+ rc = _mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
3649
3366
  mop[0] = save;
3650
3367
  if (rc || !(mop_len -= len))
3651
3368
  break;
@@ -3666,31 +3383,21 @@ mdb_page_flush(MDB_txn *txn, int keep)
3666
3383
  MDB_ID2L dl = txn->mt_u.dirty_list;
3667
3384
  unsigned psize = env->me_psize, j;
3668
3385
  int i, pagecount = dl[0].mid, rc;
3669
- size_t size = 0;
3670
- MDB_OFF_T pos = 0;
3386
+ size_t size = 0, pos = 0;
3671
3387
  pgno_t pgno = 0;
3672
3388
  MDB_page *dp = NULL;
3673
3389
  #ifdef _WIN32
3674
- OVERLAPPED *ov = env->ov;
3675
- MDB_page *wdp;
3676
- int async_i = 0;
3677
- HANDLE fd = (env->me_flags & MDB_NOSYNC) ? env->me_fd : env->me_ovfd;
3390
+ OVERLAPPED ov;
3678
3391
  #else
3679
3392
  struct iovec iov[MDB_COMMIT_PAGES];
3680
- HANDLE fd = env->me_fd;
3681
- #endif
3682
- ssize_t wsize = 0, wres;
3683
- MDB_OFF_T wpos = 0, next_pos = 1; /* impossible pos, so pos != next_pos */
3393
+ ssize_t wpos = 0, wsize = 0, wres;
3394
+ size_t next_pos = 1; /* impossible pos, so pos != next_pos */
3684
3395
  int n = 0;
3396
+ #endif
3685
3397
 
3686
3398
  j = i = keep;
3687
- if (env->me_flags & MDB_WRITEMAP
3688
- #ifdef _WIN32
3689
- /* In windows, we still do writes to the file (with write-through enabled in sync mode),
3690
- * as this is faster than FlushViewOfFile/FlushFileBuffers */
3691
- && (env->me_flags & MDB_NOSYNC)
3692
- #endif
3693
- ) {
3399
+
3400
+ if (env->me_flags & MDB_WRITEMAP) {
3694
3401
  /* Clear dirty flags */
3695
3402
  while (++i <= pagecount) {
3696
3403
  dp = dl[i].mptr;
@@ -3705,27 +3412,6 @@ mdb_page_flush(MDB_txn *txn, int keep)
3705
3412
  goto done;
3706
3413
  }
3707
3414
 
3708
- #ifdef _WIN32
3709
- if (pagecount - keep >= env->ovs) {
3710
- /* ran out of room in ov array, and re-malloc, copy handles and free previous */
3711
- int ovs = (pagecount - keep) * 1.5; /* provide extra padding to reduce number of re-allocations */
3712
- int new_size = ovs * sizeof(OVERLAPPED);
3713
- ov = malloc(new_size);
3714
- if (ov == NULL)
3715
- return ENOMEM;
3716
- int previous_size = env->ovs * sizeof(OVERLAPPED);
3717
- memcpy(ov, env->ov, previous_size); /* Copy previous OVERLAPPED data to retain event handles */
3718
- /* And clear rest of memory */
3719
- memset(&ov[env->ovs], 0, new_size - previous_size);
3720
- if (env->ovs > 0) {
3721
- free(env->ov); /* release previous allocation */
3722
- }
3723
-
3724
- env->ov = ov;
3725
- env->ovs = ovs;
3726
- }
3727
- #endif
3728
-
3729
3415
  /* Write the pages */
3730
3416
  for (;;) {
3731
3417
  if (++i <= pagecount) {
@@ -3743,65 +3429,46 @@ mdb_page_flush(MDB_txn *txn, int keep)
3743
3429
  size = psize;
3744
3430
  if (IS_OVERFLOW(dp)) size *= dp->mp_pages;
3745
3431
  }
3746
- /* Write up to MDB_COMMIT_PAGES dirty pages at a time. */
3747
- if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE
3748
3432
  #ifdef _WIN32
3749
- /* If writemap is enabled, consecutive page positions infer
3750
- * contiguous (mapped) memory.
3751
- * Otherwise force write pages one at a time.
3752
- * Windows actually supports scatter/gather I/O, but only on
3753
- * unbuffered file handles. Since we're relying on the OS page
3754
- * cache for all our data, that's self-defeating. So we just
3755
- * write pages one at a time. We use the ov structure to set
3756
- * the write offset, to at least save the overhead of a Seek
3757
- * system call.
3758
- */
3759
- || !(env->me_flags & MDB_WRITEMAP)
3760
- #endif
3761
- ) {
3433
+ else break;
3434
+
3435
+ /* Windows actually supports scatter/gather I/O, but only on
3436
+ * unbuffered file handles. Since we're relying on the OS page
3437
+ * cache for all our data, that's self-defeating. So we just
3438
+ * write pages one at a time. We use the ov structure to set
3439
+ * the write offset, to at least save the overhead of a Seek
3440
+ * system call.
3441
+ */
3442
+ DPRINTF(("committing page %"Z"u", pgno));
3443
+ memset(&ov, 0, sizeof(ov));
3444
+ ov.Offset = pos & 0xffffffff;
3445
+ ov.OffsetHigh = pos >> 16 >> 16;
3446
+ if (!WriteFile(env->me_fd, dp, size, NULL, &ov)) {
3447
+ rc = ErrCode();
3448
+ DPRINTF(("WriteFile: %d", rc));
3449
+ return rc;
3450
+ }
3451
+ #else
3452
+ /* Write up to MDB_COMMIT_PAGES dirty pages at a time. */
3453
+ if (pos!=next_pos || n==MDB_COMMIT_PAGES || wsize+size>MAX_WRITE) {
3762
3454
  if (n) {
3763
3455
  retry_write:
3764
3456
  /* Write previous page(s) */
3765
- DPRINTF(("committing page %"Z"u", pgno));
3766
- #ifdef _WIN32
3767
- OVERLAPPED *this_ov = &ov[async_i];
3768
- /* Clear status, and keep hEvent, we reuse that */
3769
- this_ov->Internal = 0;
3770
- this_ov->Offset = wpos & 0xffffffff;
3771
- this_ov->OffsetHigh = wpos >> 16 >> 16;
3772
- if (!F_ISSET(env->me_flags, MDB_NOSYNC) && !this_ov->hEvent) {
3773
- HANDLE event = CreateEvent(NULL, FALSE, FALSE, NULL);
3774
- if (!event) {
3775
- rc = ErrCode();
3776
- DPRINTF(("CreateEvent: %s", strerror(rc)));
3777
- return rc;
3778
- }
3779
- this_ov->hEvent = event;
3780
- }
3781
- if (!WriteFile(fd, wdp, wsize, NULL, this_ov)) {
3782
- rc = ErrCode();
3783
- if (rc != ERROR_IO_PENDING) {
3784
- DPRINTF(("WriteFile: %d", rc));
3785
- return rc;
3786
- }
3787
- }
3788
- async_i++;
3789
- #else
3790
3457
  #ifdef MDB_USE_PWRITEV
3791
- wres = pwritev(fd, iov, n, wpos);
3458
+ wres = pwritev(env->me_fd, iov, n, wpos);
3792
3459
  #else
3793
3460
  if (n == 1) {
3794
- wres = pwrite(fd, iov[0].iov_base, wsize, wpos);
3461
+ wres = pwrite(env->me_fd, iov[0].iov_base, wsize, wpos);
3795
3462
  } else {
3796
3463
  retry_seek:
3797
- if (lseek(fd, wpos, SEEK_SET) == -1) {
3464
+ if (lseek(env->me_fd, wpos, SEEK_SET) == -1) {
3798
3465
  rc = ErrCode();
3799
3466
  if (rc == EINTR)
3800
3467
  goto retry_seek;
3801
3468
  DPRINTF(("lseek: %s", strerror(rc)));
3802
3469
  return rc;
3803
3470
  }
3804
- wres = writev(fd, iov, n);
3471
+ wres = writev(env->me_fd, iov, n);
3805
3472
  }
3806
3473
  #endif
3807
3474
  if (wres != wsize) {
@@ -3816,69 +3483,37 @@ retry_seek:
3816
3483
  }
3817
3484
  return rc;
3818
3485
  }
3819
- #endif /* _WIN32 */
3820
3486
  n = 0;
3821
3487
  }
3822
3488
  if (i > pagecount)
3823
3489
  break;
3824
3490
  wpos = pos;
3825
3491
  wsize = 0;
3826
- #ifdef _WIN32
3827
- wdp = dp;
3828
- }
3829
- #else
3830
3492
  }
3493
+ DPRINTF(("committing page %"Z"u", pgno));
3494
+ next_pos = pos + size;
3831
3495
  iov[n].iov_len = size;
3832
3496
  iov[n].iov_base = (char *)dp;
3833
- #endif /* _WIN32 */
3834
- DPRINTF(("committing page %"Yu, pgno));
3835
- next_pos = pos + size;
3836
3497
  wsize += size;
3837
3498
  n++;
3838
- }
3839
- #ifdef MDB_VL32
3840
- if (pgno > txn->mt_last_pgno)
3841
- txn->mt_last_pgno = pgno;
3842
- #endif
3843
-
3844
- #ifdef _WIN32
3845
- if (!F_ISSET(env->me_flags, MDB_NOSYNC)) {
3846
- /* Now wait for all the asynchronous/overlapped sync/write-through writes to complete.
3847
- * We start with the last one so that all the others should already be complete and
3848
- * we reduce thread suspend/resuming (in practice, typically about 99.5% of writes are
3849
- * done after the last write is done) */
3850
- rc = 0;
3851
- while (--async_i >= 0) {
3852
- if (ov[async_i].hEvent) {
3853
- if (!GetOverlappedResult(fd, &ov[async_i], &wres, TRUE)) {
3854
- rc = ErrCode(); /* Continue on so that all the event signals are reset */
3855
- }
3856
- }
3857
- }
3858
- if (rc) { /* any error on GetOverlappedResult, exit now */
3859
- return rc;
3860
- }
3861
- }
3862
3499
  #endif /* _WIN32 */
3500
+ }
3863
3501
 
3864
- if (!(env->me_flags & MDB_WRITEMAP)) {
3865
- /* Don't free pages when using writemap (can only get here in NOSYNC mode in Windows)
3866
- * MIPS has cache coherency issues, this is a no-op everywhere else
3867
- * Note: for any size >= on-chip cache size, entire on-chip cache is
3868
- * flushed.
3869
- */
3870
- CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE);
3502
+ /* MIPS has cache coherency issues, this is a no-op everywhere else
3503
+ * Note: for any size >= on-chip cache size, entire on-chip cache is
3504
+ * flushed.
3505
+ */
3506
+ CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE);
3871
3507
 
3872
- for (i = keep; ++i <= pagecount; ) {
3873
- dp = dl[i].mptr;
3874
- /* This is a page we skipped above */
3875
- if (!dl[i].mid) {
3876
- dl[++j] = dl[i];
3877
- dl[j].mid = dp->mp_pgno;
3878
- continue;
3879
- }
3880
- mdb_dpage_free(env, dp);
3508
+ for (i = keep; ++i <= pagecount; ) {
3509
+ dp = dl[i].mptr;
3510
+ /* This is a page we skipped above */
3511
+ if (!dl[i].mid) {
3512
+ dl[++j] = dl[i];
3513
+ dl[j].mid = dp->mp_pgno;
3514
+ continue;
3881
3515
  }
3516
+ mdb_dpage_free(env, dp);
3882
3517
  }
3883
3518
 
3884
3519
  done:
@@ -3888,10 +3523,8 @@ done:
3888
3523
  return MDB_SUCCESS;
3889
3524
  }
3890
3525
 
3891
- static int ESECT mdb_env_share_locks(MDB_env *env, int *excl);
3892
-
3893
- int
3894
- mdb_txn_commit(MDB_txn *txn)
3526
+ static int
3527
+ _mdb_txn_commit(MDB_txn *txn)
3895
3528
  {
3896
3529
  int rc;
3897
3530
  unsigned int i, end_mode;
@@ -3904,7 +3537,7 @@ mdb_txn_commit(MDB_txn *txn)
3904
3537
  end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE;
3905
3538
 
3906
3539
  if (txn->mt_child) {
3907
- rc = mdb_txn_commit(txn->mt_child);
3540
+ rc = _mdb_txn_commit(txn->mt_child);
3908
3541
  if (rc)
3909
3542
  goto fail;
3910
3543
  }
@@ -4066,7 +3699,7 @@ mdb_txn_commit(MDB_txn *txn)
4066
3699
  !(txn->mt_flags & (MDB_TXN_DIRTY|MDB_TXN_SPILLS)))
4067
3700
  goto done;
4068
3701
 
4069
- DPRINTF(("committing txn %"Yu" %p on mdbenv %p, root page %"Yu,
3702
+ DPRINTF(("committing txn %"Z"u %p on mdbenv %p, root page %"Z"u",
4070
3703
  txn->mt_txnid, (void*)txn, (void*)env, txn->mt_dbs[MAIN_DBI].md_root));
4071
3704
 
4072
3705
  /* Update DB root pointers */
@@ -4084,7 +3717,7 @@ mdb_txn_commit(MDB_txn *txn)
4084
3717
  goto fail;
4085
3718
  }
4086
3719
  data.mv_data = &txn->mt_dbs[i];
4087
- rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data,
3720
+ rc = _mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data,
4088
3721
  F_SUBDATA);
4089
3722
  if (rc)
4090
3723
  goto fail;
@@ -4104,42 +3737,36 @@ mdb_txn_commit(MDB_txn *txn)
4104
3737
  mdb_audit(txn);
4105
3738
  #endif
4106
3739
 
4107
- if ((rc = mdb_page_flush(txn, 0)))
4108
- goto fail;
4109
- if (!F_ISSET(txn->mt_flags, MDB_TXN_NOSYNC) &&
4110
- (rc = mdb_env_sync0(env, 0, txn->mt_next_pgno)))
4111
- goto fail;
4112
- if ((rc = mdb_env_write_meta(txn)))
3740
+ if ((rc = mdb_page_flush(txn, 0)) ||
3741
+ (rc = mdb_env_sync(env, 0)) ||
3742
+ (rc = mdb_env_write_meta(txn)))
4113
3743
  goto fail;
4114
3744
  end_mode = MDB_END_COMMITTED|MDB_END_UPDATE;
4115
- if (env->me_flags & MDB_PREVSNAPSHOT) {
4116
- if (!(env->me_flags & MDB_NOLOCK)) {
4117
- int excl;
4118
- rc = mdb_env_share_locks(env, &excl);
4119
- if (rc)
4120
- goto fail;
4121
- }
4122
- env->me_flags ^= MDB_PREVSNAPSHOT;
4123
- }
4124
3745
 
4125
3746
  done:
4126
3747
  mdb_txn_end(txn, end_mode);
4127
3748
  return MDB_SUCCESS;
4128
3749
 
4129
3750
  fail:
4130
- mdb_txn_abort(txn);
3751
+ _mdb_txn_abort(txn);
4131
3752
  return rc;
4132
3753
  }
4133
3754
 
3755
+ int
3756
+ mdb_txn_commit(MDB_txn *txn)
3757
+ {
3758
+ MDB_TRACE(("%p", txn));
3759
+ return _mdb_txn_commit(txn);
3760
+ }
3761
+
4134
3762
  /** Read the environment parameters of a DB environment before
4135
3763
  * mapping it into memory.
4136
3764
  * @param[in] env the environment handle
4137
- * @param[in] prev whether to read the backup meta page
4138
3765
  * @param[out] meta address of where to store the meta information
4139
3766
  * @return 0 on success, non-zero on failure.
4140
3767
  */
4141
3768
  static int ESECT
4142
- mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta)
3769
+ mdb_env_read_header(MDB_env *env, MDB_meta *meta)
4143
3770
  {
4144
3771
  MDB_metabuf pbuf;
4145
3772
  MDB_page *p;
@@ -4174,7 +3801,7 @@ mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta)
4174
3801
  p = (MDB_page *)&pbuf;
4175
3802
 
4176
3803
  if (!F_ISSET(p->mp_flags, P_META)) {
4177
- DPRINTF(("page %"Yu" not a meta page", p->mp_pgno));
3804
+ DPRINTF(("page %"Z"u not a meta page", p->mp_pgno));
4178
3805
  return MDB_INVALID;
4179
3806
  }
4180
3807
 
@@ -4190,7 +3817,7 @@ mdb_env_read_header(MDB_env *env, int prev, MDB_meta *meta)
4190
3817
  return MDB_VERSION_MISMATCH;
4191
3818
  }
4192
3819
 
4193
- if (off == 0 || (prev ? m->mm_txnid < meta->mm_txnid : m->mm_txnid > meta->mm_txnid))
3820
+ if (off == 0 || m->mm_txnid > meta->mm_txnid)
4194
3821
  *meta = *m;
4195
3822
  }
4196
3823
  return 0;
@@ -4236,6 +3863,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
4236
3863
  if (len == -1 && ErrCode() == EINTR) continue; \
4237
3864
  rc = (len >= 0); break; } while(1)
4238
3865
  #endif
3866
+
4239
3867
  DPUTS("writing new meta page");
4240
3868
 
4241
3869
  psize = env->me_psize;
@@ -4243,6 +3871,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
4243
3871
  p = calloc(NUM_METAS, psize);
4244
3872
  if (!p)
4245
3873
  return ENOMEM;
3874
+
4246
3875
  p->mp_pgno = 0;
4247
3876
  p->mp_flags = P_META;
4248
3877
  *(MDB_meta *)METADATA(p) = *meta;
@@ -4273,8 +3902,8 @@ mdb_env_write_meta(MDB_txn *txn)
4273
3902
  MDB_env *env;
4274
3903
  MDB_meta meta, metab, *mp;
4275
3904
  unsigned flags;
4276
- mdb_size_t mapsize;
4277
- MDB_OFF_T off;
3905
+ size_t mapsize;
3906
+ off_t off;
4278
3907
  int rc, len, toggle;
4279
3908
  char *ptr;
4280
3909
  HANDLE mfd;
@@ -4285,18 +3914,17 @@ mdb_env_write_meta(MDB_txn *txn)
4285
3914
  #endif
4286
3915
 
4287
3916
  toggle = txn->mt_txnid & 1;
4288
- DPRINTF(("writing meta page %d for root page %"Yu,
3917
+ DPRINTF(("writing meta page %d for root page %"Z"u",
4289
3918
  toggle, txn->mt_dbs[MAIN_DBI].md_root));
4290
3919
 
4291
3920
  env = txn->mt_env;
4292
- flags = txn->mt_flags | env->me_flags;
3921
+ flags = env->me_flags;
4293
3922
  mp = env->me_metas[toggle];
4294
3923
  mapsize = env->me_metas[toggle ^ 1]->mm_mapsize;
4295
3924
  /* Persist any increases of mapsize config */
4296
3925
  if (mapsize < env->me_mapsize)
4297
3926
  mapsize = env->me_mapsize;
4298
3927
 
4299
- #ifndef _WIN32 /* We don't want to ever use MSYNC/FlushViewOfFile in Windows */
4300
3928
  if (flags & MDB_WRITEMAP) {
4301
3929
  mp->mm_mapsize = mapsize;
4302
3930
  mp->mm_dbs[FREE_DBI] = txn->mt_dbs[FREE_DBI];
@@ -4312,10 +3940,11 @@ mdb_env_write_meta(MDB_txn *txn)
4312
3940
  unsigned meta_size = env->me_psize;
4313
3941
  rc = (env->me_flags & MDB_MAPASYNC) ? MS_ASYNC : MS_SYNC;
4314
3942
  ptr = (char *)mp - PAGEHDRSZ;
4315
- /* POSIX msync() requires ptr = start of OS page */
3943
+ #ifndef _WIN32 /* POSIX msync() requires ptr = start of OS page */
4316
3944
  r2 = (ptr - env->me_map) & (env->me_os_psize - 1);
4317
3945
  ptr -= r2;
4318
3946
  meta_size += r2;
3947
+ #endif
4319
3948
  if (MDB_MSYNC(ptr, meta_size, rc)) {
4320
3949
  rc = ErrCode();
4321
3950
  goto fail;
@@ -4323,7 +3952,6 @@ mdb_env_write_meta(MDB_txn *txn)
4323
3952
  }
4324
3953
  goto done;
4325
3954
  }
4326
- #endif
4327
3955
  metab.mm_txnid = mp->mm_txnid;
4328
3956
  metab.mm_last_pg = mp->mm_last_pg;
4329
3957
 
@@ -4402,8 +4030,7 @@ static MDB_meta *
4402
4030
  mdb_env_pick_meta(const MDB_env *env)
4403
4031
  {
4404
4032
  MDB_meta *const *metas = env->me_metas;
4405
- return metas[ (metas[0]->mm_txnid < metas[1]->mm_txnid) ^
4406
- ((env->me_flags & MDB_PREVSNAPSHOT) != 0) ];
4033
+ return metas[ metas[0]->mm_txnid < metas[1]->mm_txnid ];
4407
4034
  }
4408
4035
 
4409
4036
  int ESECT
@@ -4423,30 +4050,15 @@ mdb_env_create(MDB_env **env)
4423
4050
  #ifdef MDB_USE_POSIX_SEM
4424
4051
  e->me_rmutex = SEM_FAILED;
4425
4052
  e->me_wmutex = SEM_FAILED;
4426
- #elif defined MDB_USE_SYSV_SEM
4427
- e->me_rmutex->semid = -1;
4428
- e->me_wmutex->semid = -1;
4429
4053
  #endif
4430
4054
  e->me_pid = getpid();
4431
4055
  GET_PAGESIZE(e->me_os_psize);
4432
4056
  VGMEMP_CREATE(e,0,0);
4433
4057
  *env = e;
4058
+ MDB_TRACE(("%p", e));
4434
4059
  return MDB_SUCCESS;
4435
4060
  }
4436
4061
 
4437
- #ifdef _WIN32
4438
- /** @brief Map a result from an NTAPI call to WIN32. */
4439
- static DWORD
4440
- mdb_nt2win32(NTSTATUS st)
4441
- {
4442
- OVERLAPPED o = {0};
4443
- DWORD br;
4444
- o.Internal = st;
4445
- GetOverlappedResult(NULL, &o, &br, FALSE);
4446
- return GetLastError();
4447
- }
4448
- #endif
4449
-
4450
4062
  static int ESECT
4451
4063
  mdb_env_map(MDB_env *env, void *addr)
4452
4064
  {
@@ -4454,54 +4066,42 @@ mdb_env_map(MDB_env *env, void *addr)
4454
4066
  unsigned int flags = env->me_flags;
4455
4067
  #ifdef _WIN32
4456
4068
  int rc;
4457
- int access = SECTION_MAP_READ;
4458
4069
  HANDLE mh;
4459
- void *map;
4460
- SIZE_T msize;
4461
- ULONG pageprot = PAGE_READONLY, secprot, alloctype;
4070
+ LONG sizelo, sizehi;
4071
+ size_t msize;
4462
4072
 
4463
- if (flags & MDB_WRITEMAP) {
4464
- access |= SECTION_MAP_WRITE;
4465
- pageprot = PAGE_READWRITE;
4466
- }
4467
4073
  if (flags & MDB_RDONLY) {
4468
- secprot = PAGE_READONLY;
4074
+ /* Don't set explicit map size, use whatever exists */
4469
4075
  msize = 0;
4470
- alloctype = 0;
4076
+ sizelo = 0;
4077
+ sizehi = 0;
4471
4078
  } else {
4472
- secprot = PAGE_READWRITE;
4473
4079
  msize = env->me_mapsize;
4474
- alloctype = MEM_RESERVE;
4080
+ sizelo = msize & 0xffffffff;
4081
+ sizehi = msize >> 16 >> 16; /* only needed on Win64 */
4082
+
4083
+ /* Windows won't create mappings for zero length files.
4084
+ * and won't map more than the file size.
4085
+ * Just set the maxsize right now.
4086
+ */
4087
+ if (!(flags & MDB_WRITEMAP) && (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo
4088
+ || !SetEndOfFile(env->me_fd)
4089
+ || SetFilePointer(env->me_fd, 0, NULL, 0) != 0))
4090
+ return ErrCode();
4475
4091
  }
4476
4092
 
4477
- /** Some users are afraid of seeing their disk space getting used
4478
- * all at once, so the default is now to do incremental file growth.
4479
- * But that has a large performance impact, so give the option of
4480
- * allocating the file up front.
4481
- */
4482
- #ifdef MDB_FIXEDSIZE
4483
- LARGE_INTEGER fsize;
4484
- fsize.LowPart = msize & 0xffffffff;
4485
- fsize.HighPart = msize >> 16 >> 16;
4486
- rc = NtCreateSection(&mh, access, NULL, &fsize, secprot, SEC_RESERVE, env->me_fd);
4487
- #else
4488
- rc = NtCreateSection(&mh, access, NULL, NULL, secprot, SEC_RESERVE, env->me_fd);
4489
- #endif
4490
- if (rc)
4491
- return mdb_nt2win32(rc);
4492
- map = addr;
4493
- #ifdef MDB_VL32
4494
- msize = NUM_METAS * env->me_psize;
4495
- #endif
4496
- rc = NtMapViewOfSection(mh, GetCurrentProcess(), &map, 0, 0, NULL, &msize, ViewUnmap, alloctype, pageprot);
4497
- #ifdef MDB_VL32
4498
- env->me_fmh = mh;
4499
- #else
4500
- NtClose(mh);
4501
- #endif
4093
+ mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ?
4094
+ PAGE_READWRITE : PAGE_READONLY,
4095
+ sizehi, sizelo, NULL);
4096
+ if (!mh)
4097
+ return ErrCode();
4098
+ env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ?
4099
+ FILE_MAP_WRITE : FILE_MAP_READ,
4100
+ 0, 0, msize, addr);
4101
+ rc = env->me_map ? 0 : ErrCode();
4102
+ CloseHandle(mh);
4502
4103
  if (rc)
4503
- return mdb_nt2win32(rc);
4504
- env->me_map = map;
4104
+ return rc;
4505
4105
  #else
4506
4106
  int mmap_flags = MAP_SHARED;
4507
4107
  int prot = PROT_READ;
@@ -4509,15 +4109,6 @@ mdb_env_map(MDB_env *env, void *addr)
4509
4109
  if (flags & MDB_NOSYNC)
4510
4110
  mmap_flags |= MAP_NOSYNC;
4511
4111
  #endif
4512
- #ifdef MDB_VL32
4513
- (void) flags;
4514
- env->me_map = mmap(addr, NUM_METAS * env->me_psize, prot, mmap_flags,
4515
- env->me_fd, 0);
4516
- if (env->me_map == MAP_FAILED) {
4517
- env->me_map = NULL;
4518
- return ErrCode();
4519
- }
4520
- #else
4521
4112
  if (flags & MDB_WRITEMAP) {
4522
4113
  prot |= PROT_WRITE;
4523
4114
  if (ftruncate(env->me_fd, env->me_mapsize) < 0)
@@ -4549,7 +4140,6 @@ mdb_env_map(MDB_env *env, void *addr)
4549
4140
  */
4550
4141
  if (addr && env->me_map != addr)
4551
4142
  return EBUSY; /* TODO: Make a new MDB_* error code? */
4552
- #endif
4553
4143
 
4554
4144
  p = (MDB_page *)env->me_map;
4555
4145
  env->me_metas[0] = METADATA(p);
@@ -4559,17 +4149,15 @@ mdb_env_map(MDB_env *env, void *addr)
4559
4149
  }
4560
4150
 
4561
4151
  int ESECT
4562
- mdb_env_set_mapsize(MDB_env *env, mdb_size_t size)
4152
+ mdb_env_set_mapsize(MDB_env *env, size_t size)
4563
4153
  {
4564
4154
  /* If env is already open, caller is responsible for making
4565
4155
  * sure there are no active txns.
4566
4156
  */
4567
4157
  if (env->me_map) {
4158
+ int rc;
4568
4159
  MDB_meta *meta;
4569
- #ifndef MDB_VL32
4570
4160
  void *old;
4571
- int rc;
4572
- #endif
4573
4161
  if (env->me_txn)
4574
4162
  return EINVAL;
4575
4163
  meta = mdb_env_pick_meta(env);
@@ -4577,25 +4165,21 @@ mdb_env_set_mapsize(MDB_env *env, mdb_size_t size)
4577
4165
  size = meta->mm_mapsize;
4578
4166
  {
4579
4167
  /* Silently round up to minimum if the size is too small */
4580
- mdb_size_t minsize = (meta->mm_last_pg + 1) * env->me_psize;
4168
+ size_t minsize = (meta->mm_last_pg + 1) * env->me_psize;
4581
4169
  if (size < minsize)
4582
4170
  size = minsize;
4583
4171
  }
4584
- #ifndef MDB_VL32
4585
- /* For MDB_VL32 this bit is a noop since we dynamically remap
4586
- * chunks of the DB anyway.
4587
- */
4588
4172
  munmap(env->me_map, env->me_mapsize);
4589
4173
  env->me_mapsize = size;
4590
4174
  old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
4591
4175
  rc = mdb_env_map(env, old);
4592
4176
  if (rc)
4593
4177
  return rc;
4594
- #endif /* !MDB_VL32 */
4595
4178
  }
4596
4179
  env->me_mapsize = size;
4597
4180
  if (env->me_psize)
4598
4181
  env->me_maxpg = env->me_mapsize / env->me_psize;
4182
+ MDB_TRACE(("%p, %"Yu"", env, size));
4599
4183
  return MDB_SUCCESS;
4600
4184
  }
4601
4185
 
@@ -4605,6 +4189,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
4605
4189
  if (env->me_map)
4606
4190
  return EINVAL;
4607
4191
  env->me_maxdbs = dbs + CORE_DBS;
4192
+ MDB_TRACE(("%p, %u", env, dbs));
4608
4193
  return MDB_SUCCESS;
4609
4194
  }
4610
4195
 
@@ -4614,6 +4199,7 @@ mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
4614
4199
  if (env->me_map || readers < 1)
4615
4200
  return EINVAL;
4616
4201
  env->me_maxreaders = readers;
4202
+ MDB_TRACE(("%p, %u", env, readers));
4617
4203
  return MDB_SUCCESS;
4618
4204
  }
4619
4205
 
@@ -4627,7 +4213,7 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
4627
4213
  }
4628
4214
 
4629
4215
  static int ESECT
4630
- mdb_fsize(HANDLE fd, mdb_size_t *size)
4216
+ mdb_fsize(HANDLE fd, size_t *size)
4631
4217
  {
4632
4218
  #ifdef _WIN32
4633
4219
  LARGE_INTEGER fsize;
@@ -4716,7 +4302,7 @@ mdb_fname_init(const char *path, unsigned envflags, MDB_name *fname)
4716
4302
  /** File type, access mode etc. for #mdb_fopen() */
4717
4303
  enum mdb_fopen_type {
4718
4304
  #ifdef _WIN32
4719
- MDB_O_RDONLY, MDB_O_RDWR, MDB_O_OVERLAPPED, MDB_O_META, MDB_O_COPY, MDB_O_LOCKS
4305
+ MDB_O_RDONLY, MDB_O_RDWR, MDB_O_META, MDB_O_COPY, MDB_O_LOCKS
4720
4306
  #else
4721
4307
  /* A comment in mdb_fopen() explains some O_* flag choices. */
4722
4308
  MDB_O_RDONLY= O_RDONLY, /**< for RDONLY me_fd */
@@ -4777,11 +4363,6 @@ mdb_fopen(const MDB_env *env, MDB_name *fname,
4777
4363
  disp = OPEN_ALWAYS;
4778
4364
  attrs = FILE_ATTRIBUTE_NORMAL;
4779
4365
  switch (which) {
4780
- case MDB_O_OVERLAPPED: /* for unbuffered asynchronous writes (write-through mode)*/
4781
- acc = GENERIC_WRITE;
4782
- disp = OPEN_EXISTING;
4783
- attrs = FILE_FLAG_OVERLAPPED|FILE_FLAG_WRITE_THROUGH;
4784
- break;
4785
4366
  case MDB_O_RDONLY: /* read-only datafile */
4786
4367
  acc = GENERIC_READ;
4787
4368
  disp = OPEN_EXISTING;
@@ -4843,7 +4424,7 @@ mdb_fopen(const MDB_env *env, MDB_name *fname,
4843
4424
  /** Further setup required for opening an LMDB environment
4844
4425
  */
4845
4426
  static int ESECT
4846
- mdb_env_open2(MDB_env *env, int prev)
4427
+ mdb_env_open2(MDB_env *env)
4847
4428
  {
4848
4429
  unsigned int flags = env->me_flags;
4849
4430
  int i, newenv = 0, rc;
@@ -4856,22 +4437,6 @@ mdb_env_open2(MDB_env *env, int prev)
4856
4437
  env->me_pidquery = MDB_PROCESS_QUERY_LIMITED_INFORMATION;
4857
4438
  else
4858
4439
  env->me_pidquery = PROCESS_QUERY_INFORMATION;
4859
- /* Grab functions we need from NTDLL */
4860
- if (!NtCreateSection) {
4861
- HMODULE h = GetModuleHandleW(L"NTDLL.DLL");
4862
- if (!h)
4863
- return MDB_PROBLEM;
4864
- NtClose = (NtCloseFunc *)GetProcAddress(h, "NtClose");
4865
- if (!NtClose)
4866
- return MDB_PROBLEM;
4867
- NtMapViewOfSection = (NtMapViewOfSectionFunc *)GetProcAddress(h, "NtMapViewOfSection");
4868
- if (!NtMapViewOfSection)
4869
- return MDB_PROBLEM;
4870
- NtCreateSection = (NtCreateSectionFunc *)GetProcAddress(h, "NtCreateSection");
4871
- if (!NtCreateSection)
4872
- return MDB_PROBLEM;
4873
- }
4874
- env->ovs = 0;
4875
4440
  #endif /* _WIN32 */
4876
4441
 
4877
4442
  #ifdef BROKEN_FDATASYNC
@@ -4922,7 +4487,7 @@ mdb_env_open2(MDB_env *env, int prev)
4922
4487
  }
4923
4488
  #endif
4924
4489
 
4925
- if ((i = mdb_env_read_header(env, prev, &meta)) != 0) {
4490
+ if ((i = mdb_env_read_header(env, &meta)) != 0) {
4926
4491
  if (i != ENOENT)
4927
4492
  return i;
4928
4493
  DPUTS("new mdbenv");
@@ -4945,7 +4510,7 @@ mdb_env_open2(MDB_env *env, int prev)
4945
4510
  /* Make sure mapsize >= committed data size. Even when using
4946
4511
  * mm_mapsize, which could be broken in old files (ITS#7789).
4947
4512
  */
4948
- mdb_size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize;
4513
+ size_t minsize = (meta.mm_last_pg + 1) * meta.mm_psize;
4949
4514
  if (env->me_mapsize < minsize)
4950
4515
  env->me_mapsize = minsize;
4951
4516
  }
@@ -4964,18 +4529,6 @@ mdb_env_open2(MDB_env *env, int prev)
4964
4529
  return rc;
4965
4530
  newenv = 0;
4966
4531
  }
4967
- #ifdef _WIN32
4968
- /* For FIXEDMAP, make sure the file is non-empty before we attempt to map it */
4969
- if (newenv) {
4970
- char dummy = 0;
4971
- DWORD len;
4972
- rc = WriteFile(env->me_fd, &dummy, 1, &len, NULL);
4973
- if (!rc) {
4974
- rc = ErrCode();
4975
- return rc;
4976
- }
4977
- }
4978
- #endif
4979
4532
 
4980
4533
  rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
4981
4534
  if (rc)
@@ -5005,13 +4558,13 @@ mdb_env_open2(MDB_env *env, int prev)
5005
4558
 
5006
4559
  DPRINTF(("opened database version %u, pagesize %u",
5007
4560
  meta->mm_version, env->me_psize));
5008
- DPRINTF(("using meta page %d", (int) (meta->mm_txnid & 1)));
5009
- DPRINTF(("depth: %u", db->md_depth));
5010
- DPRINTF(("entries: %"Yu, db->md_entries));
5011
- DPRINTF(("branch pages: %"Yu, db->md_branch_pages));
5012
- DPRINTF(("leaf pages: %"Yu, db->md_leaf_pages));
5013
- DPRINTF(("overflow pages: %"Yu, db->md_overflow_pages));
5014
- DPRINTF(("root: %"Yu, db->md_root));
4561
+ DPRINTF(("using meta page %d", (int) (meta->mm_txnid & 1)));
4562
+ DPRINTF(("depth: %u", db->md_depth));
4563
+ DPRINTF(("entries: %"Z"u", db->md_entries));
4564
+ DPRINTF(("branch pages: %"Z"u", db->md_branch_pages));
4565
+ DPRINTF(("leaf pages: %"Z"u", db->md_leaf_pages));
4566
+ DPRINTF(("overflow pages: %"Z"u", db->md_overflow_pages));
4567
+ DPRINTF(("root: %"Z"u", db->md_root));
5015
4568
  }
5016
4569
  #endif
5017
4570
 
@@ -5207,21 +4760,32 @@ mdb_env_excl_lock(MDB_env *env, int *excl)
5207
4760
  * Share and Enjoy! :-)
5208
4761
  */
5209
4762
 
4763
+ typedef unsigned long long mdb_hash_t;
4764
+ #define MDB_HASH_INIT ((mdb_hash_t)0xcbf29ce484222325ULL)
4765
+
5210
4766
  /** perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer
5211
4767
  * @param[in] val value to hash
5212
- * @param[in] len length of value
4768
+ * @param[in] hval initial value for hash
5213
4769
  * @return 64 bit hash
4770
+ *
4771
+ * NOTE: To use the recommended 64 bit FNV-1a hash, use MDB_HASH_INIT as the
4772
+ * hval arg on the first call.
5214
4773
  */
5215
4774
  static mdb_hash_t
5216
- mdb_hash(const void *val, size_t len)
4775
+ mdb_hash_val(MDB_val *val, mdb_hash_t hval)
5217
4776
  {
5218
- const unsigned char *s = (const unsigned char *) val, *end = s + len;
5219
- mdb_hash_t hval = 0xcbf29ce484222325ULL;
4777
+ unsigned char *s = (unsigned char *)val->mv_data; /* unsigned string */
4778
+ unsigned char *end = s + val->mv_size;
5220
4779
  /*
5221
- * FNV-1a hash each octet of the buffer
4780
+ * FNV-1a hash each octet of the string
5222
4781
  */
5223
4782
  while (s < end) {
5224
- hval = (hval ^ *s++) * 0x100000001b3ULL;
4783
+ /* xor the bottom with the current octet */
4784
+ hval ^= (mdb_hash_t)*s++;
4785
+
4786
+ /* multiply by the 64 bit FNV magic prime mod 2^64 */
4787
+ hval += (hval << 1) + (hval << 4) + (hval << 5) +
4788
+ (hval << 7) + (hval << 8) + (hval << 40);
5225
4789
  }
5226
4790
  /* return our new hash value */
5227
4791
  return hval;
@@ -5238,33 +4802,25 @@ mdb_hash(const void *val, size_t len)
5238
4802
  static const char mdb_a85[]= "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";
5239
4803
 
5240
4804
  static void ESECT
5241
- mdb_pack85(unsigned long long l, char *out)
4805
+ mdb_pack85(unsigned long l, char *out)
5242
4806
  {
5243
4807
  int i;
5244
4808
 
5245
- for (i=0; i<10 && l; i++) {
4809
+ for (i=0; i<5; i++) {
5246
4810
  *out++ = mdb_a85[l % 85];
5247
4811
  l /= 85;
5248
4812
  }
5249
- *out = '\0';
5250
4813
  }
5251
4814
 
5252
- /** Init #MDB_env.me_mutexname[] except the char which #MUTEXNAME() will set.
5253
- * Changes to this code must be reflected in #MDB_LOCK_FORMAT.
5254
- */
5255
4815
  static void ESECT
5256
- mdb_env_mname_init(MDB_env *env)
4816
+ mdb_hash_enc(MDB_val *val, char *encbuf)
5257
4817
  {
5258
- char *nm = env->me_mutexname;
5259
- strcpy(nm, MUTEXNAME_PREFIX);
5260
- mdb_pack85(env->me_txns->mti_mutexid, nm + sizeof(MUTEXNAME_PREFIX));
5261
- }
5262
-
5263
- /** Return env->me_mutexname after filling in ch ('r'/'w') for convenience */
5264
- #define MUTEXNAME(env, ch) ( \
5265
- (void) ((env)->me_mutexname[sizeof(MUTEXNAME_PREFIX)-1] = (ch)), \
5266
- (env)->me_mutexname)
4818
+ mdb_hash_t h = mdb_hash_val(val, MDB_HASH_INIT);
5267
4819
 
4820
+ mdb_pack85(h, encbuf);
4821
+ mdb_pack85(h>>32, encbuf+5);
4822
+ encbuf[10] = '\0';
4823
+ }
5268
4824
  #endif
5269
4825
 
5270
4826
  /** Open and/or initialize the lock region for the environment.
@@ -5281,13 +4837,9 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5281
4837
  # define MDB_ERRCODE_ROFS ERROR_WRITE_PROTECT
5282
4838
  #else
5283
4839
  # define MDB_ERRCODE_ROFS EROFS
5284
- #endif
5285
- #ifdef MDB_USE_SYSV_SEM
5286
- int semid;
5287
- union semun semu;
5288
4840
  #endif
5289
4841
  int rc;
5290
- MDB_OFF_T size, rsize;
4842
+ off_t size, rsize;
5291
4843
 
5292
4844
  rc = mdb_fopen(env, fname, MDB_O_LOCKS, mode, &env->me_lfd);
5293
4845
  if (rc) {
@@ -5362,6 +4914,8 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5362
4914
  DWORD nhigh;
5363
4915
  DWORD nlow;
5364
4916
  } idbuf;
4917
+ MDB_val val;
4918
+ char encbuf[11];
5365
4919
 
5366
4920
  if (!mdb_sec_inited) {
5367
4921
  InitializeSecurityDescriptor(&mdb_null_sd,
@@ -5376,11 +4930,14 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5376
4930
  idbuf.volume = stbuf.dwVolumeSerialNumber;
5377
4931
  idbuf.nhigh = stbuf.nFileIndexHigh;
5378
4932
  idbuf.nlow = stbuf.nFileIndexLow;
5379
- env->me_txns->mti_mutexid = mdb_hash(&idbuf, sizeof(idbuf));
5380
- mdb_env_mname_init(env);
5381
- env->me_rmutex = CreateMutexA(&mdb_all_sa, FALSE, MUTEXNAME(env, 'r'));
4933
+ val.mv_data = &idbuf;
4934
+ val.mv_size = sizeof(idbuf);
4935
+ mdb_hash_enc(&val, encbuf);
4936
+ sprintf(env->me_txns->mti_rmname, "Global\\MDBr%s", encbuf);
4937
+ sprintf(env->me_txns->mti_wmname, "Global\\MDBw%s", encbuf);
4938
+ env->me_rmutex = CreateMutexA(&mdb_all_sa, FALSE, env->me_txns->mti_rmname);
5382
4939
  if (!env->me_rmutex) goto fail_errno;
5383
- env->me_wmutex = CreateMutexA(&mdb_all_sa, FALSE, MUTEXNAME(env, 'w'));
4940
+ env->me_wmutex = CreateMutexA(&mdb_all_sa, FALSE, env->me_txns->mti_wmname);
5384
4941
  if (!env->me_wmutex) goto fail_errno;
5385
4942
  #elif defined(MDB_USE_POSIX_SEM)
5386
4943
  struct stat stbuf;
@@ -5388,46 +4945,34 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5388
4945
  dev_t dev;
5389
4946
  ino_t ino;
5390
4947
  } idbuf;
4948
+ MDB_val val;
4949
+ char encbuf[11];
5391
4950
 
5392
4951
  #if defined(__NetBSD__)
5393
4952
  #define MDB_SHORT_SEMNAMES 1 /* limited to 14 chars */
5394
4953
  #endif
5395
4954
  if (fstat(env->me_lfd, &stbuf)) goto fail_errno;
5396
- memset(&idbuf, 0, sizeof(idbuf));
5397
4955
  idbuf.dev = stbuf.st_dev;
5398
4956
  idbuf.ino = stbuf.st_ino;
5399
- env->me_txns->mti_mutexid = mdb_hash(&idbuf, sizeof(idbuf))
4957
+ val.mv_data = &idbuf;
4958
+ val.mv_size = sizeof(idbuf);
4959
+ mdb_hash_enc(&val, encbuf);
5400
4960
  #ifdef MDB_SHORT_SEMNAMES
5401
- /* Max 9 base85-digits. We truncate here instead of in
5402
- * mdb_env_mname_init() to keep the latter portable.
5403
- */
5404
- % ((mdb_hash_t)85*85*85*85*85*85*85*85*85)
4961
+ encbuf[9] = '\0'; /* drop name from 15 chars to 14 chars */
5405
4962
  #endif
5406
- ;
5407
- mdb_env_mname_init(env);
4963
+ sprintf(env->me_txns->mti_rmname, "/MDBr%s", encbuf);
4964
+ sprintf(env->me_txns->mti_wmname, "/MDBw%s", encbuf);
5408
4965
  /* Clean up after a previous run, if needed: Try to
5409
4966
  * remove both semaphores before doing anything else.
5410
4967
  */
5411
- sem_unlink(MUTEXNAME(env, 'r'));
5412
- sem_unlink(MUTEXNAME(env, 'w'));
5413
- env->me_rmutex = sem_open(MUTEXNAME(env, 'r'), O_CREAT|O_EXCL, mode, 1);
4968
+ sem_unlink(env->me_txns->mti_rmname);
4969
+ sem_unlink(env->me_txns->mti_wmname);
4970
+ env->me_rmutex = sem_open(env->me_txns->mti_rmname,
4971
+ O_CREAT|O_EXCL, mode, 1);
5414
4972
  if (env->me_rmutex == SEM_FAILED) goto fail_errno;
5415
- env->me_wmutex = sem_open(MUTEXNAME(env, 'w'), O_CREAT|O_EXCL, mode, 1);
4973
+ env->me_wmutex = sem_open(env->me_txns->mti_wmname,
4974
+ O_CREAT|O_EXCL, mode, 1);
5416
4975
  if (env->me_wmutex == SEM_FAILED) goto fail_errno;
5417
- #elif defined(MDB_USE_SYSV_SEM)
5418
- unsigned short vals[2] = {1, 1};
5419
- key_t key = ftok(fname->mn_val, 'M'); /* fname is lockfile path now */
5420
- if (key == -1)
5421
- goto fail_errno;
5422
- semid = semget(key, 2, (mode & 0777) | IPC_CREAT);
5423
- if (semid < 0)
5424
- goto fail_errno;
5425
- semu.array = vals;
5426
- if (semctl(semid, 0, SETALL, semu) < 0)
5427
- goto fail_errno;
5428
- env->me_txns->mti_semid = semid;
5429
- env->me_txns->mti_rlocked = 0;
5430
- env->me_txns->mti_wlocked = 0;
5431
4976
  #else /* MDB_USE_POSIX_MUTEX: */
5432
4977
  pthread_mutexattr_t mattr;
5433
4978
 
@@ -5438,8 +4983,9 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5438
4983
  memset(env->me_txns->mti_rmutex, 0, sizeof(*env->me_txns->mti_rmutex));
5439
4984
  memset(env->me_txns->mti_wmutex, 0, sizeof(*env->me_txns->mti_wmutex));
5440
4985
 
5441
- if ((rc = pthread_mutexattr_init(&mattr)) != 0)
4986
+ if ((rc = pthread_mutexattr_init(&mattr)))
5442
4987
  goto fail;
4988
+
5443
4989
  rc = pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
5444
4990
  #ifdef MDB_ROBUST_SUPPORTED
5445
4991
  if (!rc) rc = pthread_mutexattr_setrobust(&mattr, PTHREAD_MUTEX_ROBUST);
@@ -5449,7 +4995,7 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5449
4995
  pthread_mutexattr_destroy(&mattr);
5450
4996
  if (rc)
5451
4997
  goto fail;
5452
- #endif /* _WIN32 || ... */
4998
+ #endif /* _WIN32 || MDB_USE_POSIX_SEM */
5453
4999
 
5454
5000
  env->me_txns->mti_magic = MDB_MAGIC;
5455
5001
  env->me_txns->mti_format = MDB_LOCK_FORMAT;
@@ -5457,9 +5003,6 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5457
5003
  env->me_txns->mti_numreaders = 0;
5458
5004
 
5459
5005
  } else {
5460
- #ifdef MDB_USE_SYSV_SEM
5461
- struct semid_ds buf;
5462
- #endif
5463
5006
  if (env->me_txns->mti_magic != MDB_MAGIC) {
5464
5007
  DPUTS("lock region has invalid magic");
5465
5008
  rc = MDB_INVALID;
@@ -5476,37 +5019,17 @@ mdb_env_setup_locks(MDB_env *env, MDB_name *fname, int mode, int *excl)
5476
5019
  goto fail;
5477
5020
  }
5478
5021
  #ifdef _WIN32
5479
- mdb_env_mname_init(env);
5480
- env->me_rmutex = OpenMutexA(SYNCHRONIZE, FALSE, MUTEXNAME(env, 'r'));
5022
+ env->me_rmutex = OpenMutexA(SYNCHRONIZE, FALSE, env->me_txns->mti_rmname);
5481
5023
  if (!env->me_rmutex) goto fail_errno;
5482
- env->me_wmutex = OpenMutexA(SYNCHRONIZE, FALSE, MUTEXNAME(env, 'w'));
5024
+ env->me_wmutex = OpenMutexA(SYNCHRONIZE, FALSE, env->me_txns->mti_wmname);
5483
5025
  if (!env->me_wmutex) goto fail_errno;
5484
5026
  #elif defined(MDB_USE_POSIX_SEM)
5485
- mdb_env_mname_init(env);
5486
- env->me_rmutex = sem_open(MUTEXNAME(env, 'r'), 0);
5027
+ env->me_rmutex = sem_open(env->me_txns->mti_rmname, 0);
5487
5028
  if (env->me_rmutex == SEM_FAILED) goto fail_errno;
5488
- env->me_wmutex = sem_open(MUTEXNAME(env, 'w'), 0);
5029
+ env->me_wmutex = sem_open(env->me_txns->mti_wmname, 0);
5489
5030
  if (env->me_wmutex == SEM_FAILED) goto fail_errno;
5490
- #elif defined(MDB_USE_SYSV_SEM)
5491
- semid = env->me_txns->mti_semid;
5492
- semu.buf = &buf;
5493
- /* check for read access */
5494
- if (semctl(semid, 0, IPC_STAT, semu) < 0)
5495
- goto fail_errno;
5496
- /* check for write access */
5497
- if (semctl(semid, 0, IPC_SET, semu) < 0)
5498
- goto fail_errno;
5499
5031
  #endif
5500
5032
  }
5501
- #ifdef MDB_USE_SYSV_SEM
5502
- env->me_rmutex->semid = semid;
5503
- env->me_wmutex->semid = semid;
5504
- env->me_rmutex->semnum = 0;
5505
- env->me_wmutex->semnum = 1;
5506
- env->me_rmutex->locked = &env->me_txns->mti_rlocked;
5507
- env->me_wmutex->locked = &env->me_txns->mti_wlocked;
5508
- #endif
5509
-
5510
5033
  return MDB_SUCCESS;
5511
5034
 
5512
5035
  fail_errno:
@@ -5521,7 +5044,7 @@ fail:
5521
5044
  */
5522
5045
  #define CHANGEABLE (MDB_NOSYNC|MDB_NOMETASYNC|MDB_MAPASYNC|MDB_NOMEMINIT)
5523
5046
  #define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY| \
5524
- MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD|MDB_PREVSNAPSHOT)
5047
+ MDB_WRITEMAP|MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
5525
5048
 
5526
5049
  #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
5527
5050
  # error "Persistent DB flags & env flags overlap, but both go in mm_flags"
@@ -5536,37 +5059,12 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
5536
5059
  if (env->me_fd!=INVALID_HANDLE_VALUE || (flags & ~(CHANGEABLE|CHANGELESS)))
5537
5060
  return EINVAL;
5538
5061
 
5539
- #ifdef MDB_VL32
5540
- if (flags & MDB_WRITEMAP) {
5541
- /* silently ignore WRITEMAP in 32 bit mode */
5542
- flags ^= MDB_WRITEMAP;
5543
- }
5544
- if (flags & MDB_FIXEDMAP) {
5545
- /* cannot support FIXEDMAP */
5546
- return EINVAL;
5547
- }
5548
- #endif
5549
5062
  flags |= env->me_flags;
5550
5063
 
5551
5064
  rc = mdb_fname_init(path, flags, &fname);
5552
5065
  if (rc)
5553
5066
  return rc;
5554
5067
 
5555
- #ifdef MDB_VL32
5556
- #ifdef _WIN32
5557
- env->me_rpmutex = CreateMutex(NULL, FALSE, NULL);
5558
- if (!env->me_rpmutex) {
5559
- rc = ErrCode();
5560
- goto leave;
5561
- }
5562
- #else
5563
- rc = pthread_mutex_init(&env->me_rpmutex, NULL);
5564
- if (rc)
5565
- goto leave;
5566
- #endif
5567
- #endif
5568
- flags |= MDB_ENV_ACTIVE; /* tell mdb_env_close0() to clean up */
5569
-
5570
5068
  if (flags & MDB_RDONLY) {
5571
5069
  /* silently ignore WRITEMAP when we're only getting read access */
5572
5070
  flags &= ~MDB_WRITEMAP;
@@ -5575,23 +5073,10 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
5575
5073
  (env->me_dirty_list = calloc(MDB_IDL_UM_SIZE, sizeof(MDB_ID2)))))
5576
5074
  rc = ENOMEM;
5577
5075
  }
5578
-
5579
- env->me_flags = flags;
5076
+ env->me_flags = flags |= MDB_ENV_ACTIVE;
5580
5077
  if (rc)
5581
5078
  goto leave;
5582
5079
 
5583
- #ifdef MDB_VL32
5584
- {
5585
- env->me_rpages = malloc(MDB_ERPAGE_SIZE * sizeof(MDB_ID3));
5586
- if (!env->me_rpages) {
5587
- rc = ENOMEM;
5588
- goto leave;
5589
- }
5590
- env->me_rpages[0].mid = 0;
5591
- env->me_rpcheck = MDB_ERPAGE_SIZE/2;
5592
- }
5593
- #endif
5594
-
5595
5080
  env->me_path = strdup(path);
5596
5081
  env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
5597
5082
  env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t));
@@ -5607,10 +5092,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
5607
5092
  rc = mdb_env_setup_locks(env, &fname, mode, &excl);
5608
5093
  if (rc)
5609
5094
  goto leave;
5610
- if ((flags & MDB_PREVSNAPSHOT) && !excl) {
5611
- rc = EAGAIN;
5612
- goto leave;
5613
- }
5614
5095
  }
5615
5096
 
5616
5097
  rc = mdb_fopen(env, &fname,
@@ -5618,11 +5099,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
5618
5099
  mode, &env->me_fd);
5619
5100
  if (rc)
5620
5101
  goto leave;
5621
- #ifdef _WIN32
5622
- rc = mdb_fopen(env, &fname, MDB_O_OVERLAPPED, mode, &env->me_ovfd);
5623
- if (rc)
5624
- goto leave;
5625
- #endif
5626
5102
 
5627
5103
  if ((flags & (MDB_RDONLY|MDB_NOLOCK)) == MDB_RDONLY) {
5628
5104
  rc = mdb_env_setup_locks(env, &fname, mode, &excl);
@@ -5630,16 +5106,17 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
5630
5106
  goto leave;
5631
5107
  }
5632
5108
 
5633
- if ((rc = mdb_env_open2(env, flags & MDB_PREVSNAPSHOT)) == MDB_SUCCESS) {
5634
- /* Synchronous fd for meta writes. Needed even with
5635
- * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset.
5636
- */
5637
- rc = mdb_fopen(env, &fname, MDB_O_META, mode, &env->me_mfd);
5638
- if (rc)
5639
- goto leave;
5640
-
5109
+ if ((rc = mdb_env_open2(env)) == MDB_SUCCESS) {
5110
+ if (!(flags & (MDB_RDONLY|MDB_WRITEMAP))) {
5111
+ /* Synchronous fd for meta writes. Needed even with
5112
+ * MDB_NOSYNC/MDB_NOMETASYNC, in case these get reset.
5113
+ */
5114
+ rc = mdb_fopen(env, &fname, MDB_O_META, mode, &env->me_mfd);
5115
+ if (rc)
5116
+ goto leave;
5117
+ }
5641
5118
  DPRINTF(("opened dbenv %p", (void *) env));
5642
- if (excl > 0 && !(flags & MDB_PREVSNAPSHOT)) {
5119
+ if (excl > 0) {
5643
5120
  rc = mdb_env_share_locks(env, &excl);
5644
5121
  if (rc)
5645
5122
  goto leave;
@@ -5656,16 +5133,6 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
5656
5133
  txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
5657
5134
  txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
5658
5135
  txn->mt_env = env;
5659
- #ifdef MDB_VL32
5660
- txn->mt_rpages = malloc(MDB_TRPAGE_SIZE * sizeof(MDB_ID3));
5661
- if (!txn->mt_rpages) {
5662
- free(txn);
5663
- rc = ENOMEM;
5664
- goto leave;
5665
- }
5666
- txn->mt_rpages[0].mid = 0;
5667
- txn->mt_rpcheck = MDB_TRPAGE_SIZE/2;
5668
- #endif
5669
5136
  txn->mt_dbxs = env->me_dbxs;
5670
5137
  txn->mt_flags = MDB_TXN_FINISHED;
5671
5138
  env->me_txn0 = txn;
@@ -5676,6 +5143,7 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
5676
5143
  }
5677
5144
 
5678
5145
  leave:
5146
+ MDB_TRACE(("%p, %s, %u, %04o", env, path, flags & (CHANGEABLE|CHANGELESS), mode));
5679
5147
  if (rc) {
5680
5148
  mdb_env_close0(env, excl);
5681
5149
  }
@@ -5704,17 +5172,6 @@ mdb_env_close0(MDB_env *env, int excl)
5704
5172
  free(env->me_dbflags);
5705
5173
  free(env->me_path);
5706
5174
  free(env->me_dirty_list);
5707
- #ifdef MDB_VL32
5708
- if (env->me_txn0 && env->me_txn0->mt_rpages)
5709
- free(env->me_txn0->mt_rpages);
5710
- if (env->me_rpages) {
5711
- MDB_ID3L el = env->me_rpages;
5712
- unsigned int x;
5713
- for (x=1; x<=el[0].mid; x++)
5714
- munmap(el[x].mptr, el[x].mcnt * env->me_psize);
5715
- free(el);
5716
- }
5717
- #endif
5718
5175
  free(env->me_txn0);
5719
5176
  mdb_midl_free(env->me_free_pgs);
5720
5177
 
@@ -5732,24 +5189,10 @@ mdb_env_close0(MDB_env *env, int excl)
5732
5189
  }
5733
5190
 
5734
5191
  if (env->me_map) {
5735
- #ifdef MDB_VL32
5736
- munmap(env->me_map, NUM_METAS*env->me_psize);
5737
- #else
5738
5192
  munmap(env->me_map, env->me_mapsize);
5739
- #endif
5740
5193
  }
5741
5194
  if (env->me_mfd != INVALID_HANDLE_VALUE)
5742
5195
  (void) close(env->me_mfd);
5743
- #ifdef _WIN32
5744
- if (env->ovs > 0) {
5745
- for (i = 0; i < env->ovs; i++) {
5746
- CloseHandle(env->ov[i].hEvent);
5747
- }
5748
- free(env->ov);
5749
- }
5750
- if (env->me_ovfd != INVALID_HANDLE_VALUE)
5751
- (void) close(env->me_ovfd);
5752
- #endif
5753
5196
  if (env->me_fd != INVALID_HANDLE_VALUE)
5754
5197
  (void) close(env->me_fd);
5755
5198
  if (env->me_txns) {
@@ -5783,31 +5226,10 @@ mdb_env_close0(MDB_env *env, int excl)
5783
5226
  if (excl == 0)
5784
5227
  mdb_env_excl_lock(env, &excl);
5785
5228
  if (excl > 0) {
5786
- sem_unlink(MUTEXNAME(env, 'r'));
5787
- sem_unlink(MUTEXNAME(env, 'w'));
5229
+ sem_unlink(env->me_txns->mti_rmname);
5230
+ sem_unlink(env->me_txns->mti_wmname);
5788
5231
  }
5789
5232
  }
5790
- #elif defined(MDB_USE_SYSV_SEM)
5791
- if (env->me_rmutex->semid != -1) {
5792
- /* If we have the filelock: If we are the
5793
- * only remaining user, clean up semaphores.
5794
- */
5795
- if (excl == 0)
5796
- mdb_env_excl_lock(env, &excl);
5797
- if (excl > 0)
5798
- semctl(env->me_rmutex->semid, 0, IPC_RMID);
5799
- }
5800
- #elif defined(MDB_ROBUST_SUPPORTED)
5801
- /* If we have the filelock: If we are the
5802
- * only remaining user, clean up robust
5803
- * mutexes.
5804
- */
5805
- if (excl == 0)
5806
- mdb_env_excl_lock(env, &excl);
5807
- if (excl > 0) {
5808
- pthread_mutex_destroy(env->me_txns->mti_rmutex);
5809
- pthread_mutex_destroy(env->me_txns->mti_wmutex);
5810
- }
5811
5233
  #endif
5812
5234
  munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
5813
5235
  }
@@ -5822,14 +5244,6 @@ mdb_env_close0(MDB_env *env, int excl)
5822
5244
  #endif
5823
5245
  (void) close(env->me_lfd);
5824
5246
  }
5825
- #ifdef MDB_VL32
5826
- #ifdef _WIN32
5827
- if (env->me_fmh) CloseHandle(env->me_fmh);
5828
- if (env->me_rpmutex) CloseHandle(env->me_rpmutex);
5829
- #else
5830
- pthread_mutex_destroy(&env->me_rpmutex);
5831
- #endif
5832
- #endif
5833
5247
 
5834
5248
  env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
5835
5249
  }
@@ -5842,6 +5256,7 @@ mdb_env_close(MDB_env *env)
5842
5256
  if (env == NULL)
5843
5257
  return;
5844
5258
 
5259
+ MDB_TRACE(("%p", env));
5845
5260
  VGMEMP_DESTROY(env);
5846
5261
  while ((dp = env->me_dpages) != NULL) {
5847
5262
  VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
@@ -5853,18 +5268,18 @@ mdb_env_close(MDB_env *env)
5853
5268
  free(env);
5854
5269
  }
5855
5270
 
5856
- /** Compare two items pointing at aligned #mdb_size_t's */
5271
+ /** Compare two items pointing at aligned size_t's */
5857
5272
  static int
5858
5273
  mdb_cmp_long(const MDB_val *a, const MDB_val *b)
5859
5274
  {
5860
- return (*(mdb_size_t *)a->mv_data < *(mdb_size_t *)b->mv_data) ? -1 :
5861
- *(mdb_size_t *)a->mv_data > *(mdb_size_t *)b->mv_data;
5275
+ return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 :
5276
+ *(size_t *)a->mv_data > *(size_t *)b->mv_data;
5862
5277
  }
5863
5278
 
5864
5279
  /** Compare two items pointing at aligned unsigned int's.
5865
5280
  *
5866
5281
  * This is also set as #MDB_INTEGERDUP|#MDB_DUPFIXED's #MDB_dbx.%md_dcmp,
5867
- * but #mdb_cmp_clong() is called instead if the data type is #mdb_size_t.
5282
+ * but #mdb_cmp_clong() is called instead if the data type is size_t.
5868
5283
  */
5869
5284
  static int
5870
5285
  mdb_cmp_int(const MDB_val *a, const MDB_val *b)
@@ -5969,7 +5384,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
5969
5384
 
5970
5385
  nkeys = NUMKEYS(mp);
5971
5386
 
5972
- DPRINTF(("searching %u keys in %s %spage %"Yu,
5387
+ DPRINTF(("searching %u keys in %s %spage %"Z"u",
5973
5388
  nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "",
5974
5389
  mdb_dbg_pgno(mp)));
5975
5390
 
@@ -5981,7 +5396,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
5981
5396
  * alignment is guaranteed. Use faster mdb_cmp_int.
5982
5397
  */
5983
5398
  if (cmp == mdb_cmp_cint && IS_BRANCH(mp)) {
5984
- if (NODEPTR(mp, 1)->mn_ksize == sizeof(mdb_size_t))
5399
+ if (NODEPTR(mp, 1)->mn_ksize == sizeof(size_t))
5985
5400
  cmp = mdb_cmp_long;
5986
5401
  else
5987
5402
  cmp = mdb_cmp_int;
@@ -6017,7 +5432,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
6017
5432
  DPRINTF(("found leaf index %u [%s], rc = %i",
6018
5433
  i, DKEY(&nodekey), rc));
6019
5434
  else
6020
- DPRINTF(("found branch index %u [%s -> %"Yu"], rc = %i",
5435
+ DPRINTF(("found branch index %u [%s -> %"Z"u], rc = %i",
6021
5436
  i, DKEY(&nodekey), NODEPGNO(node), rc));
6022
5437
  #endif
6023
5438
  if (rc == 0)
@@ -6065,7 +5480,7 @@ static void
6065
5480
  mdb_cursor_pop(MDB_cursor *mc)
6066
5481
  {
6067
5482
  if (mc->mc_snum) {
6068
- DPRINTF(("popping page %"Yu" off db %d cursor %p",
5483
+ DPRINTF(("popping page %"Z"u off db %d cursor %p",
6069
5484
  mc->mc_pg[mc->mc_top]->mp_pgno, DDBI(mc), (void *) mc));
6070
5485
 
6071
5486
  mc->mc_snum--;
@@ -6083,7 +5498,7 @@ mdb_cursor_pop(MDB_cursor *mc)
6083
5498
  static int
6084
5499
  mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
6085
5500
  {
6086
- DPRINTF(("pushing page %"Yu" on db %d cursor %p", mp->mp_pgno,
5501
+ DPRINTF(("pushing page %"Z"u on db %d cursor %p", mp->mp_pgno,
6087
5502
  DDBI(mc), (void *) mc));
6088
5503
 
6089
5504
  if (mc->mc_snum >= CURSOR_STACK) {
@@ -6098,294 +5513,6 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
6098
5513
  return MDB_SUCCESS;
6099
5514
  }
6100
5515
 
6101
- #ifdef MDB_VL32
6102
- /** Map a read-only page.
6103
- * There are two levels of tracking in use, a per-txn list and a per-env list.
6104
- * ref'ing and unref'ing the per-txn list is faster since it requires no
6105
- * locking. Pages are cached in the per-env list for global reuse, and a lock
6106
- * is required. Pages are not immediately unmapped when their refcnt goes to
6107
- * zero; they hang around in case they will be reused again soon.
6108
- *
6109
- * When the per-txn list gets full, all pages with refcnt=0 are purged from the
6110
- * list and their refcnts in the per-env list are decremented.
6111
- *
6112
- * When the per-env list gets full, all pages with refcnt=0 are purged from the
6113
- * list and their pages are unmapped.
6114
- *
6115
- * @note "full" means the list has reached its respective rpcheck threshold.
6116
- * This threshold slowly raises if no pages could be purged on a given check,
6117
- * and returns to its original value when enough pages were purged.
6118
- *
6119
- * If purging doesn't free any slots, filling the per-txn list will return
6120
- * MDB_TXN_FULL, and filling the per-env list returns MDB_MAP_FULL.
6121
- *
6122
- * Reference tracking in a txn is imperfect, pages can linger with non-zero
6123
- * refcnt even without active references. It was deemed to be too invasive
6124
- * to add unrefs in every required location. However, all pages are unref'd
6125
- * at the end of the transaction. This guarantees that no stale references
6126
- * linger in the per-env list.
6127
- *
6128
- * Usually we map chunks of 16 pages at a time, but if an overflow page begins
6129
- * at the tail of the chunk we extend the chunk to include the entire overflow
6130
- * page. Unfortunately, pages can be turned into overflow pages after their
6131
- * chunk was already mapped. In that case we must remap the chunk if the
6132
- * overflow page is referenced. If the chunk's refcnt is 0 we can just remap
6133
- * it, otherwise we temporarily map a new chunk just for the overflow page.
6134
- *
6135
- * @note this chunk handling means we cannot guarantee that a data item
6136
- * returned from the DB will stay alive for the duration of the transaction:
6137
- * We unref pages as soon as a cursor moves away from the page
6138
- * A subsequent op may cause a purge, which may unmap any unref'd chunks
6139
- * The caller must copy the data if it must be used later in the same txn.
6140
- *
6141
- * Also - our reference counting revolves around cursors, but overflow pages
6142
- * aren't pointed to by a cursor's page stack. We have to remember them
6143
- * explicitly, in the added mc_ovpg field. A single cursor can only hold a
6144
- * reference to one overflow page at a time.
6145
- *
6146
- * @param[in] txn the transaction for this access.
6147
- * @param[in] pgno the page number for the page to retrieve.
6148
- * @param[out] ret address of a pointer where the page's address will be stored.
6149
- * @return 0 on success, non-zero on failure.
6150
- */
6151
- static int
6152
- mdb_rpage_get(MDB_txn *txn, pgno_t pg0, MDB_page **ret)
6153
- {
6154
- MDB_env *env = txn->mt_env;
6155
- MDB_page *p;
6156
- MDB_ID3L tl = txn->mt_rpages;
6157
- MDB_ID3L el = env->me_rpages;
6158
- MDB_ID3 id3;
6159
- unsigned x, rem;
6160
- pgno_t pgno;
6161
- int rc, retries = 1;
6162
- #ifdef _WIN32
6163
- LARGE_INTEGER off;
6164
- SIZE_T len;
6165
- #define SET_OFF(off,val) off.QuadPart = val
6166
- #define MAP(rc,env,addr,len,off) \
6167
- addr = NULL; \
6168
- rc = NtMapViewOfSection(env->me_fmh, GetCurrentProcess(), &addr, 0, \
6169
- len, &off, &len, ViewUnmap, (env->me_flags & MDB_RDONLY) ? 0 : MEM_RESERVE, PAGE_READONLY); \
6170
- if (rc) rc = mdb_nt2win32(rc)
6171
- #else
6172
- off_t off;
6173
- size_t len;
6174
- #define SET_OFF(off,val) off = val
6175
- #define MAP(rc,env,addr,len,off) \
6176
- addr = mmap(NULL, len, PROT_READ, MAP_SHARED, env->me_fd, off); \
6177
- rc = (addr == MAP_FAILED) ? errno : 0
6178
- #endif
6179
-
6180
- /* remember the offset of the actual page number, so we can
6181
- * return the correct pointer at the end.
6182
- */
6183
- rem = pg0 & (MDB_RPAGE_CHUNK-1);
6184
- pgno = pg0 ^ rem;
6185
-
6186
- id3.mid = 0;
6187
- x = mdb_mid3l_search(tl, pgno);
6188
- if (x <= tl[0].mid && tl[x].mid == pgno) {
6189
- if (x != tl[0].mid && tl[x+1].mid == pg0)
6190
- x++;
6191
- /* check for overflow size */
6192
- p = (MDB_page *)((char *)tl[x].mptr + rem * env->me_psize);
6193
- if (IS_OVERFLOW(p) && p->mp_pages + rem > tl[x].mcnt) {
6194
- id3.mcnt = p->mp_pages + rem;
6195
- len = id3.mcnt * env->me_psize;
6196
- SET_OFF(off, pgno * env->me_psize);
6197
- MAP(rc, env, id3.mptr, len, off);
6198
- if (rc)
6199
- return rc;
6200
- /* check for local-only page */
6201
- if (rem) {
6202
- mdb_tassert(txn, tl[x].mid != pg0);
6203
- /* hope there's room to insert this locally.
6204
- * setting mid here tells later code to just insert
6205
- * this id3 instead of searching for a match.
6206
- */
6207
- id3.mid = pg0;
6208
- goto notlocal;
6209
- } else {
6210
- /* ignore the mapping we got from env, use new one */
6211
- tl[x].mptr = id3.mptr;
6212
- tl[x].mcnt = id3.mcnt;
6213
- /* if no active ref, see if we can replace in env */
6214
- if (!tl[x].mref) {
6215
- unsigned i;
6216
- pthread_mutex_lock(&env->me_rpmutex);
6217
- i = mdb_mid3l_search(el, tl[x].mid);
6218
- if (el[i].mref == 1) {
6219
- /* just us, replace it */
6220
- munmap(el[i].mptr, el[i].mcnt * env->me_psize);
6221
- el[i].mptr = tl[x].mptr;
6222
- el[i].mcnt = tl[x].mcnt;
6223
- } else {
6224
- /* there are others, remove ourself */
6225
- el[i].mref--;
6226
- }
6227
- pthread_mutex_unlock(&env->me_rpmutex);
6228
- }
6229
- }
6230
- }
6231
- id3.mptr = tl[x].mptr;
6232
- id3.mcnt = tl[x].mcnt;
6233
- tl[x].mref++;
6234
- goto ok;
6235
- }
6236
-
6237
- notlocal:
6238
- if (tl[0].mid >= MDB_TRPAGE_MAX - txn->mt_rpcheck) {
6239
- unsigned i, y;
6240
- /* purge unref'd pages from our list and unref in env */
6241
- pthread_mutex_lock(&env->me_rpmutex);
6242
- retry:
6243
- y = 0;
6244
- for (i=1; i<=tl[0].mid; i++) {
6245
- if (!tl[i].mref) {
6246
- if (!y) y = i;
6247
- /* tmp overflow pages don't go to env */
6248
- if (tl[i].mid & (MDB_RPAGE_CHUNK-1)) {
6249
- munmap(tl[i].mptr, tl[i].mcnt * env->me_psize);
6250
- continue;
6251
- }
6252
- x = mdb_mid3l_search(el, tl[i].mid);
6253
- el[x].mref--;
6254
- }
6255
- }
6256
- pthread_mutex_unlock(&env->me_rpmutex);
6257
- if (!y) {
6258
- /* we didn't find any unref'd chunks.
6259
- * if we're out of room, fail.
6260
- */
6261
- if (tl[0].mid >= MDB_TRPAGE_MAX)
6262
- return MDB_TXN_FULL;
6263
- /* otherwise, raise threshold for next time around
6264
- * and let this go.
6265
- */
6266
- txn->mt_rpcheck /= 2;
6267
- } else {
6268
- /* we found some unused; consolidate the list */
6269
- for (i=y+1; i<= tl[0].mid; i++)
6270
- if (tl[i].mref)
6271
- tl[y++] = tl[i];
6272
- tl[0].mid = y-1;
6273
- /* decrease the check threshold toward its original value */
6274
- if (!txn->mt_rpcheck)
6275
- txn->mt_rpcheck = 1;
6276
- while (txn->mt_rpcheck < tl[0].mid && txn->mt_rpcheck < MDB_TRPAGE_SIZE/2)
6277
- txn->mt_rpcheck *= 2;
6278
- }
6279
- }
6280
- if (tl[0].mid < MDB_TRPAGE_SIZE) {
6281
- id3.mref = 1;
6282
- if (id3.mid)
6283
- goto found;
6284
- /* don't map past last written page in read-only envs */
6285
- if ((env->me_flags & MDB_RDONLY) && pgno + MDB_RPAGE_CHUNK-1 > txn->mt_last_pgno)
6286
- id3.mcnt = txn->mt_last_pgno + 1 - pgno;
6287
- else
6288
- id3.mcnt = MDB_RPAGE_CHUNK;
6289
- len = id3.mcnt * env->me_psize;
6290
- id3.mid = pgno;
6291
-
6292
- /* search for page in env */
6293
- pthread_mutex_lock(&env->me_rpmutex);
6294
- x = mdb_mid3l_search(el, pgno);
6295
- if (x <= el[0].mid && el[x].mid == pgno) {
6296
- id3.mptr = el[x].mptr;
6297
- id3.mcnt = el[x].mcnt;
6298
- /* check for overflow size */
6299
- p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
6300
- if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) {
6301
- id3.mcnt = p->mp_pages + rem;
6302
- len = id3.mcnt * env->me_psize;
6303
- SET_OFF(off, pgno * env->me_psize);
6304
- MAP(rc, env, id3.mptr, len, off);
6305
- if (rc)
6306
- goto fail;
6307
- if (!el[x].mref) {
6308
- munmap(el[x].mptr, env->me_psize * el[x].mcnt);
6309
- el[x].mptr = id3.mptr;
6310
- el[x].mcnt = id3.mcnt;
6311
- } else {
6312
- id3.mid = pg0;
6313
- pthread_mutex_unlock(&env->me_rpmutex);
6314
- goto found;
6315
- }
6316
- }
6317
- el[x].mref++;
6318
- pthread_mutex_unlock(&env->me_rpmutex);
6319
- goto found;
6320
- }
6321
- if (el[0].mid >= MDB_ERPAGE_MAX - env->me_rpcheck) {
6322
- /* purge unref'd pages */
6323
- unsigned i, y = 0;
6324
- for (i=1; i<=el[0].mid; i++) {
6325
- if (!el[i].mref) {
6326
- if (!y) y = i;
6327
- munmap(el[i].mptr, env->me_psize * el[i].mcnt);
6328
- }
6329
- }
6330
- if (!y) {
6331
- if (retries) {
6332
- /* see if we can unref some local pages */
6333
- retries--;
6334
- id3.mid = 0;
6335
- goto retry;
6336
- }
6337
- if (el[0].mid >= MDB_ERPAGE_MAX) {
6338
- pthread_mutex_unlock(&env->me_rpmutex);
6339
- return MDB_MAP_FULL;
6340
- }
6341
- env->me_rpcheck /= 2;
6342
- } else {
6343
- for (i=y+1; i<= el[0].mid; i++)
6344
- if (el[i].mref)
6345
- el[y++] = el[i];
6346
- el[0].mid = y-1;
6347
- if (!env->me_rpcheck)
6348
- env->me_rpcheck = 1;
6349
- while (env->me_rpcheck < el[0].mid && env->me_rpcheck < MDB_ERPAGE_SIZE/2)
6350
- env->me_rpcheck *= 2;
6351
- }
6352
- }
6353
- SET_OFF(off, pgno * env->me_psize);
6354
- MAP(rc, env, id3.mptr, len, off);
6355
- if (rc) {
6356
- fail:
6357
- pthread_mutex_unlock(&env->me_rpmutex);
6358
- return rc;
6359
- }
6360
- /* check for overflow size */
6361
- p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
6362
- if (IS_OVERFLOW(p) && p->mp_pages + rem > id3.mcnt) {
6363
- id3.mcnt = p->mp_pages + rem;
6364
- munmap(id3.mptr, len);
6365
- len = id3.mcnt * env->me_psize;
6366
- MAP(rc, env, id3.mptr, len, off);
6367
- if (rc)
6368
- goto fail;
6369
- }
6370
- mdb_mid3l_insert(el, &id3);
6371
- pthread_mutex_unlock(&env->me_rpmutex);
6372
- found:
6373
- mdb_mid3l_insert(tl, &id3);
6374
- } else {
6375
- return MDB_TXN_FULL;
6376
- }
6377
- ok:
6378
- p = (MDB_page *)((char *)id3.mptr + rem * env->me_psize);
6379
- #if MDB_DEBUG /* we don't need this check any more */
6380
- if (IS_OVERFLOW(p)) {
6381
- mdb_tassert(txn, p->mp_pages + rem <= id3.mcnt);
6382
- }
6383
- #endif
6384
- *ret = p;
6385
- return MDB_SUCCESS;
6386
- }
6387
- #endif
6388
-
6389
5516
  /** Find the address of the page corresponding to a given page number.
6390
5517
  * Set #MDB_TXN_ERROR on failure.
6391
5518
  * @param[in] mc the cursor accessing the page.
@@ -6398,10 +5525,11 @@ static int
6398
5525
  mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl)
6399
5526
  {
6400
5527
  MDB_txn *txn = mc->mc_txn;
5528
+ MDB_env *env = txn->mt_env;
6401
5529
  MDB_page *p = NULL;
6402
5530
  int level;
6403
5531
 
6404
- if (! (mc->mc_flags & (C_ORIG_RDONLY|C_WRITEMAP))) {
5532
+ if (! (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_WRITEMAP))) {
6405
5533
  MDB_txn *tx2 = txn;
6406
5534
  level = 1;
6407
5535
  do {
@@ -6416,7 +5544,8 @@ mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl)
6416
5544
  MDB_ID pn = pgno << 1;
6417
5545
  x = mdb_midl_search(tx2->mt_spill_pgs, pn);
6418
5546
  if (x <= tx2->mt_spill_pgs[0] && tx2->mt_spill_pgs[x] == pn) {
6419
- goto mapped;
5547
+ p = (MDB_page *)(env->me_map + env->me_psize * pgno);
5548
+ goto done;
6420
5549
  }
6421
5550
  }
6422
5551
  if (dl[0].mid) {
@@ -6430,28 +5559,15 @@ mdb_page_get(MDB_cursor *mc, pgno_t pgno, MDB_page **ret, int *lvl)
6430
5559
  } while ((tx2 = tx2->mt_parent) != NULL);
6431
5560
  }
6432
5561
 
6433
- if (pgno >= txn->mt_next_pgno) {
6434
- DPRINTF(("page %"Yu" not found", pgno));
5562
+ if (pgno < txn->mt_next_pgno) {
5563
+ level = 0;
5564
+ p = (MDB_page *)(env->me_map + env->me_psize * pgno);
5565
+ } else {
5566
+ DPRINTF(("page %"Z"u not found", pgno));
6435
5567
  txn->mt_flags |= MDB_TXN_ERROR;
6436
5568
  return MDB_PAGE_NOTFOUND;
6437
5569
  }
6438
5570
 
6439
- level = 0;
6440
-
6441
- mapped:
6442
- {
6443
- #ifdef MDB_VL32
6444
- int rc = mdb_rpage_get(txn, pgno, &p);
6445
- if (rc) {
6446
- txn->mt_flags |= MDB_TXN_ERROR;
6447
- return rc;
6448
- }
6449
- #else
6450
- MDB_env *env = txn->mt_env;
6451
- p = (MDB_page *)(env->me_map + env->me_psize * pgno);
6452
- #endif
6453
- }
6454
-
6455
5571
  done:
6456
5572
  *ret = p;
6457
5573
  if (lvl)
@@ -6473,13 +5589,13 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
6473
5589
  MDB_node *node;
6474
5590
  indx_t i;
6475
5591
 
6476
- DPRINTF(("branch page %"Yu" has %u keys", mp->mp_pgno, NUMKEYS(mp)));
5592
+ DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp)));
6477
5593
  /* Don't assert on branch pages in the FreeDB. We can get here
6478
5594
  * while in the process of rebalancing a FreeDB branch page; we must
6479
5595
  * let that proceed. ITS#8336
6480
5596
  */
6481
5597
  mdb_cassert(mc, !mc->mc_dbi || NUMKEYS(mp) > 1);
6482
- DPRINTF(("found index 0 to page %"Yu, NODEPGNO(NODEPTR(mp, 0))));
5598
+ DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0))));
6483
5599
 
6484
5600
  if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) {
6485
5601
  i = 0;
@@ -6534,7 +5650,7 @@ ready:
6534
5650
  return MDB_CORRUPTED;
6535
5651
  }
6536
5652
 
6537
- DPRINTF(("found leaf page %"Yu" for key [%s]", mp->mp_pgno,
5653
+ DPRINTF(("found leaf page %"Z"u for key [%s]", mp->mp_pgno,
6538
5654
  key ? DKEY(key) : "null"));
6539
5655
  mc->mc_flags |= C_INITIALIZED;
6540
5656
  mc->mc_flags &= ~C_EOF;
@@ -6604,7 +5720,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
6604
5720
  MDB_node *leaf = mdb_node_search(&mc2,
6605
5721
  &mc->mc_dbx->md_name, &exact);
6606
5722
  if (!exact)
6607
- return MDB_NOTFOUND;
5723
+ return MDB_BAD_DBI;
6608
5724
  if ((leaf->mn_flags & (F_DUPDATA|F_SUBDATA)) != F_SUBDATA)
6609
5725
  return MDB_INCOMPATIBLE; /* not a named DB */
6610
5726
  rc = mdb_node_read(&mc2, leaf, &data);
@@ -6630,26 +5746,14 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
6630
5746
  }
6631
5747
 
6632
5748
  mdb_cassert(mc, root > 1);
6633
- if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) {
6634
- #ifdef MDB_VL32
6635
- if (mc->mc_pg[0])
6636
- MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[0]);
6637
- #endif
5749
+ if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
6638
5750
  if ((rc = mdb_page_get(mc, root, &mc->mc_pg[0], NULL)) != 0)
6639
5751
  return rc;
6640
- }
6641
5752
 
6642
- #ifdef MDB_VL32
6643
- {
6644
- int i;
6645
- for (i=1; i<mc->mc_snum; i++)
6646
- MDB_PAGE_UNREF(mc->mc_txn, mc->mc_pg[i]);
6647
- }
6648
- #endif
6649
5753
  mc->mc_snum = 1;
6650
5754
  mc->mc_top = 0;
6651
5755
 
6652
- DPRINTF(("db %d root page %"Yu" has flags 0x%X",
5756
+ DPRINTF(("db %d root page %"Z"u has flags 0x%X",
6653
5757
  DDBI(mc), root, mc->mc_pg[0]->mp_flags));
6654
5758
 
6655
5759
  if (flags & MDB_PS_MODIFY) {
@@ -6674,7 +5778,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
6674
5778
  MDB_ID pn = pg << 1;
6675
5779
  int rc;
6676
5780
 
6677
- DPRINTF(("free ov page %"Yu" (%d)", pg, ovpages));
5781
+ DPRINTF(("free ov page %"Z"u (%d)", pg, ovpages));
6678
5782
  /* If the page is dirty or on the spill list we just acquired it,
6679
5783
  * so we should give it back to our current free list, if any.
6680
5784
  * Otherwise put it onto the list of pages we freed in this txn.
@@ -6715,7 +5819,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
6715
5819
  j = ++(dl[0].mid);
6716
5820
  dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */
6717
5821
  txn->mt_flags |= MDB_TXN_ERROR;
6718
- return MDB_PROBLEM;
5822
+ return MDB_CORRUPTED;
6719
5823
  }
6720
5824
  }
6721
5825
  txn->mt_dirty_room++;
@@ -6735,10 +5839,6 @@ release:
6735
5839
  if (rc)
6736
5840
  return rc;
6737
5841
  }
6738
- #ifdef MDB_VL32
6739
- if (mc->mc_ovpg == mp)
6740
- mc->mc_ovpg = NULL;
6741
- #endif
6742
5842
  mc->mc_db->md_overflow_pages -= ovpages;
6743
5843
  return 0;
6744
5844
  }
@@ -6756,10 +5856,6 @@ mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data)
6756
5856
  pgno_t pgno;
6757
5857
  int rc;
6758
5858
 
6759
- if (MC_OVPG(mc)) {
6760
- MDB_PAGE_UNREF(mc->mc_txn, MC_OVPG(mc));
6761
- MC_SET_OVPG(mc, NULL);
6762
- }
6763
5859
  if (!F_ISSET(leaf->mn_flags, F_BIGDATA)) {
6764
5860
  data->mv_size = NODEDSZ(leaf);
6765
5861
  data->mv_data = NODEDATA(leaf);
@@ -6771,11 +5867,10 @@ mdb_node_read(MDB_cursor *mc, MDB_node *leaf, MDB_val *data)
6771
5867
  data->mv_size = NODEDSZ(leaf);
6772
5868
  memcpy(&pgno, NODEDATA(leaf), sizeof(pgno));
6773
5869
  if ((rc = mdb_page_get(mc, pgno, &omp, NULL)) != 0) {
6774
- DPRINTF(("read overflow page %"Yu" failed", pgno));
5870
+ DPRINTF(("read overflow page %"Z"u failed", pgno));
6775
5871
  return rc;
6776
5872
  }
6777
5873
  data->mv_data = METADATA(omp);
6778
- MC_SET_OVPG(mc, omp);
6779
5874
 
6780
5875
  return MDB_SUCCESS;
6781
5876
  }
@@ -6786,7 +5881,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi,
6786
5881
  {
6787
5882
  MDB_cursor mc;
6788
5883
  MDB_xcursor mx;
6789
- int exact = 0, rc;
5884
+ int exact = 0;
6790
5885
  DKBUF;
6791
5886
 
6792
5887
  DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
@@ -6798,12 +5893,7 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi,
6798
5893
  return MDB_BAD_TXN;
6799
5894
 
6800
5895
  mdb_cursor_init(&mc, txn, dbi, &mx);
6801
- rc = mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
6802
- /* unref all the pages when MDB_VL32 - caller must copy the data
6803
- * before doing anything else
6804
- */
6805
- MDB_CURSOR_UNREF(&mc, 1);
6806
- return rc;
5896
+ return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
6807
5897
  }
6808
5898
 
6809
5899
  /** Find a sibling for a page.
@@ -6820,19 +5910,13 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right)
6820
5910
  int rc;
6821
5911
  MDB_node *indx;
6822
5912
  MDB_page *mp;
6823
- #ifdef MDB_VL32
6824
- MDB_page *op;
6825
- #endif
6826
5913
 
6827
5914
  if (mc->mc_snum < 2) {
6828
5915
  return MDB_NOTFOUND; /* root has no siblings */
6829
5916
  }
6830
5917
 
6831
- #ifdef MDB_VL32
6832
- op = mc->mc_pg[mc->mc_top];
6833
- #endif
6834
5918
  mdb_cursor_pop(mc);
6835
- DPRINTF(("parent page is page %"Yu", index %u",
5919
+ DPRINTF(("parent page is page %"Z"u, index %u",
6836
5920
  mc->mc_pg[mc->mc_top]->mp_pgno, mc->mc_ki[mc->mc_top]));
6837
5921
 
6838
5922
  if (move_right ? (mc->mc_ki[mc->mc_top] + 1u >= NUMKEYS(mc->mc_pg[mc->mc_top]))
@@ -6855,8 +5939,6 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right)
6855
5939
  }
6856
5940
  mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
6857
5941
 
6858
- MDB_PAGE_UNREF(mc->mc_txn, op);
6859
-
6860
5942
  indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
6861
5943
  if ((rc = mdb_page_get(mc, NODEPGNO(indx), &mp, NULL)) != 0) {
6862
5944
  /* mc will be inconsistent if caller does mc_snum++ as above */
@@ -6904,9 +5986,6 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
6904
5986
  return rc;
6905
5987
  }
6906
5988
  }
6907
- else {
6908
- MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
6909
- }
6910
5989
  } else {
6911
5990
  mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
6912
5991
  if (op == MDB_NEXT_DUP)
@@ -6914,7 +5993,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
6914
5993
  }
6915
5994
  }
6916
5995
 
6917
- DPRINTF(("cursor_next: top page is %"Yu" in cursor %p",
5996
+ DPRINTF(("cursor_next: top page is %"Z"u in cursor %p",
6918
5997
  mdb_dbg_pgno(mp), (void *) mc));
6919
5998
  if (mc->mc_flags & C_DEL) {
6920
5999
  mc->mc_flags ^= C_DEL;
@@ -6928,12 +6007,12 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
6928
6007
  return rc;
6929
6008
  }
6930
6009
  mp = mc->mc_pg[mc->mc_top];
6931
- DPRINTF(("next page is %"Yu", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]));
6010
+ DPRINTF(("next page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]));
6932
6011
  } else
6933
6012
  mc->mc_ki[mc->mc_top]++;
6934
6013
 
6935
6014
  skip:
6936
- DPRINTF(("==> cursor points to page %"Yu" with %u keys, key index %u",
6015
+ DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
6937
6016
  mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
6938
6017
 
6939
6018
  if (IS_LEAF2(mp)) {
@@ -6990,9 +6069,6 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
6990
6069
  return rc;
6991
6070
  }
6992
6071
  }
6993
- else {
6994
- MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
6995
- }
6996
6072
  } else {
6997
6073
  mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
6998
6074
  if (op == MDB_PREV_DUP)
@@ -7000,7 +6076,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
7000
6076
  }
7001
6077
  }
7002
6078
 
7003
- DPRINTF(("cursor_prev: top page is %"Yu" in cursor %p",
6079
+ DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p",
7004
6080
  mdb_dbg_pgno(mp), (void *) mc));
7005
6081
 
7006
6082
  mc->mc_flags &= ~(C_EOF|C_DEL);
@@ -7012,11 +6088,11 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
7012
6088
  }
7013
6089
  mp = mc->mc_pg[mc->mc_top];
7014
6090
  mc->mc_ki[mc->mc_top] = NUMKEYS(mp) - 1;
7015
- DPRINTF(("prev page is %"Yu", key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]));
6091
+ DPRINTF(("prev page is %"Z"u, key index %u", mp->mp_pgno, mc->mc_ki[mc->mc_top]));
7016
6092
  } else
7017
6093
  mc->mc_ki[mc->mc_top]--;
7018
6094
 
7019
- DPRINTF(("==> cursor points to page %"Yu" with %u keys, key index %u",
6095
+ DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
7020
6096
  mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
7021
6097
 
7022
6098
  if (!IS_LEAF(mp))
@@ -7057,10 +6133,8 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7057
6133
  if (key->mv_size == 0)
7058
6134
  return MDB_BAD_VALSIZE;
7059
6135
 
7060
- if (mc->mc_xcursor) {
7061
- MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
6136
+ if (mc->mc_xcursor)
7062
6137
  mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
7063
- }
7064
6138
 
7065
6139
  /* See if we're already on the right page */
7066
6140
  if (mc->mc_flags & C_INITIALIZED) {
@@ -7071,7 +6145,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7071
6145
  mc->mc_ki[mc->mc_top] = 0;
7072
6146
  return MDB_NOTFOUND;
7073
6147
  }
7074
- if (mp->mp_flags & P_LEAF2) {
6148
+ if (MP_FLAGS(mp) & P_LEAF2) {
7075
6149
  nodekey.mv_size = mc->mc_db->md_pad;
7076
6150
  nodekey.mv_data = LEAF2KEY(mp, 0, nodekey.mv_size);
7077
6151
  } else {
@@ -7092,7 +6166,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7092
6166
  unsigned int i;
7093
6167
  unsigned int nkeys = NUMKEYS(mp);
7094
6168
  if (nkeys > 1) {
7095
- if (mp->mp_flags & P_LEAF2) {
6169
+ if (MP_FLAGS(mp) & P_LEAF2) {
7096
6170
  nodekey.mv_data = LEAF2KEY(mp,
7097
6171
  nkeys-1, nodekey.mv_size);
7098
6172
  } else {
@@ -7110,7 +6184,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7110
6184
  if (rc < 0) {
7111
6185
  if (mc->mc_ki[mc->mc_top] < NUMKEYS(mp)) {
7112
6186
  /* This is definitely the right page, skip search_page */
7113
- if (mp->mp_flags & P_LEAF2) {
6187
+ if (MP_FLAGS(mp) & P_LEAF2) {
7114
6188
  nodekey.mv_data = LEAF2KEY(mp,
7115
6189
  mc->mc_ki[mc->mc_top], nodekey.mv_size);
7116
6190
  } else {
@@ -7216,8 +6290,10 @@ set1:
7216
6290
  if ((rc = mdb_node_read(mc, leaf, &olddata)) != MDB_SUCCESS)
7217
6291
  return rc;
7218
6292
  dcmp = mc->mc_dbx->md_dcmp;
7219
- if (NEED_CMP_CLONG(dcmp, olddata.mv_size))
6293
+ #if UINT_MAX < SIZE_MAX
6294
+ if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
7220
6295
  dcmp = mdb_cmp_clong;
6296
+ #endif
7221
6297
  rc = dcmp(data, &olddata);
7222
6298
  if (rc) {
7223
6299
  if (op == MDB_GET_BOTH || rc > 0)
@@ -7249,10 +6325,8 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data)
7249
6325
  int rc;
7250
6326
  MDB_node *leaf;
7251
6327
 
7252
- if (mc->mc_xcursor) {
7253
- MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
6328
+ if (mc->mc_xcursor)
7254
6329
  mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
7255
- }
7256
6330
 
7257
6331
  if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
7258
6332
  rc = mdb_page_search(mc, NULL, MDB_PS_FIRST);
@@ -7296,10 +6370,8 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data)
7296
6370
  int rc;
7297
6371
  MDB_node *leaf;
7298
6372
 
7299
- if (mc->mc_xcursor) {
7300
- MDB_CURSOR_UNREF(&mc->mc_xcursor->mx_cursor, 0);
6373
+ if (mc->mc_xcursor)
7301
6374
  mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED|C_EOF);
7302
- }
7303
6375
 
7304
6376
  if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) {
7305
6377
  rc = mdb_page_search(mc, NULL, MDB_PS_LAST);
@@ -7489,6 +6561,7 @@ fetchm:
7489
6561
  rc = MDB_NOTFOUND;
7490
6562
  break;
7491
6563
  }
6564
+ mc->mc_flags &= ~C_EOF;
7492
6565
  {
7493
6566
  MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
7494
6567
  if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
@@ -7555,8 +6628,8 @@ mdb_cursor_touch(MDB_cursor *mc)
7555
6628
  /** Do not spill pages to disk if txn is getting full, may fail instead */
7556
6629
  #define MDB_NOSPILL 0x8000
7557
6630
 
7558
- int
7559
- mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
6631
+ static int
6632
+ _mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7560
6633
  unsigned int flags)
7561
6634
  {
7562
6635
  MDB_env *env;
@@ -7675,7 +6748,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7675
6748
  *mc->mc_dbflag |= DB_DIRTY;
7676
6749
  if ((mc->mc_db->md_flags & (MDB_DUPSORT|MDB_DUPFIXED))
7677
6750
  == MDB_DUPFIXED)
7678
- np->mp_flags |= P_LEAF2;
6751
+ MP_FLAGS(np) |= P_LEAF2;
7679
6752
  mc->mc_flags |= C_INITIALIZED;
7680
6753
  } else {
7681
6754
  /* make sure all cursor pages are writable */
@@ -7697,7 +6770,7 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7697
6770
  fp_flags = P_LEAF|P_DIRTY;
7698
6771
  fp = env->me_pbuf;
7699
6772
  fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
7700
- fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE);
6773
+ MP_LOWER(fp) = MP_UPPER(fp) = (PAGEHDRSZ-PAGEBASE);
7701
6774
  olddata.mv_size = PAGEHDRSZ;
7702
6775
  goto prep_subDB;
7703
6776
  }
@@ -7756,8 +6829,10 @@ more:
7756
6829
  if (flags == MDB_CURRENT)
7757
6830
  goto current;
7758
6831
  dcmp = mc->mc_dbx->md_dcmp;
7759
- if (NEED_CMP_CLONG(dcmp, olddata.mv_size))
6832
+ #if UINT_MAX < SIZE_MAX
6833
+ if (dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
7760
6834
  dcmp = mdb_cmp_clong;
6835
+ #endif
7761
6836
  /* does data match? */
7762
6837
  if (!dcmp(data, &olddata)) {
7763
6838
  if (flags & (MDB_NODUPDATA|MDB_APPENDDUP))
@@ -7771,18 +6846,18 @@ more:
7771
6846
  dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size);
7772
6847
 
7773
6848
  /* Make sub-page header for the dup items, with dummy body */
7774
- fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
7775
- fp->mp_lower = (PAGEHDRSZ-PAGEBASE);
6849
+ MP_FLAGS(fp) = P_LEAF|P_DIRTY|P_SUBP;
6850
+ MP_LOWER(fp) = (PAGEHDRSZ-PAGEBASE);
7776
6851
  xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
7777
6852
  if (mc->mc_db->md_flags & MDB_DUPFIXED) {
7778
- fp->mp_flags |= P_LEAF2;
6853
+ MP_FLAGS(fp) |= P_LEAF2;
7779
6854
  fp->mp_pad = data->mv_size;
7780
6855
  xdata.mv_size += 2 * data->mv_size; /* leave space for 2 more */
7781
6856
  } else {
7782
6857
  xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) +
7783
6858
  (dkey.mv_size & 1) + (data->mv_size & 1);
7784
6859
  }
7785
- fp->mp_upper = xdata.mv_size - PAGEBASE;
6860
+ MP_UPPER(fp) = xdata.mv_size - PAGEBASE;
7786
6861
  olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */
7787
6862
  } else if (leaf->mn_flags & F_SUBDATA) {
7788
6863
  /* Data is on sub-DB, just store it */
@@ -7805,8 +6880,8 @@ more:
7805
6880
  }
7806
6881
  /* FALLTHRU */ /* Big enough MDB_DUPFIXED sub-page */
7807
6882
  case MDB_CURRENT:
7808
- fp->mp_flags |= P_DIRTY;
7809
- COPY_PGNO(fp->mp_pgno, mp->mp_pgno);
6883
+ MP_FLAGS(fp) |= P_DIRTY;
6884
+ COPY_PGNO(MP_PGNO(fp), MP_PGNO(mp));
7810
6885
  mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
7811
6886
  flags |= F_DUPDATA;
7812
6887
  goto put_sub;
@@ -7814,7 +6889,7 @@ more:
7814
6889
  xdata.mv_size = olddata.mv_size + offset;
7815
6890
  }
7816
6891
 
7817
- fp_flags = fp->mp_flags;
6892
+ fp_flags = MP_FLAGS(fp);
7818
6893
  if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) {
7819
6894
  /* Too big for a sub-page, convert to sub-DB */
7820
6895
  fp_flags &= ~P_SUBP;
@@ -7844,16 +6919,16 @@ prep_subDB:
7844
6919
  sub_root = mp;
7845
6920
  }
7846
6921
  if (mp != fp) {
7847
- mp->mp_flags = fp_flags | P_DIRTY;
7848
- mp->mp_pad = fp->mp_pad;
7849
- mp->mp_lower = fp->mp_lower;
7850
- mp->mp_upper = fp->mp_upper + offset;
6922
+ MP_FLAGS(mp) = fp_flags | P_DIRTY;
6923
+ MP_PAD(mp) = MP_PAD(fp);
6924
+ MP_LOWER(mp) = MP_LOWER(fp);
6925
+ MP_UPPER(mp) = MP_UPPER(fp) + offset;
7851
6926
  if (fp_flags & P_LEAF2) {
7852
6927
  memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
7853
6928
  } else {
7854
- memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE,
7855
- olddata.mv_size - fp->mp_upper - PAGEBASE);
7856
- memcpy((char *)(&mp->mp_ptrs), (char *)(&fp->mp_ptrs), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0]));
6929
+ memcpy((char *)mp + MP_UPPER(mp) + PAGEBASE, (char *)fp + MP_UPPER(fp) + PAGEBASE,
6930
+ olddata.mv_size - MP_UPPER(fp) - PAGEBASE);
6931
+ memcpy((char *)MP_PTRS(mp), (char *)MP_PTRS(fp), NUMKEYS(fp) * sizeof(mp->mp_ptrs[0]));
7857
6932
  for (i=0; i<NUMKEYS(fp); i++)
7858
6933
  mp->mp_ptrs[i] += offset;
7859
6934
  }
@@ -7918,7 +6993,7 @@ current:
7918
6993
  * Copy end of page, adjusting alignment so
7919
6994
  * compiler may copy words instead of bytes.
7920
6995
  */
7921
- off = (PAGEHDRSZ + data->mv_size) & -sizeof(size_t);
6996
+ off = (PAGEHDRSZ + data->mv_size) & -(int)sizeof(size_t);
7922
6997
  memcpy((size_t *)((char *)np + off),
7923
6998
  (size_t *)((char *)omp + off), sz - off);
7924
6999
  sz = PAGEHDRSZ;
@@ -7946,11 +7021,14 @@ current:
7946
7021
  else if (!(mc->mc_flags & C_SUB))
7947
7022
  memcpy(olddata.mv_data, data->mv_data, data->mv_size);
7948
7023
  else {
7024
+ if (key->mv_size != NODEKSZ(leaf))
7025
+ goto new_ksize;
7949
7026
  memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
7950
7027
  goto fix_parent;
7951
7028
  }
7952
7029
  return MDB_SUCCESS;
7953
7030
  }
7031
+ new_ksize:
7954
7032
  mdb_node_del(mc, 0);
7955
7033
  }
7956
7034
 
@@ -7997,7 +7075,7 @@ new_sub:
7997
7075
  */
7998
7076
  if (do_sub) {
7999
7077
  int xflags, new_dupdata;
8000
- mdb_size_t ecount;
7078
+ size_t ecount;
8001
7079
  put_sub:
8002
7080
  xdata.mv_size = 0;
8003
7081
  xdata.mv_data = "";
@@ -8014,7 +7092,7 @@ put_sub:
8014
7092
  new_dupdata = (int)dkey.mv_size;
8015
7093
  /* converted, write the original data first */
8016
7094
  if (dkey.mv_size) {
8017
- rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
7095
+ rc = _mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
8018
7096
  if (rc)
8019
7097
  goto bad_sub;
8020
7098
  /* we've done our job */
@@ -8042,7 +7120,7 @@ put_sub:
8042
7120
  ecount = mc->mc_xcursor->mx_db.md_entries;
8043
7121
  if (flags & MDB_APPENDDUP)
8044
7122
  xflags |= MDB_APPEND;
8045
- rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
7123
+ rc = _mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
8046
7124
  if (flags & F_SUBDATA) {
8047
7125
  void *db = NODEDATA(leaf);
8048
7126
  memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
@@ -8076,14 +7154,27 @@ put_sub:
8076
7154
  return rc;
8077
7155
  bad_sub:
8078
7156
  if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */
8079
- rc = MDB_PROBLEM;
7157
+ rc = MDB_CORRUPTED;
8080
7158
  }
8081
7159
  mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
8082
7160
  return rc;
8083
7161
  }
8084
7162
 
8085
7163
  int
8086
- mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
7164
+ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
7165
+ unsigned int flags)
7166
+ {
7167
+ DKBUF;
7168
+ DDBUF;
7169
+ int rc = _mdb_cursor_put(mc, key, data, flags);
7170
+ MDB_TRACE(("%p, %"Z"u[%s], %"Z"u%s, %u",
7171
+ mc, key ? key->mv_size:0, DKEY(key), data ? data->mv_size:0,
7172
+ data ? mdb_dval(mc->mc_txn, mc->mc_dbi, data, dbuf):"", flags));
7173
+ return rc;
7174
+ }
7175
+
7176
+ static int
7177
+ _mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
8087
7178
  {
8088
7179
  MDB_node *leaf;
8089
7180
  MDB_page *mp;
@@ -8121,7 +7212,7 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
8121
7212
  if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
8122
7213
  mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
8123
7214
  }
8124
- rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
7215
+ rc = _mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
8125
7216
  if (rc)
8126
7217
  return rc;
8127
7218
  /* If sub-DB still has entries, we're done */
@@ -8185,6 +7276,14 @@ fail:
8185
7276
  return rc;
8186
7277
  }
8187
7278
 
7279
+ int
7280
+ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
7281
+ {
7282
+ MDB_TRACE(("%p, %u",
7283
+ mc, flags));
7284
+ return _mdb_cursor_del(mc, flags);
7285
+ }
7286
+
8188
7287
  /** Allocate and initialize new pages for a database.
8189
7288
  * Set #MDB_TXN_ERROR on failure.
8190
7289
  * @param[in] mc a cursor on the database being added to.
@@ -8202,7 +7301,7 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp)
8202
7301
 
8203
7302
  if ((rc = mdb_page_alloc(mc, num, &np)))
8204
7303
  return rc;
8205
- DPRINTF(("allocated new mpage %"Yu", page size %u",
7304
+ DPRINTF(("allocated new mpage %"Z"u, page size %u",
8206
7305
  np->mp_pgno, mc->mc_txn->mt_env->me_psize));
8207
7306
  np->mp_flags = flags | P_DIRTY;
8208
7307
  np->mp_lower = (PAGEHDRSZ-PAGEBASE);
@@ -8301,9 +7400,9 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
8301
7400
  void *ndata;
8302
7401
  DKBUF;
8303
7402
 
8304
- mdb_cassert(mc, mp->mp_upper >= mp->mp_lower);
7403
+ mdb_cassert(mc, MP_UPPER(mp) >= MP_LOWER(mp));
8305
7404
 
8306
- DPRINTF(("add to %s %spage %"Yu" index %i, data size %"Z"u key size %"Z"u [%s]",
7405
+ DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]",
8307
7406
  IS_LEAF(mp) ? "leaf" : "branch",
8308
7407
  IS_SUBP(mp) ? "sub-" : "",
8309
7408
  mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0,
@@ -8320,8 +7419,8 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
8320
7419
  memcpy(ptr, key->mv_data, ksize);
8321
7420
 
8322
7421
  /* Just using these for counting */
8323
- mp->mp_lower += sizeof(indx_t);
8324
- mp->mp_upper -= ksize - sizeof(indx_t);
7422
+ MP_LOWER(mp) += sizeof(indx_t);
7423
+ MP_UPPER(mp) -= ksize - sizeof(indx_t);
8325
7424
  return MDB_SUCCESS;
8326
7425
  }
8327
7426
 
@@ -8344,7 +7443,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
8344
7443
  goto full;
8345
7444
  if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
8346
7445
  return rc;
8347
- DPRINTF(("allocated overflow page %"Yu, ofp->mp_pgno));
7446
+ DPRINTF(("allocated overflow page %"Z"u", ofp->mp_pgno));
8348
7447
  flags |= F_BIGDATA;
8349
7448
  goto update;
8350
7449
  } else {
@@ -8358,14 +7457,14 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
8358
7457
  update:
8359
7458
  /* Move higher pointers up one slot. */
8360
7459
  for (i = NUMKEYS(mp); i > indx; i--)
8361
- mp->mp_ptrs[i] = mp->mp_ptrs[i - 1];
7460
+ MP_PTRS(mp)[i] = MP_PTRS(mp)[i - 1];
8362
7461
 
8363
7462
  /* Adjust free space offsets. */
8364
- ofs = mp->mp_upper - node_size;
8365
- mdb_cassert(mc, ofs >= mp->mp_lower + sizeof(indx_t));
8366
- mp->mp_ptrs[indx] = ofs;
8367
- mp->mp_upper = ofs;
8368
- mp->mp_lower += sizeof(indx_t);
7463
+ ofs = MP_UPPER(mp) - node_size;
7464
+ mdb_cassert(mc, ofs >= MP_LOWER(mp) + sizeof(indx_t));
7465
+ MP_PTRS(mp)[indx] = ofs;
7466
+ MP_UPPER(mp) = ofs;
7467
+ MP_LOWER(mp) += sizeof(indx_t);
8369
7468
 
8370
7469
  /* Write the node data. */
8371
7470
  node = NODEPTR(mp, indx);
@@ -8401,9 +7500,9 @@ update:
8401
7500
  return MDB_SUCCESS;
8402
7501
 
8403
7502
  full:
8404
- DPRINTF(("not enough room in page %"Yu", got %u ptrs",
7503
+ DPRINTF(("not enough room in page %"Z"u, got %u ptrs",
8405
7504
  mdb_dbg_pgno(mp), NUMKEYS(mp)));
8406
- DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room));
7505
+ DPRINTF(("upper-lower = %u - %u = %"Z"d", MP_UPPER(mp),MP_LOWER(mp),room));
8407
7506
  DPRINTF(("node size = %"Z"u", node_size));
8408
7507
  mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
8409
7508
  return MDB_PAGE_FULL;
@@ -8424,7 +7523,7 @@ mdb_node_del(MDB_cursor *mc, int ksize)
8424
7523
  MDB_node *node;
8425
7524
  char *base;
8426
7525
 
8427
- DPRINTF(("delete node %u on %s page %"Yu, indx,
7526
+ DPRINTF(("delete node %u on %s page %"Z"u", indx,
8428
7527
  IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp)));
8429
7528
  numkeys = NUMKEYS(mp);
8430
7529
  mdb_cassert(mc, indx < numkeys);
@@ -8434,8 +7533,8 @@ mdb_node_del(MDB_cursor *mc, int ksize)
8434
7533
  base = LEAF2KEY(mp, indx, ksize);
8435
7534
  if (x)
8436
7535
  memmove(base, base + ksize, x * ksize);
8437
- mp->mp_lower -= sizeof(indx_t);
8438
- mp->mp_upper += ksize - sizeof(indx_t);
7536
+ MP_LOWER(mp) -= sizeof(indx_t);
7537
+ MP_UPPER(mp) += ksize - sizeof(indx_t);
8439
7538
  return;
8440
7539
  }
8441
7540
 
@@ -8449,21 +7548,21 @@ mdb_node_del(MDB_cursor *mc, int ksize)
8449
7548
  }
8450
7549
  sz = EVEN(sz);
8451
7550
 
8452
- ptr = mp->mp_ptrs[indx];
7551
+ ptr = MP_PTRS(mp)[indx];
8453
7552
  for (i = j = 0; i < numkeys; i++) {
8454
7553
  if (i != indx) {
8455
- mp->mp_ptrs[j] = mp->mp_ptrs[i];
8456
- if (mp->mp_ptrs[i] < ptr)
8457
- mp->mp_ptrs[j] += sz;
7554
+ MP_PTRS(mp)[j] = MP_PTRS(mp)[i];
7555
+ if (MP_PTRS(mp)[i] < ptr)
7556
+ MP_PTRS(mp)[j] += sz;
8458
7557
  j++;
8459
7558
  }
8460
7559
  }
8461
7560
 
8462
- base = (char *)mp + mp->mp_upper + PAGEBASE;
8463
- memmove(base + sz, base, ptr - mp->mp_upper);
7561
+ base = (char *)mp + MP_UPPER(mp) + PAGEBASE;
7562
+ memmove(base + sz, base, ptr - MP_UPPER(mp));
8464
7563
 
8465
- mp->mp_lower -= sizeof(indx_t);
8466
- mp->mp_upper += sz;
7564
+ MP_LOWER(mp) -= sizeof(indx_t);
7565
+ MP_UPPER(mp) += sz;
8467
7566
  }
8468
7567
 
8469
7568
  /** Compact the main page after deleting a node on a subpage.
@@ -8492,11 +7591,11 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
8492
7591
  } else {
8493
7592
  xp = (MDB_page *)((char *)sp + delta); /* destination subpage */
8494
7593
  for (i = NUMKEYS(sp); --i >= 0; )
8495
- xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta;
7594
+ MP_PTRS(xp)[i] = MP_PTRS(sp)[i] - delta;
8496
7595
  len = PAGEHDRSZ;
8497
7596
  }
8498
- sp->mp_upper = sp->mp_lower;
8499
- COPY_PGNO(sp->mp_pgno, mp->mp_pgno);
7597
+ MP_UPPER(sp) = MP_LOWER(sp);
7598
+ COPY_PGNO(MP_PGNO(sp), mp->mp_pgno);
8500
7599
  SETDSZ(node, nsize);
8501
7600
 
8502
7601
  /* Shift <lower nodes...initial part of subpage> upward */
@@ -8533,8 +7632,7 @@ mdb_xcursor_init0(MDB_cursor *mc)
8533
7632
  mx->mx_cursor.mc_dbflag = &mx->mx_dbflag;
8534
7633
  mx->mx_cursor.mc_snum = 0;
8535
7634
  mx->mx_cursor.mc_top = 0;
8536
- MC_SET_OVPG(&mx->mx_cursor, NULL);
8537
- mx->mx_cursor.mc_flags = C_SUB | (mc->mc_flags & (C_ORIG_RDONLY|C_WRITEMAP));
7635
+ mx->mx_cursor.mc_flags = C_SUB;
8538
7636
  mx->mx_dbx.md_name.mv_size = 0;
8539
7637
  mx->mx_dbx.md_name.mv_data = NULL;
8540
7638
  mx->mx_dbx.md_cmp = mc->mc_dbx->md_dcmp;
@@ -8553,12 +7651,12 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
8553
7651
  {
8554
7652
  MDB_xcursor *mx = mc->mc_xcursor;
8555
7653
 
8556
- mx->mx_cursor.mc_flags &= C_SUB|C_ORIG_RDONLY|C_WRITEMAP;
8557
7654
  if (node->mn_flags & F_SUBDATA) {
8558
7655
  memcpy(&mx->mx_db, NODEDATA(node), sizeof(MDB_db));
8559
7656
  mx->mx_cursor.mc_pg[0] = 0;
8560
7657
  mx->mx_cursor.mc_snum = 0;
8561
7658
  mx->mx_cursor.mc_top = 0;
7659
+ mx->mx_cursor.mc_flags = C_SUB;
8562
7660
  } else {
8563
7661
  MDB_page *fp = NODEDATA(node);
8564
7662
  mx->mx_db.md_pad = 0;
@@ -8568,10 +7666,10 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
8568
7666
  mx->mx_db.md_leaf_pages = 1;
8569
7667
  mx->mx_db.md_overflow_pages = 0;
8570
7668
  mx->mx_db.md_entries = NUMKEYS(fp);
8571
- COPY_PGNO(mx->mx_db.md_root, fp->mp_pgno);
7669
+ COPY_PGNO(mx->mx_db.md_root, MP_PGNO(fp));
8572
7670
  mx->mx_cursor.mc_snum = 1;
8573
7671
  mx->mx_cursor.mc_top = 0;
8574
- mx->mx_cursor.mc_flags |= C_INITIALIZED;
7672
+ mx->mx_cursor.mc_flags = C_INITIALIZED|C_SUB;
8575
7673
  mx->mx_cursor.mc_pg[0] = fp;
8576
7674
  mx->mx_cursor.mc_ki[0] = 0;
8577
7675
  if (mc->mc_db->md_flags & MDB_DUPFIXED) {
@@ -8581,11 +7679,13 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
8581
7679
  mx->mx_db.md_flags |= MDB_INTEGERKEY;
8582
7680
  }
8583
7681
  }
8584
- DPRINTF(("Sub-db -%u root page %"Yu, mx->mx_cursor.mc_dbi,
7682
+ DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi,
8585
7683
  mx->mx_db.md_root));
8586
7684
  mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA;
8587
- if (NEED_CMP_CLONG(mx->mx_dbx.md_cmp, mx->mx_db.md_pad))
7685
+ #if UINT_MAX < SIZE_MAX
7686
+ if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t))
8588
7687
  mx->mx_dbx.md_cmp = mdb_cmp_clong;
7688
+ #endif
8589
7689
  }
8590
7690
 
8591
7691
 
@@ -8608,7 +7708,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata)
8608
7708
  mx->mx_cursor.mc_flags |= C_INITIALIZED;
8609
7709
  mx->mx_cursor.mc_ki[0] = 0;
8610
7710
  mx->mx_dbflag = DB_VALID|DB_USRVALID|DB_DUPDATA;
8611
- #if UINT_MAX < MDB_SIZE_MAX /* matches mdb_xcursor_init1:NEED_CMP_CLONG() */
7711
+ #if UINT_MAX < SIZE_MAX
8612
7712
  mx->mx_dbx.md_cmp = src_mx->mx_dbx.md_cmp;
8613
7713
  #endif
8614
7714
  } else if (!(mx->mx_cursor.mc_flags & C_INITIALIZED)) {
@@ -8616,7 +7716,7 @@ mdb_xcursor_init2(MDB_cursor *mc, MDB_xcursor *src_mx, int new_dupdata)
8616
7716
  }
8617
7717
  mx->mx_db = src_mx->mx_db;
8618
7718
  mx->mx_cursor.mc_pg[0] = src_mx->mx_cursor.mc_pg[0];
8619
- DPRINTF(("Sub-db -%u root page %"Yu, mx->mx_cursor.mc_dbi,
7719
+ DPRINTF(("Sub-db -%u root page %"Z"u", mx->mx_cursor.mc_dbi,
8620
7720
  mx->mx_db.md_root));
8621
7721
  }
8622
7722
 
@@ -8635,8 +7735,7 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
8635
7735
  mc->mc_top = 0;
8636
7736
  mc->mc_pg[0] = 0;
8637
7737
  mc->mc_ki[0] = 0;
8638
- MC_SET_OVPG(mc, NULL);
8639
- mc->mc_flags = txn->mt_flags & (C_ORIG_RDONLY|C_WRITEMAP);
7738
+ mc->mc_flags = 0;
8640
7739
  if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
8641
7740
  mdb_tassert(txn, mx != NULL);
8642
7741
  mc->mc_xcursor = mx;
@@ -8678,6 +7777,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
8678
7777
  return ENOMEM;
8679
7778
  }
8680
7779
 
7780
+ MDB_TRACE(("%p, %u = %p", txn, dbi, mc));
8681
7781
  *ret = mc;
8682
7782
 
8683
7783
  return MDB_SUCCESS;
@@ -8701,7 +7801,7 @@ mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc)
8701
7801
 
8702
7802
  /* Return the count of duplicate data items for the current key */
8703
7803
  int
8704
- mdb_cursor_count(MDB_cursor *mc, mdb_size_t *countp)
7804
+ mdb_cursor_count(MDB_cursor *mc, size_t *countp)
8705
7805
  {
8706
7806
  MDB_node *leaf;
8707
7807
 
@@ -8741,14 +7841,9 @@ mdb_cursor_count(MDB_cursor *mc, mdb_size_t *countp)
8741
7841
  void
8742
7842
  mdb_cursor_close(MDB_cursor *mc)
8743
7843
  {
8744
- if (mc) {
8745
- MDB_CURSOR_UNREF(mc, 0);
8746
- }
7844
+ MDB_TRACE(("%p", mc));
8747
7845
  if (mc && !mc->mc_backup) {
8748
- /* Remove from txn, if tracked.
8749
- * A read-only txn (!C_UNTRACK) may have been freed already,
8750
- * so do not peek inside it. Only write txns track cursors.
8751
- */
7846
+ /* remove from txn, if tracked */
8752
7847
  if ((mc->mc_flags & C_UNTRACK) && mc->mc_txn->mt_cursors) {
8753
7848
  MDB_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi];
8754
7849
  while (*prev && *prev != mc) prev = &(*prev)->mc_next;
@@ -8799,7 +7894,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
8799
7894
  char kbuf2[DKBUF_MAXKEYSIZE*2+1];
8800
7895
  k2.mv_data = NODEKEY(node);
8801
7896
  k2.mv_size = node->mn_ksize;
8802
- DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Yu,
7897
+ DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u",
8803
7898
  indx, ptr,
8804
7899
  mdb_dkey(&k2, kbuf2),
8805
7900
  DKEY(key),
@@ -8947,7 +8042,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
8947
8042
  return rc;
8948
8043
  }
8949
8044
 
8950
- DPRINTF(("moving %s node %u [%s] on page %"Yu" to node %u on page %"Yu,
8045
+ DPRINTF(("moving %s node %u [%s] on page %"Z"u to node %u on page %"Z"u",
8951
8046
  IS_LEAF(csrc->mc_pg[csrc->mc_top]) ? "leaf" : "branch",
8952
8047
  csrc->mc_ki[csrc->mc_top],
8953
8048
  DKEY(&key),
@@ -9033,7 +8128,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
9033
8128
  key.mv_size = NODEKSZ(srcnode);
9034
8129
  key.mv_data = NODEKEY(srcnode);
9035
8130
  }
9036
- DPRINTF(("update separator for source page %"Yu" to [%s]",
8131
+ DPRINTF(("update separator for source page %"Z"u to [%s]",
9037
8132
  csrc->mc_pg[csrc->mc_top]->mp_pgno, DKEY(&key)));
9038
8133
  mdb_cursor_copy(csrc, &mn);
9039
8134
  mn.mc_snum--;
@@ -9064,7 +8159,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
9064
8159
  key.mv_size = NODEKSZ(srcnode);
9065
8160
  key.mv_data = NODEKEY(srcnode);
9066
8161
  }
9067
- DPRINTF(("update separator for destination page %"Yu" to [%s]",
8162
+ DPRINTF(("update separator for destination page %"Z"u to [%s]",
9068
8163
  cdst->mc_pg[cdst->mc_top]->mp_pgno, DKEY(&key)));
9069
8164
  mdb_cursor_copy(cdst, &mn);
9070
8165
  mn.mc_snum--;
@@ -9110,7 +8205,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
9110
8205
  psrc = csrc->mc_pg[csrc->mc_top];
9111
8206
  pdst = cdst->mc_pg[cdst->mc_top];
9112
8207
 
9113
- DPRINTF(("merging page %"Yu" into %"Yu, psrc->mp_pgno, pdst->mp_pgno));
8208
+ DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno));
9114
8209
 
9115
8210
  mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
9116
8211
  mdb_cassert(csrc, cdst->mc_snum > 1);
@@ -9167,7 +8262,7 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
9167
8262
  }
9168
8263
  }
9169
8264
 
9170
- DPRINTF(("dst page %"Yu" now has %u keys (%.1f%% filled)",
8265
+ DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)",
9171
8266
  pdst->mp_pgno, NUMKEYS(pdst),
9172
8267
  (float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10));
9173
8268
 
@@ -9251,7 +8346,6 @@ mdb_cursor_copy(const MDB_cursor *csrc, MDB_cursor *cdst)
9251
8346
  cdst->mc_snum = csrc->mc_snum;
9252
8347
  cdst->mc_top = csrc->mc_top;
9253
8348
  cdst->mc_flags = csrc->mc_flags;
9254
- MC_SET_OVPG(cdst, MC_OVPG(csrc));
9255
8349
 
9256
8350
  for (i=0; i<csrc->mc_snum; i++) {
9257
8351
  cdst->mc_pg[i] = csrc->mc_pg[i];
@@ -9280,14 +8374,14 @@ mdb_rebalance(MDB_cursor *mc)
9280
8374
  minkeys = 1;
9281
8375
  thresh = FILL_THRESHOLD;
9282
8376
  }
9283
- DPRINTF(("rebalancing %s page %"Yu" (has %u keys, %.1f%% full)",
8377
+ DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)",
9284
8378
  IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
9285
8379
  mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]),
9286
8380
  (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10));
9287
8381
 
9288
8382
  if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= thresh &&
9289
8383
  NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
9290
- DPRINTF(("no need to rebalance page %"Yu", above fill threshold",
8384
+ DPRINTF(("no need to rebalance page %"Z"u, above fill threshold",
9291
8385
  mdb_dbg_pgno(mc->mc_pg[mc->mc_top])));
9292
8386
  return MDB_SUCCESS;
9293
8387
  }
@@ -9416,7 +8510,7 @@ mdb_rebalance(MDB_cursor *mc)
9416
8510
  fromleft = 1;
9417
8511
  }
9418
8512
 
9419
- DPRINTF(("found neighbor page %"Yu" (%u keys, %.1f%% full)",
8513
+ DPRINTF(("found neighbor page %"Z"u (%u keys, %.1f%% full)",
9420
8514
  mn.mc_pg[mn.mc_top]->mp_pgno, NUMKEYS(mn.mc_pg[mn.mc_top]),
9421
8515
  (float)PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) / 10));
9422
8516
 
@@ -9522,7 +8616,7 @@ mdb_cursor_del0(MDB_cursor *mc)
9522
8616
  goto fail;
9523
8617
  }
9524
8618
  if (m3->mc_xcursor && !(m3->mc_flags & C_EOF)) {
9525
- MDB_node *node = NODEPTR(m3->mc_pg[m3->mc_top], m3->mc_ki[m3->mc_top]);
8619
+ MDB_node *node = NODEPTR(m3->mc_pg[mc->mc_top], m3->mc_ki[mc->mc_top]);
9526
8620
  /* If this node has dupdata, it may need to be reinited
9527
8621
  * because its data has moved.
9528
8622
  * If the xcursor was not initd it must be reinited.
@@ -9557,6 +8651,8 @@ int
9557
8651
  mdb_del(MDB_txn *txn, MDB_dbi dbi,
9558
8652
  MDB_val *key, MDB_val *data)
9559
8653
  {
8654
+ DKBUF;
8655
+ DDBUF;
9560
8656
  if (!key || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID))
9561
8657
  return EINVAL;
9562
8658
 
@@ -9568,6 +8664,9 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi,
9568
8664
  data = NULL;
9569
8665
  }
9570
8666
 
8667
+ MDB_TRACE(("%p, %u, %"Z"u[%s], %"Z"u%s",
8668
+ txn, dbi, key ? key->mv_size:0, DKEY(key), data ? data->mv_size:0,
8669
+ data ? mdb_dval(txn, dbi, data, dbuf):""));
9571
8670
  return mdb_del0(txn, dbi, key, data, 0);
9572
8671
  }
9573
8672
 
@@ -9605,9 +8704,10 @@ mdb_del0(MDB_txn *txn, MDB_dbi dbi,
9605
8704
  * run out of space, triggering a split. We need this
9606
8705
  * cursor to be consistent until the end of the rebalance.
9607
8706
  */
8707
+ mc.mc_flags |= C_UNTRACK;
9608
8708
  mc.mc_next = txn->mt_cursors[dbi];
9609
8709
  txn->mt_cursors[dbi] = &mc;
9610
- rc = mdb_cursor_del(&mc, flags);
8710
+ rc = _mdb_cursor_del(&mc, flags);
9611
8711
  txn->mt_cursors[dbi] = mc.mc_next;
9612
8712
  }
9613
8713
  return rc;
@@ -9646,7 +8746,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9646
8746
  newindx = mc->mc_ki[mc->mc_top];
9647
8747
  nkeys = NUMKEYS(mp);
9648
8748
 
9649
- DPRINTF(("-----> splitting %s page %"Yu" and adding [%s] at index %i/%i",
8749
+ DPRINTF(("-----> splitting %s page %"Z"u and adding [%s] at index %i/%i",
9650
8750
  IS_LEAF(mp) ? "leaf" : "branch", mp->mp_pgno,
9651
8751
  DKEY(newkey), mc->mc_ki[mc->mc_top], nkeys));
9652
8752
 
@@ -9654,7 +8754,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9654
8754
  if ((rc = mdb_page_new(mc, mp->mp_flags, 1, &rp)))
9655
8755
  return rc;
9656
8756
  rp->mp_pad = mp->mp_pad;
9657
- DPRINTF(("new right sibling: page %"Yu, rp->mp_pgno));
8757
+ DPRINTF(("new right sibling: page %"Z"u", rp->mp_pgno));
9658
8758
 
9659
8759
  /* Usually when splitting the root page, the cursor
9660
8760
  * height is 1. But when called from mdb_update_key,
@@ -9672,7 +8772,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9672
8772
  mc->mc_pg[0] = pp;
9673
8773
  mc->mc_ki[0] = 0;
9674
8774
  mc->mc_db->md_root = pp->mp_pgno;
9675
- DPRINTF(("root split! new root = %"Yu, pp->mp_pgno));
8775
+ DPRINTF(("root split! new root = %"Z"u", pp->mp_pgno));
9676
8776
  new_root = mc->mc_db->md_depth++;
9677
8777
 
9678
8778
  /* Add left (implicit) pointer. */
@@ -9689,7 +8789,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9689
8789
  ptop = 0;
9690
8790
  } else {
9691
8791
  ptop = mc->mc_top-1;
9692
- DPRINTF(("parent branch page is %"Yu, mc->mc_pg[ptop]->mp_pgno));
8792
+ DPRINTF(("parent branch page is %"Z"u", mc->mc_pg[ptop]->mp_pgno));
9693
8793
  }
9694
8794
 
9695
8795
  mdb_cursor_copy(mc, &mn);
@@ -9745,9 +8845,13 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9745
8845
  mc->mc_ki[mc->mc_top] = x;
9746
8846
  }
9747
8847
  } else {
9748
- int psize, nsize, k;
8848
+ int psize, nsize, k, keythresh;
8849
+
9749
8850
  /* Maximum free space in an empty page */
9750
8851
  pmax = env->me_psize - PAGEHDRSZ;
8852
+ /* Threshold number of keys considered "small" */
8853
+ keythresh = env->me_psize >> 7;
8854
+
9751
8855
  if (IS_LEAF(mp))
9752
8856
  nsize = mdb_leaf_size(env, newkey, newdata);
9753
8857
  else
@@ -9788,7 +8892,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9788
8892
  * the split so the new page is emptier than the old page.
9789
8893
  * This yields better packing during sequential inserts.
9790
8894
  */
9791
- if (nkeys < 32 || nsize > pmax/16 || newindx >= nkeys) {
8895
+ if (nkeys < keythresh || nsize > pmax/16 || newindx >= nkeys) {
9792
8896
  /* Find split point */
9793
8897
  psize = 0;
9794
8898
  if (newindx <= split_indx || newindx >= nkeys) {
@@ -9864,7 +8968,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9864
8968
  } else {
9865
8969
  /* find right page's left sibling */
9866
8970
  mc->mc_ki[ptop] = mn.mc_ki[ptop];
9867
- rc = mdb_cursor_sibling(mc, 0);
8971
+ mdb_cursor_sibling(mc, 0);
9868
8972
  }
9869
8973
  }
9870
8974
  } else {
@@ -9873,8 +8977,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
9873
8977
  mn.mc_top++;
9874
8978
  }
9875
8979
  if (rc != MDB_SUCCESS) {
9876
- if (rc == MDB_NOTFOUND) /* improper mdb_cursor_sibling() result */
9877
- rc = MDB_PROBLEM;
9878
8980
  goto done;
9879
8981
  }
9880
8982
  if (nflags & MDB_APPEND) {
@@ -10047,6 +9149,8 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
10047
9149
  MDB_cursor mc;
10048
9150
  MDB_xcursor mx;
10049
9151
  int rc;
9152
+ DKBUF;
9153
+ DDBUF;
10050
9154
 
10051
9155
  if (!key || !data || !TXN_DBI_EXIST(txn, dbi, DB_USRVALID))
10052
9156
  return EINVAL;
@@ -10057,10 +9161,12 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
10057
9161
  if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_BLOCKED))
10058
9162
  return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
10059
9163
 
9164
+ MDB_TRACE(("%p, %u, %"Z"u[%s], %"Z"u%s, %u",
9165
+ txn, dbi, key ? key->mv_size:0, DKEY(key), data->mv_size, mdb_dval(txn, dbi, data, dbuf), flags));
10060
9166
  mdb_cursor_init(&mc, txn, dbi, &mx);
10061
9167
  mc.mc_next = txn->mt_cursors[dbi];
10062
9168
  txn->mt_cursors[dbi] = &mc;
10063
- rc = mdb_cursor_put(&mc, key, data, flags);
9169
+ rc = _mdb_cursor_put(&mc, key, data, flags);
10064
9170
  txn->mt_cursors[dbi] = mc.mc_next;
10065
9171
  return rc;
10066
9172
  }
@@ -10078,8 +9184,8 @@ typedef struct mdb_copy {
10078
9184
  pthread_cond_t mc_cond; /**< Condition variable for #mc_new */
10079
9185
  char *mc_wbuf[2];
10080
9186
  char *mc_over[2];
10081
- int mc_wlen[2];
10082
- int mc_olen[2];
9187
+ size_t mc_wlen[2];
9188
+ size_t mc_olen[2];
10083
9189
  pgno_t mc_next_pgno;
10084
9190
  HANDLE mc_fd;
10085
9191
  int mc_toggle; /**< Buffer number in provider */
@@ -10096,7 +9202,8 @@ mdb_env_copythr(void *arg)
10096
9202
  {
10097
9203
  mdb_copy *my = arg;
10098
9204
  char *ptr;
10099
- int toggle = 0, wsize, rc;
9205
+ int toggle = 0, rc;
9206
+ size_t wsize;
10100
9207
  #ifdef _WIN32
10101
9208
  DWORD len;
10102
9209
  #define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
@@ -10209,7 +9316,6 @@ mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags)
10209
9316
 
10210
9317
  mc.mc_snum = 1;
10211
9318
  mc.mc_txn = my->mc_txn;
10212
- mc.mc_flags = my->mc_txn->mt_flags & (C_ORIG_RDONLY|C_WRITEMAP);
10213
9319
 
10214
9320
  rc = mdb_page_get(&mc, *pg, &mc.mc_pg[0], NULL);
10215
9321
  if (rc)
@@ -10464,7 +9570,7 @@ finish:
10464
9570
  my.mc_error = rc;
10465
9571
  mdb_env_cthr_toggle(&my, 1 | MDB_EOF);
10466
9572
  rc = THREAD_FINISH(thr);
10467
- mdb_txn_abort(txn);
9573
+ _mdb_txn_abort(txn);
10468
9574
 
10469
9575
  done:
10470
9576
  #ifdef _WIN32
@@ -10487,7 +9593,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
10487
9593
  MDB_txn *txn = NULL;
10488
9594
  mdb_mutexref_t wmutex = NULL;
10489
9595
  int rc;
10490
- mdb_size_t wsize, w3;
9596
+ size_t wsize, w3;
10491
9597
  char *ptr;
10492
9598
  #ifdef _WIN32
10493
9599
  DWORD len, w2;
@@ -10548,7 +9654,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
10548
9654
 
10549
9655
  w3 = txn->mt_next_pgno * env->me_psize;
10550
9656
  {
10551
- mdb_size_t fsize = 0;
9657
+ size_t fsize = 0;
10552
9658
  if ((rc = mdb_fsize(env->me_fd, &fsize)))
10553
9659
  goto leave;
10554
9660
  if (w3 > fsize)
@@ -10576,7 +9682,7 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
10576
9682
  }
10577
9683
 
10578
9684
  leave:
10579
- mdb_txn_abort(txn);
9685
+ _mdb_txn_abort(txn);
10580
9686
  return rc;
10581
9687
  }
10582
9688
 
@@ -10791,6 +9897,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
10791
9897
  }
10792
9898
  }
10793
9899
  mdb_default_cmp(txn, MAIN_DBI);
9900
+ MDB_TRACE(("%p, (null), %u = %u", txn, flags, MAIN_DBI));
10794
9901
  return MDB_SUCCESS;
10795
9902
  }
10796
9903
 
@@ -10852,7 +9959,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
10852
9959
  dummy.md_root = P_INVALID;
10853
9960
  dummy.md_flags = flags & PERSISTENT_FLAGS;
10854
9961
  WITH_CURSOR_TRACKING(mc,
10855
- rc = mdb_cursor_put(&mc, &key, &data, F_SUBDATA));
9962
+ rc = _mdb_cursor_put(&mc, &key, &data, F_SUBDATA));
10856
9963
  dbflag |= DB_DIRTY;
10857
9964
  }
10858
9965
 
@@ -10877,6 +9984,7 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
10877
9984
  if (!unused) {
10878
9985
  txn->mt_numdbs++;
10879
9986
  }
9987
+ MDB_TRACE(("%p, %s, %u = %u", txn, name, flags, slot));
10880
9988
  }
10881
9989
 
10882
9990
  return rc;
@@ -10908,6 +10016,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
10908
10016
  ptr = env->me_dbxs[dbi].md_name.mv_data;
10909
10017
  /* If there was no name, this was already closed */
10910
10018
  if (ptr) {
10019
+ MDB_TRACE(("%p, %u", env, dbi));
10911
10020
  env->me_dbxs[dbi].md_name.mv_data = NULL;
10912
10021
  env->me_dbxs[dbi].md_name.mv_size = 0;
10913
10022
  env->me_dbflags[dbi] = 0;
@@ -10952,11 +10061,6 @@ mdb_drop0(MDB_cursor *mc, int subs)
10952
10061
  mdb_cursor_pop(mc);
10953
10062
 
10954
10063
  mdb_cursor_copy(mc, &mx);
10955
- #ifdef MDB_VL32
10956
- /* bump refcount for mx's pages */
10957
- for (i=0; i<mc->mc_snum; i++)
10958
- mdb_page_get(&mx, mc->mc_pg[i]->mp_pgno, &mx.mc_pg[i], NULL);
10959
- #endif
10960
10064
  while (mc->mc_snum > 0) {
10961
10065
  MDB_page *mp = mc->mc_pg[mc->mc_top];
10962
10066
  unsigned n = NUMKEYS(mp);
@@ -11022,8 +10126,6 @@ pop:
11022
10126
  done:
11023
10127
  if (rc)
11024
10128
  txn->mt_flags |= MDB_TXN_ERROR;
11025
- /* drop refcount for mx's pages */
11026
- MDB_CURSOR_UNREF(&mx, 0);
11027
10129
  } else if (rc == MDB_NOTFOUND) {
11028
10130
  rc = MDB_SUCCESS;
11029
10131
  }
@@ -11049,6 +10151,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
11049
10151
  if (rc)
11050
10152
  return rc;
11051
10153
 
10154
+ MDB_TRACE(("%u, %d", dbi, del));
11052
10155
  rc = mdb_drop0(mc, mc->mc_db->md_flags & MDB_DUPSORT);
11053
10156
  /* Invalidate the dropped DB's cursors */
11054
10157
  for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
@@ -11143,7 +10246,7 @@ mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
11143
10246
  if (mr[i].mr_pid) {
11144
10247
  txnid_t txnid = mr[i].mr_txnid;
11145
10248
  sprintf(buf, txnid == (txnid_t)-1 ?
11146
- "%10d %"Z"x -\n" : "%10d %"Z"x %"Yu"\n",
10249
+ "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n",
11147
10250
  (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid);
11148
10251
  if (first) {
11149
10252
  first = 0;
@@ -11248,7 +10351,7 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
11248
10351
  }
11249
10352
  for (; j<rdrs; j++)
11250
10353
  if (mr[j].mr_pid == pid) {
11251
- DPRINTF(("clear stale reader pid %u txn %"Yd,
10354
+ DPRINTF(("clear stale reader pid %u txn %"Z"d",
11252
10355
  (unsigned) pid, mr[j].mr_txnid));
11253
10356
  mr[j].mr_pid = 0;
11254
10357
  count++;