lmdb 0.4.5 → 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/lmdb_ext/liblmdb/CHANGES +22 -0
- data/ext/lmdb_ext/liblmdb/lmdb.h +111 -39
- data/ext/lmdb_ext/liblmdb/mdb.c +1165 -383
- data/ext/lmdb_ext/liblmdb/midl.c +16 -2
- data/ext/lmdb_ext/liblmdb/midl.h +12 -3
- data/ext/lmdb_ext/lmdb_ext.c +88 -4
- data/lib/lmdb/version.rb +1 -1
- data/spec/lmdb_spec.rb +36 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cfbe0e0fc20cfe471e48ab16b7a6f03a868f5fa0
|
4
|
+
data.tar.gz: cd1e90a95ee5eef3bf2bfe0f0db3e389feb22a51
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23b7d820ead899db18c95d87e819b9d8a533a118a86bffe2bd996cd1352ca73bf8c3399150ed7b4fdaea7a5e4ab1901bc3844e14d25672874744572d383a1985
|
7
|
+
data.tar.gz: e3d13fe0515fd9ca626ed81526b3be34a4b9f0affe73afef4862033bb8b124c1c84687115f8f5f40a3524a4308d44ac86e877113362a6b08e87f002f081052b5
|
@@ -1,5 +1,27 @@
|
|
1
1
|
LMDB 0.9 Change Log
|
2
2
|
|
3
|
+
LMDB 0.9.14 Release (2014/09/15)
|
4
|
+
Fix to support 64K page size (ITS#7713)
|
5
|
+
Fix to persist decreased as well as increased mapsizes (ITS#7789)
|
6
|
+
Fix cursor bug when deleting last node of a DUPSORT key
|
7
|
+
Fix mdb_env_info to return FIXEDMAP address
|
8
|
+
Fix ambiguous error code from writing to closed DBI (ITS#7825)
|
9
|
+
Fix mdb_copy copying past end of file (ITS#7886)
|
10
|
+
Fix cursor bugs from page_merge/rebalance
|
11
|
+
Fix to dirty fewer pages in deletes (mdb_page_loose())
|
12
|
+
Fix mdb_dbi_open creating subDBs (ITS#7917)
|
13
|
+
Fix mdb_cursor_get(_DUP) with single value (ITS#7913)
|
14
|
+
Fix Windows compat issues in mtests (ITS#7879)
|
15
|
+
Add compacting variant of mdb_copy
|
16
|
+
Add BigEndian integer key compare code
|
17
|
+
Add mdb_dump/mdb_load utilities
|
18
|
+
|
19
|
+
LMDB 0.9.13 Release (2014/06/18)
|
20
|
+
Fix mdb_page_alloc unlimited overflow page search
|
21
|
+
Documentation
|
22
|
+
Re-fix MDB_CURRENT doc (ITS#7793)
|
23
|
+
Fix MDB_GET_MULTIPLE/MDB_NEXT_MULTIPLE doc
|
24
|
+
|
3
25
|
LMDB 0.9.12 Release (2014/06/13)
|
4
26
|
Fix MDB_GET_BOTH regression (ITS#7875,#7681)
|
5
27
|
Fix MDB_MULTIPLE writing multiple keys (ITS#7834)
|
data/ext/lmdb_ext/liblmdb/lmdb.h
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
/** @file lmdb.h
|
2
2
|
* @brief Lightning memory-mapped database library
|
3
3
|
*
|
4
|
-
* @mainpage Lightning Memory-Mapped Database Manager (
|
4
|
+
* @mainpage Lightning Memory-Mapped Database Manager (LMDB)
|
5
5
|
*
|
6
6
|
* @section intro_sec Introduction
|
7
|
-
*
|
7
|
+
* LMDB is a Btree-based database management library modeled loosely on the
|
8
8
|
* BerkeleyDB API, but much simplified. The entire database is exposed
|
9
9
|
* in a memory map, and all data fetches return data directly
|
10
10
|
* from the mapped memory, so no malloc's or memcpy's occur during
|
@@ -26,10 +26,10 @@
|
|
26
26
|
* readers, and readers don't block writers.
|
27
27
|
*
|
28
28
|
* Unlike other well-known database mechanisms which use either write-ahead
|
29
|
-
* transaction logs or append-only data writes,
|
29
|
+
* transaction logs or append-only data writes, LMDB requires no maintenance
|
30
30
|
* during operation. Both write-ahead loggers and append-only databases
|
31
31
|
* require periodic checkpointing and/or compaction of their log or database
|
32
|
-
* files otherwise they grow without bound.
|
32
|
+
* files otherwise they grow without bound. LMDB tracks free pages within
|
33
33
|
* the database and re-uses them for new write operations, so the database
|
34
34
|
* size does not grow without bound in normal use.
|
35
35
|
*
|
@@ -49,7 +49,7 @@
|
|
49
49
|
* stale locks can block further operation.
|
50
50
|
*
|
51
51
|
* Fix: Check for stale readers periodically, using the
|
52
|
-
* #mdb_reader_check function or the mdb_stat tool. Or just
|
52
|
+
* #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. Or just
|
53
53
|
* make all programs using the database close it; the lockfile
|
54
54
|
* is always reset on first open of the environment.
|
55
55
|
*
|
@@ -86,7 +86,7 @@
|
|
86
86
|
*
|
87
87
|
* - Use an MDB_env* in the process which opened it, without fork()ing.
|
88
88
|
*
|
89
|
-
* - Do not have open an
|
89
|
+
* - Do not have an LMDB database open twice in the same process at
|
90
90
|
* the same time. Not even from a plain open() call - close()ing it
|
91
91
|
* breaks flock() advisory locking.
|
92
92
|
*
|
@@ -109,7 +109,7 @@
|
|
109
109
|
* - If you do that anyway, do a periodic check for stale readers. Or
|
110
110
|
* close the environment once in a while, so the lockfile can get reset.
|
111
111
|
*
|
112
|
-
* - Do not use
|
112
|
+
* - Do not use LMDB databases on remote filesystems, even between
|
113
113
|
* processes on the same host. This breaks flock() on some OSes,
|
114
114
|
* possibly memory map sync, and certainly sync between programs
|
115
115
|
* on different hosts.
|
@@ -172,7 +172,7 @@ typedef void *mdb_filehandle_t;
|
|
172
172
|
typedef int mdb_filehandle_t;
|
173
173
|
#endif
|
174
174
|
|
175
|
-
/** @defgroup mdb
|
175
|
+
/** @defgroup mdb LMDB API
|
176
176
|
* @{
|
177
177
|
* @brief OpenLDAP Lightning Memory-Mapped Database Manager
|
178
178
|
*/
|
@@ -184,7 +184,7 @@ typedef int mdb_filehandle_t;
|
|
184
184
|
/** Library minor version */
|
185
185
|
#define MDB_VERSION_MINOR 9
|
186
186
|
/** Library patch version */
|
187
|
-
#define MDB_VERSION_PATCH
|
187
|
+
#define MDB_VERSION_PATCH 14
|
188
188
|
|
189
189
|
/** Combine args a,b,c into a single integer for easy version comparisons */
|
190
190
|
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
|
@@ -194,10 +194,10 @@ typedef int mdb_filehandle_t;
|
|
194
194
|
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
|
195
195
|
|
196
196
|
/** The release date of this library version */
|
197
|
-
#define MDB_VERSION_DATE "
|
197
|
+
#define MDB_VERSION_DATE "September 15, 2014"
|
198
198
|
|
199
199
|
/** A stringifier for the version info */
|
200
|
-
#define MDB_VERSTR(a,b,c,d) "
|
200
|
+
#define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")"
|
201
201
|
|
202
202
|
/** A helper for the stringifier macro */
|
203
203
|
#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d)
|
@@ -333,6 +333,15 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
|
|
333
333
|
#define MDB_MULTIPLE 0x80000
|
334
334
|
/* @} */
|
335
335
|
|
336
|
+
/** @defgroup mdb_copy Copy Flags
|
337
|
+
* @{
|
338
|
+
*/
|
339
|
+
/** Compacting copy: Omit free space from copy, and renumber all
|
340
|
+
* pages sequentially.
|
341
|
+
*/
|
342
|
+
#define MDB_CP_COMPACT 0x01
|
343
|
+
/* @} */
|
344
|
+
|
336
345
|
/** @brief Cursor Get operations.
|
337
346
|
*
|
338
347
|
* This is the set of all operations for retrieving data
|
@@ -345,16 +354,18 @@ typedef enum MDB_cursor_op {
|
|
345
354
|
MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */
|
346
355
|
MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */
|
347
356
|
MDB_GET_CURRENT, /**< Return key/data at current cursor position */
|
348
|
-
MDB_GET_MULTIPLE, /**< Return
|
349
|
-
|
357
|
+
MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items
|
358
|
+
from current cursor position. Move cursor to prepare
|
359
|
+
for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
|
350
360
|
MDB_LAST, /**< Position at last key/data item */
|
351
361
|
MDB_LAST_DUP, /**< Position at last data item of current key.
|
352
362
|
Only for #MDB_DUPSORT */
|
353
363
|
MDB_NEXT, /**< Position at next data item */
|
354
364
|
MDB_NEXT_DUP, /**< Position at next data item of current key.
|
355
365
|
Only for #MDB_DUPSORT */
|
356
|
-
MDB_NEXT_MULTIPLE, /**< Return
|
357
|
-
cursor position.
|
366
|
+
MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items
|
367
|
+
from next cursor position. Move cursor to prepare
|
368
|
+
for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
|
358
369
|
MDB_NEXT_NODUP, /**< Position at first data item of next key */
|
359
370
|
MDB_PREV, /**< Position at previous data item */
|
360
371
|
MDB_PREV_DUP, /**< Position at previous data item of current key.
|
@@ -384,7 +395,7 @@ typedef enum MDB_cursor_op {
|
|
384
395
|
#define MDB_PANIC (-30795)
|
385
396
|
/** Environment version mismatch */
|
386
397
|
#define MDB_VERSION_MISMATCH (-30794)
|
387
|
-
/** File is not a valid
|
398
|
+
/** File is not a valid LMDB file */
|
388
399
|
#define MDB_INVALID (-30793)
|
389
400
|
/** Environment mapsize reached */
|
390
401
|
#define MDB_MAP_FULL (-30792)
|
@@ -410,7 +421,10 @@ typedef enum MDB_cursor_op {
|
|
410
421
|
#define MDB_BAD_TXN (-30782)
|
411
422
|
/** Unsupported size of key/DB name/data, or wrong DUPFIXED size */
|
412
423
|
#define MDB_BAD_VALSIZE (-30781)
|
413
|
-
|
424
|
+
/** The specified DBI was changed unexpectedly */
|
425
|
+
#define MDB_BAD_DBI (-30780)
|
426
|
+
/** The last defined error code */
|
427
|
+
#define MDB_LAST_ERRCODE MDB_BAD_DBI
|
414
428
|
/** @} */
|
415
429
|
|
416
430
|
/** @brief Statistics for a database in the environment */
|
@@ -434,7 +448,7 @@ typedef struct MDB_envinfo {
|
|
434
448
|
unsigned int me_numreaders; /**< max reader slots used in the environment */
|
435
449
|
} MDB_envinfo;
|
436
450
|
|
437
|
-
/** @brief Return the
|
451
|
+
/** @brief Return the LMDB library version information.
|
438
452
|
*
|
439
453
|
* @param[out] major if non-NULL, the library major version number is copied here
|
440
454
|
* @param[out] minor if non-NULL, the library minor version number is copied here
|
@@ -448,14 +462,14 @@ char *mdb_version(int *major, int *minor, int *patch);
|
|
448
462
|
* This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3)
|
449
463
|
* function. If the error code is greater than or equal to 0, then the string
|
450
464
|
* returned by the system function strerror(3) is returned. If the error code
|
451
|
-
* is less than 0, an error string corresponding to the
|
452
|
-
* returned. See @ref errors for a list of
|
465
|
+
* is less than 0, an error string corresponding to the LMDB library error is
|
466
|
+
* returned. See @ref errors for a list of LMDB-specific error codes.
|
453
467
|
* @param[in] err The error code
|
454
468
|
* @retval "error message" The description of the error
|
455
469
|
*/
|
456
470
|
char *mdb_strerror(int err);
|
457
471
|
|
458
|
-
/** @brief Create an
|
472
|
+
/** @brief Create an LMDB environment handle.
|
459
473
|
*
|
460
474
|
* This function allocates memory for a #MDB_env structure. To release
|
461
475
|
* the allocated memory and discard the handle, call #mdb_env_close().
|
@@ -488,15 +502,15 @@ int mdb_env_create(MDB_env **env);
|
|
488
502
|
* how the operating system has allocated memory to shared libraries and other uses.
|
489
503
|
* The feature is highly experimental.
|
490
504
|
* <li>#MDB_NOSUBDIR
|
491
|
-
* By default,
|
505
|
+
* By default, LMDB creates its environment in a directory whose
|
492
506
|
* pathname is given in \b path, and creates its data and lock files
|
493
507
|
* under that directory. With this option, \b path is used as-is for
|
494
508
|
* the database main data file. The database lock file is the \b path
|
495
509
|
* with "-lock" appended.
|
496
510
|
* <li>#MDB_RDONLY
|
497
511
|
* Open the environment in read-only mode. No write operations will be
|
498
|
-
* allowed.
|
499
|
-
* filesystems, where
|
512
|
+
* allowed. LMDB will still modify the lock file - except on read-only
|
513
|
+
* filesystems, where LMDB does not use locks.
|
500
514
|
* <li>#MDB_WRITEMAP
|
501
515
|
* Use a writeable memory map unless MDB_RDONLY is set. This is faster
|
502
516
|
* and uses fewer mallocs, but loses protection from application bugs
|
@@ -540,7 +554,7 @@ int mdb_env_create(MDB_env **env);
|
|
540
554
|
* the user synchronizes its use. Applications that multiplex many
|
541
555
|
* user threads over individual OS threads need this option. Such an
|
542
556
|
* application must also serialize the write transactions in an OS
|
543
|
-
* thread, since
|
557
|
+
* thread, since LMDB's write locking is unaware of the user threads.
|
544
558
|
* <li>#MDB_NOLOCK
|
545
559
|
* Don't do any locking. If concurrent access is anticipated, the
|
546
560
|
* caller must manage all concurrency itself. For proper operation
|
@@ -579,7 +593,7 @@ int mdb_env_create(MDB_env **env);
|
|
579
593
|
* @return A non-zero error value on failure and 0 on success. Some possible
|
580
594
|
* errors are:
|
581
595
|
* <ul>
|
582
|
-
* <li>#MDB_VERSION_MISMATCH - the version of the
|
596
|
+
* <li>#MDB_VERSION_MISMATCH - the version of the LMDB library doesn't match the
|
583
597
|
* version that created the database environment.
|
584
598
|
* <li>#MDB_INVALID - the environment file headers are corrupted.
|
585
599
|
* <li>ENOENT - the directory specified by the path parameter doesn't exist.
|
@@ -589,7 +603,7 @@ int mdb_env_create(MDB_env **env);
|
|
589
603
|
*/
|
590
604
|
int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);
|
591
605
|
|
592
|
-
/** @brief Copy an
|
606
|
+
/** @brief Copy an LMDB environment to the specified path.
|
593
607
|
*
|
594
608
|
* This function may be used to make a backup of an existing environment.
|
595
609
|
* No lockfile is created, since it gets recreated at need.
|
@@ -605,7 +619,7 @@ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t
|
|
605
619
|
*/
|
606
620
|
int mdb_env_copy(MDB_env *env, const char *path);
|
607
621
|
|
608
|
-
/** @brief Copy an
|
622
|
+
/** @brief Copy an LMDB environment to the specified file descriptor.
|
609
623
|
*
|
610
624
|
* This function may be used to make a backup of an existing environment.
|
611
625
|
* No lockfile is created, since it gets recreated at need.
|
@@ -620,7 +634,50 @@ int mdb_env_copy(MDB_env *env, const char *path);
|
|
620
634
|
*/
|
621
635
|
int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
|
622
636
|
|
623
|
-
/** @brief
|
637
|
+
/** @brief Copy an LMDB environment to the specified path, with options.
|
638
|
+
*
|
639
|
+
* This function may be used to make a backup of an existing environment.
|
640
|
+
* No lockfile is created, since it gets recreated at need.
|
641
|
+
* @note This call can trigger significant file size growth if run in
|
642
|
+
* parallel with write transactions, because it employs a read-only
|
643
|
+
* transaction. See long-lived transactions under @ref caveats_sec.
|
644
|
+
* @param[in] env An environment handle returned by #mdb_env_create(). It
|
645
|
+
* must have already been opened successfully.
|
646
|
+
* @param[in] path The directory in which the copy will reside. This
|
647
|
+
* directory must already exist and be writable but must otherwise be
|
648
|
+
* empty.
|
649
|
+
* @param[in] flags Special options for this operation. This parameter
|
650
|
+
* must be set to 0 or by bitwise OR'ing together one or more of the
|
651
|
+
* values described here.
|
652
|
+
* <ul>
|
653
|
+
* <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free
|
654
|
+
* pages and sequentially renumber all pages in output. This option
|
655
|
+
* consumes more CPU and runs more slowly than the default.
|
656
|
+
* </ul>
|
657
|
+
* @return A non-zero error value on failure and 0 on success.
|
658
|
+
*/
|
659
|
+
int mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags);
|
660
|
+
|
661
|
+
/** @brief Copy an LMDB environment to the specified file descriptor,
|
662
|
+
* with options.
|
663
|
+
*
|
664
|
+
* This function may be used to make a backup of an existing environment.
|
665
|
+
* No lockfile is created, since it gets recreated at need. See
|
666
|
+
* #mdb_env_copy2() for further details.
|
667
|
+
* @note This call can trigger significant file size growth if run in
|
668
|
+
* parallel with write transactions, because it employs a read-only
|
669
|
+
* transaction. See long-lived transactions under @ref caveats_sec.
|
670
|
+
* @param[in] env An environment handle returned by #mdb_env_create(). It
|
671
|
+
* must have already been opened successfully.
|
672
|
+
* @param[in] fd The filedescriptor to write the copy to. It must
|
673
|
+
* have already been opened for Write access.
|
674
|
+
* @param[in] flags Special options for this operation.
|
675
|
+
* See #mdb_env_copy2() for options.
|
676
|
+
* @return A non-zero error value on failure and 0 on success.
|
677
|
+
*/
|
678
|
+
int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags);
|
679
|
+
|
680
|
+
/** @brief Return statistics about the LMDB environment.
|
624
681
|
*
|
625
682
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
626
683
|
* @param[out] stat The address of an #MDB_stat structure
|
@@ -628,7 +685,7 @@ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
|
|
628
685
|
*/
|
629
686
|
int mdb_env_stat(MDB_env *env, MDB_stat *stat);
|
630
687
|
|
631
|
-
/** @brief Return information about the
|
688
|
+
/** @brief Return information about the LMDB environment.
|
632
689
|
*
|
633
690
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
634
691
|
* @param[out] stat The address of an #MDB_envinfo structure
|
@@ -639,7 +696,7 @@ int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
|
|
639
696
|
/** @brief Flush the data buffers to disk.
|
640
697
|
*
|
641
698
|
* Data is always written to disk when #mdb_txn_commit() is called,
|
642
|
-
* but the operating system may keep it buffered.
|
699
|
+
* but the operating system may keep it buffered. LMDB always flushes
|
643
700
|
* the OS buffers upon commit as well, unless the environment was
|
644
701
|
* opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
|
645
702
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
@@ -730,7 +787,13 @@ int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd);
|
|
730
787
|
* this process. Note that the library does not check for this condition,
|
731
788
|
* the caller must ensure it explicitly.
|
732
789
|
*
|
733
|
-
*
|
790
|
+
* The new size takes effect immediately for the current process but
|
791
|
+
* will not be persisted to any others until a write transaction has been
|
792
|
+
* committed by the current process. Also, only mapsize increases are
|
793
|
+
* persisted into the environment.
|
794
|
+
*
|
795
|
+
* If the mapsize is increased by another process, and data has grown
|
796
|
+
* beyond the range of the current mapsize, #mdb_txn_begin() will
|
734
797
|
* return #MDB_MAP_RESIZED. This function may be called with a size
|
735
798
|
* of zero to adopt the new size.
|
736
799
|
*
|
@@ -822,7 +885,7 @@ int mdb_env_set_userctx(MDB_env *env, void *ctx);
|
|
822
885
|
*/
|
823
886
|
void *mdb_env_get_userctx(MDB_env *env);
|
824
887
|
|
825
|
-
/** @brief A callback function for most
|
888
|
+
/** @brief A callback function for most LMDB assert() failures,
|
826
889
|
* called before printing the message and aborting.
|
827
890
|
*
|
828
891
|
* @param[in] env An environment handle returned by #mdb_env_create().
|
@@ -1204,7 +1267,7 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
|
|
1204
1267
|
* reserved space, which the caller can fill in later - before
|
1205
1268
|
* the next update operation or the transaction ends. This saves
|
1206
1269
|
* an extra memcpy if the data is being generated later.
|
1207
|
-
*
|
1270
|
+
* LMDB does nothing else with this memory; the caller is expected
|
1208
1271
|
* to modify all of the space requested.
|
1209
1272
|
* <li>#MDB_APPEND - append the given key/data pair to the end of the
|
1210
1273
|
* database. No key comparisons are performed. This option allows
|
@@ -1345,11 +1408,12 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
|
|
1345
1408
|
* @param[in] flags Options for this operation. This parameter
|
1346
1409
|
* must be set to 0 or one of the values described here.
|
1347
1410
|
* <ul>
|
1348
|
-
* <li>#MDB_CURRENT -
|
1349
|
-
*
|
1350
|
-
*
|
1351
|
-
*
|
1352
|
-
*
|
1411
|
+
* <li>#MDB_CURRENT - replace the item at the current cursor position.
|
1412
|
+
* The \b key parameter must still be provided, and must match it.
|
1413
|
+
* If using sorted duplicates (#MDB_DUPSORT) the data item must still
|
1414
|
+
* sort into the same place. This is intended to be used when the
|
1415
|
+
* new data is the same size as the old. Otherwise it will simply
|
1416
|
+
* perform a delete of the old record followed by an insert.
|
1353
1417
|
* <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
|
1354
1418
|
* already appear in the database. This flag may only be specified
|
1355
1419
|
* if the database was opened with #MDB_DUPSORT. The function will
|
@@ -1478,4 +1542,12 @@ int mdb_reader_check(MDB_env *env, int *dead);
|
|
1478
1542
|
#ifdef __cplusplus
|
1479
1543
|
}
|
1480
1544
|
#endif
|
1545
|
+
/** @page tools LMDB Command Line Tools
|
1546
|
+
The following describes the command line tools that are available for LMDB.
|
1547
|
+
\li \ref mdb_copy_1
|
1548
|
+
\li \ref mdb_dump_1
|
1549
|
+
\li \ref mdb_load_1
|
1550
|
+
\li \ref mdb_stat_1
|
1551
|
+
*/
|
1552
|
+
|
1481
1553
|
#endif /* _LMDB_H_ */
|
data/ext/lmdb_ext/liblmdb/mdb.c
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
/** @file mdb.c
|
2
|
-
* @brief memory-mapped database library
|
2
|
+
* @brief Lightning memory-mapped database library
|
3
3
|
*
|
4
4
|
* A Btree-based database management library modeled loosely on the
|
5
5
|
* BerkeleyDB API, but much simplified.
|
6
6
|
*/
|
7
7
|
/*
|
8
|
-
* Copyright 2011-
|
8
|
+
* Copyright 2011-2014 Howard Chu, Symas Corp.
|
9
9
|
* All rights reserved.
|
10
10
|
*
|
11
11
|
* Redistribution and use in source and binary forms, with or without
|
@@ -35,15 +35,17 @@
|
|
35
35
|
#ifndef _GNU_SOURCE
|
36
36
|
#define _GNU_SOURCE 1
|
37
37
|
#endif
|
38
|
-
#include <sys/types.h>
|
39
|
-
#include <sys/stat.h>
|
40
38
|
#ifdef _WIN32
|
39
|
+
#include <malloc.h>
|
41
40
|
#include <windows.h>
|
42
41
|
/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it
|
43
42
|
* as int64 which is wrong. MSVC doesn't define it at all, so just
|
44
43
|
* don't use it.
|
45
44
|
*/
|
46
45
|
#define MDB_PID_T int
|
46
|
+
#define MDB_THR_T DWORD
|
47
|
+
#include <sys/types.h>
|
48
|
+
#include <sys/stat.h>
|
47
49
|
#ifdef __GNUC__
|
48
50
|
# include <sys/param.h>
|
49
51
|
#else
|
@@ -55,7 +57,10 @@
|
|
55
57
|
# endif
|
56
58
|
#endif
|
57
59
|
#else
|
60
|
+
#include <sys/types.h>
|
61
|
+
#include <sys/stat.h>
|
58
62
|
#define MDB_PID_T pid_t
|
63
|
+
#define MDB_THR_T pthread_t
|
59
64
|
#include <sys/param.h>
|
60
65
|
#include <sys/uio.h>
|
61
66
|
#include <sys/mman.h>
|
@@ -65,6 +70,16 @@
|
|
65
70
|
#include <fcntl.h>
|
66
71
|
#endif
|
67
72
|
|
73
|
+
#if defined(__mips) && defined(__linux)
|
74
|
+
/* MIPS has cache coherency issues, requires explicit cache control */
|
75
|
+
#include <asm/cachectl.h>
|
76
|
+
extern int cacheflush(char *addr, int nbytes, int cache);
|
77
|
+
#define CACHEFLUSH(addr, bytes, cache) cacheflush(addr, bytes, cache)
|
78
|
+
#else
|
79
|
+
#define CACHEFLUSH(addr, bytes, cache)
|
80
|
+
#endif
|
81
|
+
|
82
|
+
|
68
83
|
#include <errno.h>
|
69
84
|
#include <limits.h>
|
70
85
|
#include <stddef.h>
|
@@ -75,6 +90,12 @@
|
|
75
90
|
#include <time.h>
|
76
91
|
#include <unistd.h>
|
77
92
|
|
93
|
+
#if defined(__sun)
|
94
|
+
/* Most platforms have posix_memalign, older may only have memalign */
|
95
|
+
#define HAVE_MEMALIGN 1
|
96
|
+
#include <malloc.h>
|
97
|
+
#endif
|
98
|
+
|
78
99
|
#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER))
|
79
100
|
#include <netinet/in.h>
|
80
101
|
#include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */
|
@@ -145,7 +166,18 @@
|
|
145
166
|
# error "Two's complement, reasonably sized integer types, please"
|
146
167
|
#endif
|
147
168
|
|
148
|
-
|
169
|
+
#ifdef __GNUC__
|
170
|
+
/** Put infrequently used env functions in separate section */
|
171
|
+
# ifdef __APPLE__
|
172
|
+
# define ESECT __attribute__ ((section("__TEXT,text_env")))
|
173
|
+
# else
|
174
|
+
# define ESECT __attribute__ ((section("text_env")))
|
175
|
+
# endif
|
176
|
+
#else
|
177
|
+
#define ESECT
|
178
|
+
#endif
|
179
|
+
|
180
|
+
/** @defgroup internal LMDB Internals
|
149
181
|
* @{
|
150
182
|
*/
|
151
183
|
/** @defgroup compat Compatibility Macros
|
@@ -156,6 +188,11 @@
|
|
156
188
|
* @{
|
157
189
|
*/
|
158
190
|
|
191
|
+
/** Features under development */
|
192
|
+
#ifndef MDB_DEVEL
|
193
|
+
#define MDB_DEVEL 0
|
194
|
+
#endif
|
195
|
+
|
159
196
|
/** Wrapper around __func__, which is a C99 feature */
|
160
197
|
#if __STDC_VERSION__ >= 199901L
|
161
198
|
# define mdb_func_ __func__
|
@@ -169,8 +206,10 @@
|
|
169
206
|
#ifdef _WIN32
|
170
207
|
#define MDB_USE_HASH 1
|
171
208
|
#define MDB_PIDLOCK 0
|
172
|
-
#define
|
209
|
+
#define THREAD_RET DWORD
|
210
|
+
#define pthread_t HANDLE
|
173
211
|
#define pthread_mutex_t HANDLE
|
212
|
+
#define pthread_cond_t HANDLE
|
174
213
|
#define pthread_key_t DWORD
|
175
214
|
#define pthread_self() GetCurrentThreadId()
|
176
215
|
#define pthread_key_create(x,y) \
|
@@ -178,12 +217,16 @@
|
|
178
217
|
#define pthread_key_delete(x) TlsFree(x)
|
179
218
|
#define pthread_getspecific(x) TlsGetValue(x)
|
180
219
|
#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode())
|
181
|
-
#define pthread_mutex_unlock(x) ReleaseMutex(x)
|
182
|
-
#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE)
|
183
|
-
#define
|
184
|
-
#define
|
185
|
-
#define
|
186
|
-
#define
|
220
|
+
#define pthread_mutex_unlock(x) ReleaseMutex(*x)
|
221
|
+
#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE)
|
222
|
+
#define pthread_cond_signal(x) SetEvent(*x)
|
223
|
+
#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0)
|
224
|
+
#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL)
|
225
|
+
#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE)
|
226
|
+
#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_rmutex)
|
227
|
+
#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex)
|
228
|
+
#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex)
|
229
|
+
#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_wmutex)
|
187
230
|
#define getpid() GetCurrentProcessId()
|
188
231
|
#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd))
|
189
232
|
#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len))
|
@@ -198,7 +241,9 @@
|
|
198
241
|
#endif
|
199
242
|
#define Z "I"
|
200
243
|
#else
|
201
|
-
|
244
|
+
#define THREAD_RET void *
|
245
|
+
#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg)
|
246
|
+
#define THREAD_FINISH(thr) pthread_join(thr,NULL)
|
202
247
|
#define Z "z" /**< printf format modifier for size_t */
|
203
248
|
|
204
249
|
/** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */
|
@@ -352,7 +397,8 @@ static txnid_t mdb_debug_start;
|
|
352
397
|
|
353
398
|
/** @brief The maximum size of a database page.
|
354
399
|
*
|
355
|
-
*
|
400
|
+
* It is 32k or 64k, since value-PAGEBASE must fit in
|
401
|
+
* #MDB_page.%mp_upper.
|
356
402
|
*
|
357
403
|
* LMDB will use database pages < OS pages if needed.
|
358
404
|
* That causes more I/O in write transactions: The OS must
|
@@ -365,7 +411,7 @@ static txnid_t mdb_debug_start;
|
|
365
411
|
* pressure from other processes is high. So until OSs have
|
366
412
|
* actual paging support for Huge pages, they're not viable.
|
367
413
|
*/
|
368
|
-
#define MAX_PAGESIZE 0x8000
|
414
|
+
#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000)
|
369
415
|
|
370
416
|
/** The minimum number of keys required in a database page.
|
371
417
|
* Setting this to a larger value will place a smaller bound on the
|
@@ -381,14 +427,14 @@ static txnid_t mdb_debug_start;
|
|
381
427
|
*/
|
382
428
|
#define MDB_MINKEYS 2
|
383
429
|
|
384
|
-
/** A stamp that identifies a file as an
|
430
|
+
/** A stamp that identifies a file as an LMDB file.
|
385
431
|
* There's nothing special about this value other than that it is easily
|
386
432
|
* recognizable, and it will reflect any byte order mismatches.
|
387
433
|
*/
|
388
434
|
#define MDB_MAGIC 0xBEEFC0DE
|
389
435
|
|
390
436
|
/** The version number for a database's datafile format. */
|
391
|
-
#define MDB_DATA_VERSION 1
|
437
|
+
#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1)
|
392
438
|
/** The version number for a database's lockfile format. */
|
393
439
|
#define MDB_LOCK_VERSION 1
|
394
440
|
|
@@ -397,13 +443,14 @@ static txnid_t mdb_debug_start;
|
|
397
443
|
* Define this as 0 to compute the max from the page size. 511
|
398
444
|
* is default for backwards compat: liblmdb <= 0.9.10 can break
|
399
445
|
* when modifying a DB with keys/dupsort data bigger than its max.
|
446
|
+
* #MDB_DEVEL sets the default to 0.
|
400
447
|
*
|
401
448
|
* Data items in an #MDB_DUPSORT database are also limited to
|
402
449
|
* this size, since they're actually keys of a sub-DB. Keys and
|
403
450
|
* #MDB_DUPSORT data items must fit on a node in a regular page.
|
404
451
|
*/
|
405
452
|
#ifndef MDB_MAXKEYSIZE
|
406
|
-
#define MDB_MAXKEYSIZE 511
|
453
|
+
#define MDB_MAXKEYSIZE ((MDB_DEVEL) ? 0 : 511)
|
407
454
|
#endif
|
408
455
|
|
409
456
|
/** The maximum size of a key we can write to the environment. */
|
@@ -537,7 +584,7 @@ typedef struct MDB_rxbody {
|
|
537
584
|
/** The process ID of the process owning this reader txn. */
|
538
585
|
MDB_PID_T mrb_pid;
|
539
586
|
/** The thread ID of the thread owning this txn. */
|
540
|
-
|
587
|
+
MDB_THR_T mrb_tid;
|
541
588
|
} MDB_rxbody;
|
542
589
|
|
543
590
|
/** The actual reader record, with cacheline padding. */
|
@@ -568,7 +615,7 @@ typedef struct MDB_reader {
|
|
568
615
|
* unlikely. If a collision occurs, the results are unpredictable.
|
569
616
|
*/
|
570
617
|
typedef struct MDB_txbody {
|
571
|
-
/** Stamp identifying this as an
|
618
|
+
/** Stamp identifying this as an LMDB file. It must be set
|
572
619
|
* to #MDB_MAGIC. */
|
573
620
|
uint32_t mtb_magic;
|
574
621
|
/** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */
|
@@ -635,7 +682,7 @@ typedef struct MDB_page {
|
|
635
682
|
#define mp_next mp_p.p_next
|
636
683
|
union {
|
637
684
|
pgno_t p_pgno; /**< page number */
|
638
|
-
|
685
|
+
struct MDB_page *p_next; /**< for in-memory list of freed pages */
|
639
686
|
} mp_p;
|
640
687
|
uint16_t mp_pad;
|
641
688
|
/** @defgroup mdb_page Page Flags
|
@@ -650,6 +697,7 @@ typedef struct MDB_page {
|
|
650
697
|
#define P_DIRTY 0x10 /**< dirty page, also set for #P_SUBP pages */
|
651
698
|
#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */
|
652
699
|
#define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */
|
700
|
+
#define P_LOOSE 0x4000 /**< page was dirtied then freed, can be reused */
|
653
701
|
#define P_KEEP 0x8000 /**< leave this page alone during spill */
|
654
702
|
/** @} */
|
655
703
|
uint16_t mp_flags; /**< @ref mdb_page */
|
@@ -672,8 +720,11 @@ typedef struct MDB_page {
|
|
672
720
|
/** Address of first usable data byte in a page, after the header */
|
673
721
|
#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ))
|
674
722
|
|
723
|
+
/** ITS#7713, change PAGEBASE to handle 65536 byte pages */
|
724
|
+
#define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0)
|
725
|
+
|
675
726
|
/** Number of nodes on a page */
|
676
|
-
#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1)
|
727
|
+
#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1)
|
677
728
|
|
678
729
|
/** The amount of space remaining in the page */
|
679
730
|
#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower)
|
@@ -700,6 +751,9 @@ typedef struct MDB_page {
|
|
700
751
|
/** The number of overflow pages needed to store the given size. */
|
701
752
|
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
|
702
753
|
|
754
|
+
/** Link in #MDB_txn.%mt_loose_pgs list */
|
755
|
+
#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2))
|
756
|
+
|
703
757
|
/** Header for a single key/data pair within a page.
|
704
758
|
* Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2.
|
705
759
|
* We guarantee 2-byte alignment for 'MDB_node's.
|
@@ -751,7 +805,7 @@ typedef struct MDB_node {
|
|
751
805
|
#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size)
|
752
806
|
|
753
807
|
/** Address of node \b i in page \b p */
|
754
|
-
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i]))
|
808
|
+
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE))
|
755
809
|
|
756
810
|
/** Address of the key for the node */
|
757
811
|
#define NODEKEY(node) (void *)((node)->mn_data)
|
@@ -841,7 +895,7 @@ typedef struct MDB_db {
|
|
841
895
|
* Pages 0-1 are meta pages. Transaction N writes meta page #(N % 2).
|
842
896
|
*/
|
843
897
|
typedef struct MDB_meta {
|
844
|
-
/** Stamp identifying this as an
|
898
|
+
/** Stamp identifying this as an LMDB file. It must be set
|
845
899
|
* to #MDB_MAGIC. */
|
846
900
|
uint32_t mm_magic;
|
847
901
|
/** Version number of this data file. Must be set to #MDB_DATA_VERSION. */
|
@@ -898,6 +952,12 @@ struct MDB_txn {
|
|
898
952
|
/** The list of pages that became unused during this transaction.
|
899
953
|
*/
|
900
954
|
MDB_IDL mt_free_pgs;
|
955
|
+
/** The list of loose pages that became unused and may be reused
|
956
|
+
* in this transaction, linked through #NEXT_LOOSE_PAGE(page).
|
957
|
+
*/
|
958
|
+
MDB_page *mt_loose_pgs;
|
959
|
+
/** Number of loose pages (#mt_loose_pgs) */
|
960
|
+
int mt_loose_count;
|
901
961
|
/** The sorted list of dirty pages we temporarily wrote to disk
|
902
962
|
* because the dirty list was full. page numbers in here are
|
903
963
|
* shifted left by 1, deleted slots have the LSB set.
|
@@ -913,6 +973,8 @@ struct MDB_txn {
|
|
913
973
|
MDB_dbx *mt_dbxs;
|
914
974
|
/** Array of MDB_db records for each known DB */
|
915
975
|
MDB_db *mt_dbs;
|
976
|
+
/** Array of sequence numbers for each DB handle */
|
977
|
+
unsigned int *mt_dbiseqs;
|
916
978
|
/** @defgroup mt_dbflag Transaction DB Flags
|
917
979
|
* @ingroup internal
|
918
980
|
* @{
|
@@ -1048,12 +1110,15 @@ struct MDB_env {
|
|
1048
1110
|
MDB_meta *me_metas[2]; /**< pointers to the two meta pages */
|
1049
1111
|
void *me_pbuf; /**< scratch area for DUPSORT put() */
|
1050
1112
|
MDB_txn *me_txn; /**< current write transaction */
|
1113
|
+
MDB_txn *me_txn0; /**< prealloc'd write transaction */
|
1051
1114
|
size_t me_mapsize; /**< size of the data memory map */
|
1052
1115
|
off_t me_size; /**< current file size */
|
1053
1116
|
pgno_t me_maxpg; /**< me_mapsize / me_psize */
|
1054
1117
|
MDB_dbx *me_dbxs; /**< array of static DB info */
|
1055
1118
|
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
|
1119
|
+
unsigned int *me_dbiseqs; /**< array of dbi sequence numbers */
|
1056
1120
|
pthread_key_t me_txkey; /**< thread-key for readers */
|
1121
|
+
txnid_t me_pgoldest; /**< ID of oldest reader last time we looked */
|
1057
1122
|
MDB_pgstate me_pgstate; /**< state of old pages from freeDB */
|
1058
1123
|
# define me_pglast me_pgstate.mf_pglast
|
1059
1124
|
# define me_pghead me_pgstate.mf_pghead
|
@@ -1102,6 +1167,10 @@ typedef struct MDB_ntxn {
|
|
1102
1167
|
#define TXN_DBI_EXIST(txn, dbi) \
|
1103
1168
|
((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID))
|
1104
1169
|
|
1170
|
+
/** Check for misused \b dbi handles */
|
1171
|
+
#define TXN_DBI_CHANGED(txn, dbi) \
|
1172
|
+
((txn)->mt_dbiseqs[dbi] != (txn)->mt_env->me_dbiseqs[dbi])
|
1173
|
+
|
1105
1174
|
static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp);
|
1106
1175
|
static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp);
|
1107
1176
|
static int mdb_page_touch(MDB_cursor *mc);
|
@@ -1182,7 +1251,7 @@ mdb_version(int *major, int *minor, int *patch)
|
|
1182
1251
|
return MDB_VERSION_STRING;
|
1183
1252
|
}
|
1184
1253
|
|
1185
|
-
/** Table of descriptions for
|
1254
|
+
/** Table of descriptions for LMDB @ref errors */
|
1186
1255
|
static char *const mdb_errstr[] = {
|
1187
1256
|
"MDB_KEYEXIST: Key/data pair already exists",
|
1188
1257
|
"MDB_NOTFOUND: No matching key/data pair found",
|
@@ -1190,7 +1259,7 @@ static char *const mdb_errstr[] = {
|
|
1190
1259
|
"MDB_CORRUPTED: Located page was wrong type",
|
1191
1260
|
"MDB_PANIC: Update of meta page failed",
|
1192
1261
|
"MDB_VERSION_MISMATCH: Database environment version mismatch",
|
1193
|
-
"MDB_INVALID: File is not an
|
1262
|
+
"MDB_INVALID: File is not an LMDB file",
|
1194
1263
|
"MDB_MAP_FULL: Environment mapsize limit reached",
|
1195
1264
|
"MDB_DBS_FULL: Environment maxdbs limit reached",
|
1196
1265
|
"MDB_READERS_FULL: Environment maxreaders limit reached",
|
@@ -1203,11 +1272,20 @@ static char *const mdb_errstr[] = {
|
|
1203
1272
|
"MDB_BAD_RSLOT: Invalid reuse of reader locktable slot",
|
1204
1273
|
"MDB_BAD_TXN: Transaction cannot recover - it must be aborted",
|
1205
1274
|
"MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
|
1275
|
+
"MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
|
1206
1276
|
};
|
1207
1277
|
|
1208
1278
|
char *
|
1209
1279
|
mdb_strerror(int err)
|
1210
1280
|
{
|
1281
|
+
#ifdef _WIN32
|
1282
|
+
/** HACK: pad 4KB on stack over the buf. Return system msgs in buf.
|
1283
|
+
* This works as long as no function between the call to mdb_strerror
|
1284
|
+
* and the actual use of the message uses more than 4K of stack.
|
1285
|
+
*/
|
1286
|
+
char pad[4096];
|
1287
|
+
char buf[1024], *ptr = buf;
|
1288
|
+
#endif
|
1211
1289
|
int i;
|
1212
1290
|
if (!err)
|
1213
1291
|
return ("Successful return: 0");
|
@@ -1217,7 +1295,32 @@ mdb_strerror(int err)
|
|
1217
1295
|
return mdb_errstr[i];
|
1218
1296
|
}
|
1219
1297
|
|
1298
|
+
#ifdef _WIN32
|
1299
|
+
/* These are the C-runtime error codes we use. The comment indicates
|
1300
|
+
* their numeric value, and the Win32 error they would correspond to
|
1301
|
+
* if the error actually came from a Win32 API. A major mess, we should
|
1302
|
+
* have used LMDB-specific error codes for everything.
|
1303
|
+
*/
|
1304
|
+
switch(err) {
|
1305
|
+
case ENOENT: /* 2, FILE_NOT_FOUND */
|
1306
|
+
case EIO: /* 5, ACCESS_DENIED */
|
1307
|
+
case ENOMEM: /* 12, INVALID_ACCESS */
|
1308
|
+
case EACCES: /* 13, INVALID_DATA */
|
1309
|
+
case EBUSY: /* 16, CURRENT_DIRECTORY */
|
1310
|
+
case EINVAL: /* 22, BAD_COMMAND */
|
1311
|
+
case ENOSPC: /* 28, OUT_OF_PAPER */
|
1312
|
+
return strerror(err);
|
1313
|
+
default:
|
1314
|
+
;
|
1315
|
+
}
|
1316
|
+
buf[0] = 0;
|
1317
|
+
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM |
|
1318
|
+
FORMAT_MESSAGE_IGNORE_INSERTS,
|
1319
|
+
NULL, err, 0, ptr, sizeof(buf), pad);
|
1320
|
+
return ptr;
|
1321
|
+
#else
|
1220
1322
|
return strerror(err);
|
1323
|
+
#endif
|
1221
1324
|
}
|
1222
1325
|
|
1223
1326
|
/** assert(3) variant in cursor context */
|
@@ -1357,7 +1460,7 @@ mdb_page_list(MDB_page *mp)
|
|
1357
1460
|
total = EVEN(total);
|
1358
1461
|
}
|
1359
1462
|
fprintf(stderr, "Total: header %d + contents %d + unused %d\n",
|
1360
|
-
IS_LEAF2(mp) ? PAGEHDRSZ : mp->mp_lower, total, SIZELEFT(mp));
|
1463
|
+
IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp));
|
1361
1464
|
}
|
1362
1465
|
|
1363
1466
|
void
|
@@ -1485,7 +1588,6 @@ mdb_page_malloc(MDB_txn *txn, unsigned num)
|
|
1485
1588
|
}
|
1486
1589
|
return ret;
|
1487
1590
|
}
|
1488
|
-
|
1489
1591
|
/** Free a single page.
|
1490
1592
|
* Saves single pages to a list, for future reuse.
|
1491
1593
|
* (This is not used for multi-page overflow pages.)
|
@@ -1525,6 +1627,62 @@ mdb_dlist_free(MDB_txn *txn)
|
|
1525
1627
|
dl[0].mid = 0;
|
1526
1628
|
}
|
1527
1629
|
|
1630
|
+
/** Loosen or free a single page.
|
1631
|
+
* Saves single pages to a list for future reuse
|
1632
|
+
* in this same txn. It has been pulled from the freeDB
|
1633
|
+
* and already resides on the dirty list, but has been
|
1634
|
+
* deleted. Use these pages first before pulling again
|
1635
|
+
* from the freeDB.
|
1636
|
+
*
|
1637
|
+
* If the page wasn't dirtied in this txn, just add it
|
1638
|
+
* to this txn's free list.
|
1639
|
+
*/
|
1640
|
+
static int
|
1641
|
+
mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
|
1642
|
+
{
|
1643
|
+
int loose = 0;
|
1644
|
+
pgno_t pgno = mp->mp_pgno;
|
1645
|
+
MDB_txn *txn = mc->mc_txn;
|
1646
|
+
|
1647
|
+
if ((mp->mp_flags & P_DIRTY) && mc->mc_dbi != FREE_DBI) {
|
1648
|
+
if (txn->mt_parent) {
|
1649
|
+
MDB_ID2 *dl = txn->mt_u.dirty_list;
|
1650
|
+
/* If txn has a parent, make sure the page is in our
|
1651
|
+
* dirty list.
|
1652
|
+
*/
|
1653
|
+
if (dl[0].mid) {
|
1654
|
+
unsigned x = mdb_mid2l_search(dl, pgno);
|
1655
|
+
if (x <= dl[0].mid && dl[x].mid == pgno) {
|
1656
|
+
if (mp != dl[x].mptr) { /* bad cursor? */
|
1657
|
+
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
|
1658
|
+
txn->mt_flags |= MDB_TXN_ERROR;
|
1659
|
+
return MDB_CORRUPTED;
|
1660
|
+
}
|
1661
|
+
/* ok, it's ours */
|
1662
|
+
loose = 1;
|
1663
|
+
}
|
1664
|
+
}
|
1665
|
+
} else {
|
1666
|
+
/* no parent txn, so it's just ours */
|
1667
|
+
loose = 1;
|
1668
|
+
}
|
1669
|
+
}
|
1670
|
+
if (loose) {
|
1671
|
+
DPRINTF(("loosen db %d page %"Z"u", DDBI(mc),
|
1672
|
+
mp->mp_pgno));
|
1673
|
+
NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
|
1674
|
+
txn->mt_loose_pgs = mp;
|
1675
|
+
txn->mt_loose_count++;
|
1676
|
+
mp->mp_flags |= P_LOOSE;
|
1677
|
+
} else {
|
1678
|
+
int rc = mdb_midl_append(&txn->mt_free_pgs, pgno);
|
1679
|
+
if (rc)
|
1680
|
+
return rc;
|
1681
|
+
}
|
1682
|
+
|
1683
|
+
return MDB_SUCCESS;
|
1684
|
+
}
|
1685
|
+
|
1528
1686
|
/** Set or clear P_KEEP in dirty, non-overflow, non-sub pages watched by txn.
|
1529
1687
|
* @param[in] mc A cursor handle for the current operation.
|
1530
1688
|
* @param[in] pflags Flags of the pages to update:
|
@@ -1535,7 +1693,7 @@ mdb_dlist_free(MDB_txn *txn)
|
|
1535
1693
|
static int
|
1536
1694
|
mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
|
1537
1695
|
{
|
1538
|
-
enum { Mask = P_SUBP|P_DIRTY|P_KEEP };
|
1696
|
+
enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP };
|
1539
1697
|
MDB_txn *txn = mc->mc_txn;
|
1540
1698
|
MDB_cursor *m3;
|
1541
1699
|
MDB_xcursor *mx;
|
@@ -1686,7 +1844,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data)
|
|
1686
1844
|
for (i=dl[0].mid; i && need; i--) {
|
1687
1845
|
MDB_ID pn = dl[i].mid << 1;
|
1688
1846
|
dp = dl[i].mptr;
|
1689
|
-
if (dp->mp_flags & P_KEEP)
|
1847
|
+
if (dp->mp_flags & (P_LOOSE|P_KEEP))
|
1690
1848
|
continue;
|
1691
1849
|
/* Can't spill twice, make sure it's not already in a parent's
|
1692
1850
|
* spill list.
|
@@ -1790,15 +1948,27 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
1790
1948
|
#else
|
1791
1949
|
enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
|
1792
1950
|
#endif
|
1793
|
-
int rc, retry =
|
1951
|
+
int rc, retry = num * 60;
|
1794
1952
|
MDB_txn *txn = mc->mc_txn;
|
1795
1953
|
MDB_env *env = txn->mt_env;
|
1796
1954
|
pgno_t pgno, *mop = env->me_pghead;
|
1797
|
-
unsigned i, j,
|
1955
|
+
unsigned i, j, mop_len = mop ? mop[0] : 0, n2 = num-1;
|
1798
1956
|
MDB_page *np;
|
1799
1957
|
txnid_t oldest = 0, last;
|
1800
1958
|
MDB_cursor_op op;
|
1801
1959
|
MDB_cursor m2;
|
1960
|
+
int found_old = 0;
|
1961
|
+
|
1962
|
+
/* If there are any loose pages, just use them */
|
1963
|
+
if (num == 1 && txn->mt_loose_pgs) {
|
1964
|
+
np = txn->mt_loose_pgs;
|
1965
|
+
txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np);
|
1966
|
+
txn->mt_loose_count--;
|
1967
|
+
DPRINTF(("db %d use loose page %"Z"u", DDBI(mc),
|
1968
|
+
np->mp_pgno));
|
1969
|
+
*mp = np;
|
1970
|
+
return MDB_SUCCESS;
|
1971
|
+
}
|
1802
1972
|
|
1803
1973
|
*mp = NULL;
|
1804
1974
|
|
@@ -1811,7 +1981,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
1811
1981
|
for (op = MDB_FIRST;; op = MDB_NEXT) {
|
1812
1982
|
MDB_val key, data;
|
1813
1983
|
MDB_node *leaf;
|
1814
|
-
pgno_t *idl
|
1984
|
+
pgno_t *idl;
|
1815
1985
|
|
1816
1986
|
/* Seek a big enough contiguous page range. Prefer
|
1817
1987
|
* pages at the tail, just truncating the list.
|
@@ -1823,14 +1993,14 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
1823
1993
|
if (mop[i-n2] == pgno+n2)
|
1824
1994
|
goto search_done;
|
1825
1995
|
} while (--i > n2);
|
1826
|
-
if (
|
1996
|
+
if (--retry < 0)
|
1827
1997
|
break;
|
1828
1998
|
}
|
1829
1999
|
|
1830
2000
|
if (op == MDB_FIRST) { /* 1st iteration */
|
1831
2001
|
/* Prepare to fetch more and coalesce */
|
1832
|
-
oldest = mdb_find_oldest(txn);
|
1833
2002
|
last = env->me_pglast;
|
2003
|
+
oldest = env->me_pgoldest;
|
1834
2004
|
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
|
1835
2005
|
if (last) {
|
1836
2006
|
op = MDB_SET_RANGE;
|
@@ -1845,8 +2015,15 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
1845
2015
|
|
1846
2016
|
last++;
|
1847
2017
|
/* Do not fetch more if the record will be too recent */
|
1848
|
-
if (oldest <= last)
|
1849
|
-
|
2018
|
+
if (oldest <= last) {
|
2019
|
+
if (!found_old) {
|
2020
|
+
oldest = mdb_find_oldest(txn);
|
2021
|
+
env->me_pgoldest = oldest;
|
2022
|
+
found_old = 1;
|
2023
|
+
}
|
2024
|
+
if (oldest <= last)
|
2025
|
+
break;
|
2026
|
+
}
|
1850
2027
|
rc = mdb_cursor_get(&m2, &key, NULL, op);
|
1851
2028
|
if (rc) {
|
1852
2029
|
if (rc == MDB_NOTFOUND)
|
@@ -1854,8 +2031,15 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
1854
2031
|
goto fail;
|
1855
2032
|
}
|
1856
2033
|
last = *(txnid_t*)key.mv_data;
|
1857
|
-
if (oldest <= last)
|
1858
|
-
|
2034
|
+
if (oldest <= last) {
|
2035
|
+
if (!found_old) {
|
2036
|
+
oldest = mdb_find_oldest(txn);
|
2037
|
+
env->me_pgoldest = oldest;
|
2038
|
+
found_old = 1;
|
2039
|
+
}
|
2040
|
+
if (oldest <= last)
|
2041
|
+
break;
|
2042
|
+
}
|
1859
2043
|
np = m2.mc_pg[m2.mc_top];
|
1860
2044
|
leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]);
|
1861
2045
|
if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS)
|
@@ -1877,21 +2061,12 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
1877
2061
|
#if (MDB_DEBUG) > 1
|
1878
2062
|
DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u",
|
1879
2063
|
last, txn->mt_dbs[FREE_DBI].md_root, i));
|
1880
|
-
for (
|
1881
|
-
DPRINTF(("IDL %"Z"u", idl[
|
2064
|
+
for (j = i; j; j--)
|
2065
|
+
DPRINTF(("IDL %"Z"u", idl[j]));
|
1882
2066
|
#endif
|
1883
2067
|
/* Merge in descending sorted order */
|
1884
|
-
|
1885
|
-
|
1886
|
-
mop[0] = (pgno_t)-1;
|
1887
|
-
old_id = mop[j];
|
1888
|
-
while (i) {
|
1889
|
-
new_id = idl[i--];
|
1890
|
-
for (; old_id < new_id; old_id = mop[--j])
|
1891
|
-
mop[k--] = old_id;
|
1892
|
-
mop[k--] = new_id;
|
1893
|
-
}
|
1894
|
-
mop[0] = mop_len;
|
2068
|
+
mdb_midl_xmerge(mop, idl);
|
2069
|
+
mop_len = mop[0];
|
1895
2070
|
}
|
1896
2071
|
|
1897
2072
|
/* Use new pages from the map when nothing suitable in the freeDB */
|
@@ -1946,8 +2121,8 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize)
|
|
1946
2121
|
* alignment so memcpy may copy words instead of bytes.
|
1947
2122
|
*/
|
1948
2123
|
if ((unused &= -Align) && !IS_LEAF2(src)) {
|
1949
|
-
upper
|
1950
|
-
memcpy(dst, src, (lower + (Align-1)) & -Align);
|
2124
|
+
upper = (upper + PAGEBASE) & -Align;
|
2125
|
+
memcpy(dst, src, (lower + PAGEBASE + (Align-1)) & -Align);
|
1951
2126
|
memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper),
|
1952
2127
|
psize - upper);
|
1953
2128
|
} else {
|
@@ -2314,7 +2489,7 @@ mdb_txn_renew0(MDB_txn *txn)
|
|
2314
2489
|
return MDB_BAD_RSLOT;
|
2315
2490
|
} else {
|
2316
2491
|
MDB_PID_T pid = env->me_pid;
|
2317
|
-
|
2492
|
+
MDB_THR_T tid = pthread_self();
|
2318
2493
|
|
2319
2494
|
if (!env->me_live_reader) {
|
2320
2495
|
rc = mdb_reader_pid(env, Pidset, pid);
|
@@ -2373,6 +2548,7 @@ mdb_txn_renew0(MDB_txn *txn)
|
|
2373
2548
|
txn->mt_free_pgs[0] = 0;
|
2374
2549
|
txn->mt_spill_pgs = NULL;
|
2375
2550
|
env->me_txn = txn;
|
2551
|
+
memcpy(txn->mt_dbiseqs, env->me_dbiseqs, env->me_maxdbs * sizeof(unsigned int));
|
2376
2552
|
}
|
2377
2553
|
|
2378
2554
|
/* Copy the DB info and flags */
|
@@ -2447,23 +2623,39 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
|
|
2447
2623
|
tsize = sizeof(MDB_ntxn);
|
2448
2624
|
}
|
2449
2625
|
size = tsize + env->me_maxdbs * (sizeof(MDB_db)+1);
|
2450
|
-
if (!(flags & MDB_RDONLY))
|
2626
|
+
if (!(flags & MDB_RDONLY)) {
|
2627
|
+
if (!parent) {
|
2628
|
+
txn = env->me_txn0;
|
2629
|
+
goto ok;
|
2630
|
+
}
|
2451
2631
|
size += env->me_maxdbs * sizeof(MDB_cursor *);
|
2632
|
+
/* child txns use parent's dbiseqs */
|
2633
|
+
if (!parent)
|
2634
|
+
size += env->me_maxdbs * sizeof(unsigned int);
|
2635
|
+
}
|
2452
2636
|
|
2453
2637
|
if ((txn = calloc(1, size)) == NULL) {
|
2454
|
-
DPRINTF(("calloc: %s", strerror(
|
2638
|
+
DPRINTF(("calloc: %s", strerror(errno)));
|
2455
2639
|
return ENOMEM;
|
2456
2640
|
}
|
2457
2641
|
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
|
2458
2642
|
if (flags & MDB_RDONLY) {
|
2459
2643
|
txn->mt_flags |= MDB_TXN_RDONLY;
|
2460
2644
|
txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs);
|
2645
|
+
txn->mt_dbiseqs = env->me_dbiseqs;
|
2461
2646
|
} else {
|
2462
2647
|
txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
|
2463
|
-
|
2648
|
+
if (parent) {
|
2649
|
+
txn->mt_dbiseqs = parent->mt_dbiseqs;
|
2650
|
+
txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
|
2651
|
+
} else {
|
2652
|
+
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
|
2653
|
+
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
|
2654
|
+
}
|
2464
2655
|
}
|
2465
2656
|
txn->mt_env = env;
|
2466
2657
|
|
2658
|
+
ok:
|
2467
2659
|
if (parent) {
|
2468
2660
|
unsigned int i;
|
2469
2661
|
txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE);
|
@@ -2506,9 +2698,10 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
|
|
2506
2698
|
} else {
|
2507
2699
|
rc = mdb_txn_renew0(txn);
|
2508
2700
|
}
|
2509
|
-
if (rc)
|
2510
|
-
|
2511
|
-
|
2701
|
+
if (rc) {
|
2702
|
+
if (txn != env->me_txn0)
|
2703
|
+
free(txn);
|
2704
|
+
} else {
|
2512
2705
|
*ret = txn;
|
2513
2706
|
DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
|
2514
2707
|
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
|
@@ -2540,10 +2733,13 @@ mdb_dbis_update(MDB_txn *txn, int keep)
|
|
2540
2733
|
env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
|
2541
2734
|
} else {
|
2542
2735
|
char *ptr = env->me_dbxs[i].md_name.mv_data;
|
2543
|
-
|
2544
|
-
|
2545
|
-
|
2546
|
-
|
2736
|
+
if (ptr) {
|
2737
|
+
env->me_dbxs[i].md_name.mv_data = NULL;
|
2738
|
+
env->me_dbxs[i].md_name.mv_size = 0;
|
2739
|
+
env->me_dbflags[i] = 0;
|
2740
|
+
env->me_dbiseqs[i]++;
|
2741
|
+
free(ptr);
|
2742
|
+
}
|
2547
2743
|
}
|
2548
2744
|
}
|
2549
2745
|
}
|
@@ -2632,7 +2828,8 @@ mdb_txn_abort(MDB_txn *txn)
|
|
2632
2828
|
if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader)
|
2633
2829
|
txn->mt_u.reader->mr_pid = 0;
|
2634
2830
|
|
2635
|
-
|
2831
|
+
if (txn != txn->mt_env->me_txn0)
|
2832
|
+
free(txn);
|
2636
2833
|
}
|
2637
2834
|
|
2638
2835
|
/** Save the freelist as of this transaction to the freeDB.
|
@@ -2661,6 +2858,19 @@ mdb_freelist_save(MDB_txn *txn)
|
|
2661
2858
|
return rc;
|
2662
2859
|
}
|
2663
2860
|
|
2861
|
+
if (!env->me_pghead && txn->mt_loose_pgs) {
|
2862
|
+
/* Put loose page numbers in mt_free_pgs, since
|
2863
|
+
* we may be unable to return them to me_pghead.
|
2864
|
+
*/
|
2865
|
+
MDB_page *mp = txn->mt_loose_pgs;
|
2866
|
+
if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0)
|
2867
|
+
return rc;
|
2868
|
+
for (; mp; mp = NEXT_LOOSE_PAGE(mp))
|
2869
|
+
mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
|
2870
|
+
txn->mt_loose_pgs = NULL;
|
2871
|
+
txn->mt_loose_count = 0;
|
2872
|
+
}
|
2873
|
+
|
2664
2874
|
/* MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP) */
|
2665
2875
|
clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP))
|
2666
2876
|
? SSIZE_MAX : maxfree_1pg;
|
@@ -2722,7 +2932,7 @@ mdb_freelist_save(MDB_txn *txn)
|
|
2722
2932
|
}
|
2723
2933
|
|
2724
2934
|
mop = env->me_pghead;
|
2725
|
-
mop_len = mop ? mop[0] : 0;
|
2935
|
+
mop_len = (mop ? mop[0] : 0) + txn->mt_loose_count;
|
2726
2936
|
|
2727
2937
|
/* Reserve records for me_pghead[]. Split it if multi-page,
|
2728
2938
|
* to avoid searching freeDB for a page range. Use keys in
|
@@ -2762,6 +2972,28 @@ mdb_freelist_save(MDB_txn *txn)
|
|
2762
2972
|
total_room += head_room;
|
2763
2973
|
}
|
2764
2974
|
|
2975
|
+
/* Return loose page numbers to me_pghead, though usually none are
|
2976
|
+
* left at this point. The pages themselves remain in dirty_list.
|
2977
|
+
*/
|
2978
|
+
if (txn->mt_loose_pgs) {
|
2979
|
+
MDB_page *mp = txn->mt_loose_pgs;
|
2980
|
+
unsigned count = txn->mt_loose_count;
|
2981
|
+
MDB_IDL loose;
|
2982
|
+
/* Room for loose pages + temp IDL with same */
|
2983
|
+
if ((rc = mdb_midl_need(&env->me_pghead, 2*count+1)) != 0)
|
2984
|
+
return rc;
|
2985
|
+
mop = env->me_pghead;
|
2986
|
+
loose = mop + MDB_IDL_ALLOCLEN(mop) - count;
|
2987
|
+
for (count = 0; mp; mp = NEXT_LOOSE_PAGE(mp))
|
2988
|
+
loose[ ++count ] = mp->mp_pgno;
|
2989
|
+
loose[0] = count;
|
2990
|
+
mdb_midl_sort(loose);
|
2991
|
+
mdb_midl_xmerge(mop, loose);
|
2992
|
+
txn->mt_loose_pgs = NULL;
|
2993
|
+
txn->mt_loose_count = 0;
|
2994
|
+
mop_len = mop[0];
|
2995
|
+
}
|
2996
|
+
|
2765
2997
|
/* Fill in the reserved me_pghead records */
|
2766
2998
|
rc = MDB_SUCCESS;
|
2767
2999
|
if (mop_len) {
|
@@ -2823,8 +3055,8 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
2823
3055
|
while (++i <= pagecount) {
|
2824
3056
|
dp = dl[i].mptr;
|
2825
3057
|
/* Don't flush this page yet */
|
2826
|
-
if (dp->mp_flags & P_KEEP) {
|
2827
|
-
dp->mp_flags
|
3058
|
+
if (dp->mp_flags & (P_LOOSE|P_KEEP)) {
|
3059
|
+
dp->mp_flags &= ~P_KEEP;
|
2828
3060
|
dl[++j] = dl[i];
|
2829
3061
|
continue;
|
2830
3062
|
}
|
@@ -2838,8 +3070,8 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
2838
3070
|
if (++i <= pagecount) {
|
2839
3071
|
dp = dl[i].mptr;
|
2840
3072
|
/* Don't flush this page yet */
|
2841
|
-
if (dp->mp_flags & P_KEEP) {
|
2842
|
-
dp->mp_flags
|
3073
|
+
if (dp->mp_flags & (P_LOOSE|P_KEEP)) {
|
3074
|
+
dp->mp_flags &= ~P_KEEP;
|
2843
3075
|
dl[i].mid = 0;
|
2844
3076
|
continue;
|
2845
3077
|
}
|
@@ -2914,6 +3146,12 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
2914
3146
|
#endif /* _WIN32 */
|
2915
3147
|
}
|
2916
3148
|
|
3149
|
+
/* MIPS has cache coherency issues; this is a no-op everywhere else.
|
3150
|
+
* Note: for any size >= on-chip cache size, entire on-chip cache is
|
3151
|
+
* flushed.
|
3152
|
+
*/
|
3153
|
+
CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE);
|
3154
|
+
|
2917
3155
|
for (i = keep; ++i <= pagecount; ) {
|
2918
3156
|
dp = dl[i].mptr;
|
2919
3157
|
/* This is a page we skipped above */
|
@@ -2968,6 +3206,7 @@ mdb_txn_commit(MDB_txn *txn)
|
|
2968
3206
|
|
2969
3207
|
if (txn->mt_parent) {
|
2970
3208
|
MDB_txn *parent = txn->mt_parent;
|
3209
|
+
MDB_page **lp;
|
2971
3210
|
MDB_ID2L dst, src;
|
2972
3211
|
MDB_IDL pspill;
|
2973
3212
|
unsigned x, y, len, ps_len;
|
@@ -3065,6 +3304,12 @@ mdb_txn_commit(MDB_txn *txn)
|
|
3065
3304
|
}
|
3066
3305
|
}
|
3067
3306
|
|
3307
|
+
/* Append our loose page list to parent's */
|
3308
|
+
for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(lp))
|
3309
|
+
;
|
3310
|
+
*lp = txn->mt_loose_pgs;
|
3311
|
+
parent->mt_loose_count += txn->mt_loose_count;
|
3312
|
+
|
3068
3313
|
parent->mt_child = NULL;
|
3069
3314
|
mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
|
3070
3315
|
free(txn);
|
@@ -3096,6 +3341,10 @@ mdb_txn_commit(MDB_txn *txn)
|
|
3096
3341
|
mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
|
3097
3342
|
for (i = 2; i < txn->mt_numdbs; i++) {
|
3098
3343
|
if (txn->mt_dbflags[i] & DB_DIRTY) {
|
3344
|
+
if (TXN_DBI_CHANGED(txn, i)) {
|
3345
|
+
rc = MDB_BAD_DBI;
|
3346
|
+
goto fail;
|
3347
|
+
}
|
3099
3348
|
data.mv_data = &txn->mt_dbs[i];
|
3100
3349
|
rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
|
3101
3350
|
if (rc)
|
@@ -3122,6 +3371,10 @@ mdb_txn_commit(MDB_txn *txn)
|
|
3122
3371
|
(rc = mdb_env_write_meta(txn)))
|
3123
3372
|
goto fail;
|
3124
3373
|
|
3374
|
+
/* Free P_LOOSE pages left behind in dirty_list */
|
3375
|
+
if (!(env->me_flags & MDB_WRITEMAP))
|
3376
|
+
mdb_dlist_free(txn);
|
3377
|
+
|
3125
3378
|
done:
|
3126
3379
|
env->me_pglast = 0;
|
3127
3380
|
env->me_txn = NULL;
|
@@ -3129,7 +3382,8 @@ done:
|
|
3129
3382
|
|
3130
3383
|
if (env->me_txns)
|
3131
3384
|
UNLOCK_MUTEX_W(env);
|
3132
|
-
|
3385
|
+
if (txn != env->me_txn0)
|
3386
|
+
free(txn);
|
3133
3387
|
|
3134
3388
|
return MDB_SUCCESS;
|
3135
3389
|
|
@@ -3144,7 +3398,7 @@ fail:
|
|
3144
3398
|
* @param[out] meta address of where to store the meta information
|
3145
3399
|
* @return 0 on success, non-zero on failure.
|
3146
3400
|
*/
|
3147
|
-
static int
|
3401
|
+
static int ESECT
|
3148
3402
|
mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
3149
3403
|
{
|
3150
3404
|
MDB_metabuf pbuf;
|
@@ -3202,12 +3456,26 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
|
3202
3456
|
return 0;
|
3203
3457
|
}
|
3204
3458
|
|
3459
|
+
static void ESECT
|
3460
|
+
mdb_env_init_meta0(MDB_env *env, MDB_meta *meta)
|
3461
|
+
{
|
3462
|
+
meta->mm_magic = MDB_MAGIC;
|
3463
|
+
meta->mm_version = MDB_DATA_VERSION;
|
3464
|
+
meta->mm_mapsize = env->me_mapsize;
|
3465
|
+
meta->mm_psize = env->me_psize;
|
3466
|
+
meta->mm_last_pg = 1;
|
3467
|
+
meta->mm_flags = env->me_flags & 0xffff;
|
3468
|
+
meta->mm_flags |= MDB_INTEGERKEY;
|
3469
|
+
meta->mm_dbs[0].md_root = P_INVALID;
|
3470
|
+
meta->mm_dbs[1].md_root = P_INVALID;
|
3471
|
+
}
|
3472
|
+
|
3205
3473
|
/** Write the environment parameters of a freshly created DB environment.
|
3206
3474
|
* @param[in] env the environment handle
|
3207
3475
|
* @param[out] meta address of where to store the meta information
|
3208
3476
|
* @return 0 on success, non-zero on failure.
|
3209
3477
|
*/
|
3210
|
-
static int
|
3478
|
+
static int ESECT
|
3211
3479
|
mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
3212
3480
|
{
|
3213
3481
|
MDB_page *p, *q;
|
@@ -3231,15 +3499,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
|
3231
3499
|
|
3232
3500
|
psize = env->me_psize;
|
3233
3501
|
|
3234
|
-
meta
|
3235
|
-
meta->mm_version = MDB_DATA_VERSION;
|
3236
|
-
meta->mm_mapsize = env->me_mapsize;
|
3237
|
-
meta->mm_psize = psize;
|
3238
|
-
meta->mm_last_pg = 1;
|
3239
|
-
meta->mm_flags = env->me_flags & 0xffff;
|
3240
|
-
meta->mm_flags |= MDB_INTEGERKEY;
|
3241
|
-
meta->mm_dbs[0].md_root = P_INVALID;
|
3242
|
-
meta->mm_dbs[1].md_root = P_INVALID;
|
3502
|
+
mdb_env_init_meta0(env, meta);
|
3243
3503
|
|
3244
3504
|
p = calloc(2, psize);
|
3245
3505
|
p->mp_pgno = 0;
|
@@ -3271,6 +3531,7 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
3271
3531
|
{
|
3272
3532
|
MDB_env *env;
|
3273
3533
|
MDB_meta meta, metab, *mp;
|
3534
|
+
size_t mapsize;
|
3274
3535
|
off_t off;
|
3275
3536
|
int rc, len, toggle;
|
3276
3537
|
char *ptr;
|
@@ -3287,11 +3548,13 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
3287
3548
|
|
3288
3549
|
env = txn->mt_env;
|
3289
3550
|
mp = env->me_metas[toggle];
|
3551
|
+
mapsize = env->me_metas[toggle ^ 1]->mm_mapsize;
|
3552
|
+
/* Persist any increases of mapsize config */
|
3553
|
+
if (mapsize < env->me_mapsize)
|
3554
|
+
mapsize = env->me_mapsize;
|
3290
3555
|
|
3291
3556
|
if (env->me_flags & MDB_WRITEMAP) {
|
3292
|
-
|
3293
|
-
if (env->me_mapsize > mp->mm_mapsize)
|
3294
|
-
mp->mm_mapsize = env->me_mapsize;
|
3557
|
+
mp->mm_mapsize = mapsize;
|
3295
3558
|
mp->mm_dbs[0] = txn->mt_dbs[0];
|
3296
3559
|
mp->mm_dbs[1] = txn->mt_dbs[1];
|
3297
3560
|
mp->mm_last_pg = txn->mt_next_pgno - 1;
|
@@ -3318,22 +3581,15 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
3318
3581
|
metab.mm_txnid = env->me_metas[toggle]->mm_txnid;
|
3319
3582
|
metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg;
|
3320
3583
|
|
3321
|
-
|
3322
|
-
if (env->me_mapsize > mp->mm_mapsize) {
|
3323
|
-
/* Persist any increases of mapsize config */
|
3324
|
-
meta.mm_mapsize = env->me_mapsize;
|
3325
|
-
off = offsetof(MDB_meta, mm_mapsize);
|
3326
|
-
} else {
|
3327
|
-
off = offsetof(MDB_meta, mm_dbs[0].md_depth);
|
3328
|
-
}
|
3329
|
-
len = sizeof(MDB_meta) - off;
|
3330
|
-
|
3331
|
-
ptr += off;
|
3584
|
+
meta.mm_mapsize = mapsize;
|
3332
3585
|
meta.mm_dbs[0] = txn->mt_dbs[0];
|
3333
3586
|
meta.mm_dbs[1] = txn->mt_dbs[1];
|
3334
3587
|
meta.mm_last_pg = txn->mt_next_pgno - 1;
|
3335
3588
|
meta.mm_txnid = txn->mt_txnid;
|
3336
3589
|
|
3590
|
+
off = offsetof(MDB_meta, mm_mapsize);
|
3591
|
+
ptr = (char *)&meta + off;
|
3592
|
+
len = sizeof(MDB_meta) - off;
|
3337
3593
|
if (toggle)
|
3338
3594
|
off += env->me_psize;
|
3339
3595
|
off += PAGEHDRSZ;
|
@@ -3372,6 +3628,8 @@ fail:
|
|
3372
3628
|
env->me_flags |= MDB_FATAL_ERROR;
|
3373
3629
|
return rc;
|
3374
3630
|
}
|
3631
|
+
/* MIPS has cache coherency issues; this is a no-op everywhere else. */
|
3632
|
+
CACHEFLUSH(env->me_map + off, len, DCACHE);
|
3375
3633
|
done:
|
3376
3634
|
/* Memory ordering issues are irrelevant; since the entire writer
|
3377
3635
|
* is wrapped by wmutex, all of these changes will become visible
|
@@ -3395,7 +3653,7 @@ mdb_env_pick_meta(const MDB_env *env)
|
|
3395
3653
|
return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid);
|
3396
3654
|
}
|
3397
3655
|
|
3398
|
-
int
|
3656
|
+
int ESECT
|
3399
3657
|
mdb_env_create(MDB_env **env)
|
3400
3658
|
{
|
3401
3659
|
MDB_env *e;
|
@@ -3420,8 +3678,8 @@ mdb_env_create(MDB_env **env)
|
|
3420
3678
|
return MDB_SUCCESS;
|
3421
3679
|
}
|
3422
3680
|
|
3423
|
-
static int
|
3424
|
-
mdb_env_map(MDB_env *env, void *addr
|
3681
|
+
static int ESECT
|
3682
|
+
mdb_env_map(MDB_env *env, void *addr)
|
3425
3683
|
{
|
3426
3684
|
MDB_page *p;
|
3427
3685
|
unsigned int flags = env->me_flags;
|
@@ -3429,18 +3687,28 @@ mdb_env_map(MDB_env *env, void *addr, int newsize)
|
|
3429
3687
|
int rc;
|
3430
3688
|
HANDLE mh;
|
3431
3689
|
LONG sizelo, sizehi;
|
3432
|
-
|
3433
|
-
sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */
|
3690
|
+
size_t msize;
|
3434
3691
|
|
3435
|
-
|
3436
|
-
|
3437
|
-
|
3438
|
-
|
3692
|
+
if (flags & MDB_RDONLY) {
|
3693
|
+
/* Don't set explicit map size, use whatever exists */
|
3694
|
+
msize = 0;
|
3695
|
+
sizelo = 0;
|
3696
|
+
sizehi = 0;
|
3697
|
+
} else {
|
3698
|
+
msize = env->me_mapsize;
|
3699
|
+
sizelo = msize & 0xffffffff;
|
3700
|
+
sizehi = msize >> 16 >> 16; /* only needed on Win64 */
|
3701
|
+
|
3702
|
+
/* Windows won't create mappings for zero length files,
|
3703
|
+
* and won't map more than the file size.
|
3704
|
+
* Just set the maxsize right now.
|
3705
|
+
*/
|
3439
3706
|
if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo
|
3440
3707
|
|| !SetEndOfFile(env->me_fd)
|
3441
3708
|
|| SetFilePointer(env->me_fd, 0, NULL, 0) != 0)
|
3442
3709
|
return ErrCode();
|
3443
3710
|
}
|
3711
|
+
|
3444
3712
|
mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ?
|
3445
3713
|
PAGE_READWRITE : PAGE_READONLY,
|
3446
3714
|
sizehi, sizelo, NULL);
|
@@ -3448,7 +3716,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize)
|
|
3448
3716
|
return ErrCode();
|
3449
3717
|
env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ?
|
3450
3718
|
FILE_MAP_WRITE : FILE_MAP_READ,
|
3451
|
-
0, 0,
|
3719
|
+
0, 0, msize, addr);
|
3452
3720
|
rc = env->me_map ? 0 : ErrCode();
|
3453
3721
|
CloseHandle(mh);
|
3454
3722
|
if (rc)
|
@@ -3494,7 +3762,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize)
|
|
3494
3762
|
return MDB_SUCCESS;
|
3495
3763
|
}
|
3496
3764
|
|
3497
|
-
int
|
3765
|
+
int ESECT
|
3498
3766
|
mdb_env_set_mapsize(MDB_env *env, size_t size)
|
3499
3767
|
{
|
3500
3768
|
/* If env is already open, caller is responsible for making
|
@@ -3518,7 +3786,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
|
|
3518
3786
|
munmap(env->me_map, env->me_mapsize);
|
3519
3787
|
env->me_mapsize = size;
|
3520
3788
|
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
|
3521
|
-
rc = mdb_env_map(env, old
|
3789
|
+
rc = mdb_env_map(env, old);
|
3522
3790
|
if (rc)
|
3523
3791
|
return rc;
|
3524
3792
|
}
|
@@ -3528,7 +3796,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
|
|
3528
3796
|
return MDB_SUCCESS;
|
3529
3797
|
}
|
3530
3798
|
|
3531
|
-
int
|
3799
|
+
int ESECT
|
3532
3800
|
mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
|
3533
3801
|
{
|
3534
3802
|
if (env->me_map)
|
@@ -3537,7 +3805,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
|
|
3537
3805
|
return MDB_SUCCESS;
|
3538
3806
|
}
|
3539
3807
|
|
3540
|
-
int
|
3808
|
+
int ESECT
|
3541
3809
|
mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
|
3542
3810
|
{
|
3543
3811
|
if (env->me_map || readers < 1)
|
@@ -3546,7 +3814,7 @@ mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
|
|
3546
3814
|
return MDB_SUCCESS;
|
3547
3815
|
}
|
3548
3816
|
|
3549
|
-
int
|
3817
|
+
int ESECT
|
3550
3818
|
mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
|
3551
3819
|
{
|
3552
3820
|
if (!env || !readers)
|
@@ -3555,9 +3823,9 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
|
|
3555
3823
|
return MDB_SUCCESS;
|
3556
3824
|
}
|
3557
3825
|
|
3558
|
-
/** Further setup required for opening an
|
3826
|
+
/** Further setup required for opening an LMDB environment
|
3559
3827
|
*/
|
3560
|
-
static int
|
3828
|
+
static int ESECT
|
3561
3829
|
mdb_env_open2(MDB_env *env)
|
3562
3830
|
{
|
3563
3831
|
unsigned int flags = env->me_flags;
|
@@ -3602,7 +3870,7 @@ mdb_env_open2(MDB_env *env)
|
|
3602
3870
|
env->me_mapsize = minsize;
|
3603
3871
|
}
|
3604
3872
|
|
3605
|
-
rc = mdb_env_map(env,
|
3873
|
+
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
|
3606
3874
|
if (rc)
|
3607
3875
|
return rc;
|
3608
3876
|
|
@@ -3714,7 +3982,7 @@ PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback;
|
|
3714
3982
|
#endif
|
3715
3983
|
|
3716
3984
|
/** Downgrade the exclusive lock on the region back to shared */
|
3717
|
-
static int
|
3985
|
+
static int ESECT
|
3718
3986
|
mdb_env_share_locks(MDB_env *env, int *excl)
|
3719
3987
|
{
|
3720
3988
|
int rc = 0, toggle = mdb_env_pick_meta(env);
|
@@ -3756,7 +4024,7 @@ mdb_env_share_locks(MDB_env *env, int *excl)
|
|
3756
4024
|
/** Try to get exclusive lock, otherwise shared.
|
3757
4025
|
* Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive.
|
3758
4026
|
*/
|
3759
|
-
static int
|
4027
|
+
static int ESECT
|
3760
4028
|
mdb_env_excl_lock(MDB_env *env, int *excl)
|
3761
4029
|
{
|
3762
4030
|
int rc = 0;
|
@@ -3891,14 +4159,14 @@ mdb_hash_enc(MDB_val *val, char *encbuf)
|
|
3891
4159
|
#endif
|
3892
4160
|
|
3893
4161
|
/** Open and/or initialize the lock region for the environment.
|
3894
|
-
* @param[in] env The
|
4162
|
+
* @param[in] env The LMDB environment.
|
3895
4163
|
* @param[in] lpath The pathname of the file used for the lock region.
|
3896
4164
|
* @param[in] mode The Unix permissions for the file, if we create it.
|
3897
4165
|
* @param[out] excl Resulting file lock type: -1 none, 0 shared, 1 exclusive
|
3898
4166
|
* @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive
|
3899
4167
|
* @return 0 on success, non-zero on failure.
|
3900
4168
|
*/
|
3901
|
-
static int
|
4169
|
+
static int ESECT
|
3902
4170
|
mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
|
3903
4171
|
{
|
3904
4172
|
#ifdef _WIN32
|
@@ -4128,7 +4396,7 @@ fail:
|
|
4128
4396
|
# error "Persistent DB flags & env flags overlap, but both go in mm_flags"
|
4129
4397
|
#endif
|
4130
4398
|
|
4131
|
-
int
|
4399
|
+
int ESECT
|
4132
4400
|
mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
|
4133
4401
|
{
|
4134
4402
|
int oflags, rc, len, excl = -1;
|
@@ -4173,7 +4441,8 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
4173
4441
|
env->me_path = strdup(path);
|
4174
4442
|
env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
|
4175
4443
|
env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t));
|
4176
|
-
|
4444
|
+
env->me_dbiseqs = calloc(env->me_maxdbs, sizeof(unsigned int));
|
4445
|
+
if (!(env->me_dbxs && env->me_path && env->me_dbflags && env->me_dbiseqs)) {
|
4177
4446
|
rc = ENOMEM;
|
4178
4447
|
goto leave;
|
4179
4448
|
}
|
@@ -4245,6 +4514,22 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
4245
4514
|
if (!((flags & MDB_RDONLY) ||
|
4246
4515
|
(env->me_pbuf = calloc(1, env->me_psize))))
|
4247
4516
|
rc = ENOMEM;
|
4517
|
+
if (!(flags & MDB_RDONLY)) {
|
4518
|
+
MDB_txn *txn;
|
4519
|
+
int tsize = sizeof(MDB_txn), size = tsize + env->me_maxdbs *
|
4520
|
+
(sizeof(MDB_db)+sizeof(MDB_cursor)+sizeof(unsigned int)+1);
|
4521
|
+
txn = calloc(1, size);
|
4522
|
+
if (txn) {
|
4523
|
+
txn->mt_dbs = (MDB_db *)((char *)txn + tsize);
|
4524
|
+
txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
|
4525
|
+
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
|
4526
|
+
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
|
4527
|
+
txn->mt_env = env;
|
4528
|
+
env->me_txn0 = txn;
|
4529
|
+
} else {
|
4530
|
+
rc = ENOMEM;
|
4531
|
+
}
|
4532
|
+
}
|
4248
4533
|
}
|
4249
4534
|
|
4250
4535
|
leave:
|
@@ -4256,7 +4541,7 @@ leave:
|
|
4256
4541
|
}
|
4257
4542
|
|
4258
4543
|
/** Destroy resources from mdb_env_open(), clear our readers & DBIs */
|
4259
|
-
static void
|
4544
|
+
static void ESECT
|
4260
4545
|
mdb_env_close0(MDB_env *env, int excl)
|
4261
4546
|
{
|
4262
4547
|
int i;
|
@@ -4269,6 +4554,7 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
4269
4554
|
free(env->me_dbxs[i].md_name.mv_data);
|
4270
4555
|
|
4271
4556
|
free(env->me_pbuf);
|
4557
|
+
free(env->me_dbiseqs);
|
4272
4558
|
free(env->me_dbflags);
|
4273
4559
|
free(env->me_dbxs);
|
4274
4560
|
free(env->me_path);
|
@@ -4344,186 +4630,41 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
4344
4630
|
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
|
4345
4631
|
}
|
4346
4632
|
|
4347
|
-
int
|
4348
|
-
mdb_env_copyfd(MDB_env *env, HANDLE fd)
|
4349
|
-
{
|
4350
|
-
MDB_txn *txn = NULL;
|
4351
|
-
int rc;
|
4352
|
-
size_t wsize;
|
4353
|
-
char *ptr;
|
4354
|
-
#ifdef _WIN32
|
4355
|
-
DWORD len, w2;
|
4356
|
-
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
4357
|
-
#else
|
4358
|
-
ssize_t len;
|
4359
|
-
size_t w2;
|
4360
|
-
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
4361
|
-
#endif
|
4362
|
-
|
4363
|
-
/* Do the lock/unlock of the reader mutex before starting the
|
4364
|
-
* write txn. Otherwise other read txns could block writers.
|
4365
|
-
*/
|
4366
|
-
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
4367
|
-
if (rc)
|
4368
|
-
return rc;
|
4369
|
-
|
4370
|
-
if (env->me_txns) {
|
4371
|
-
/* We must start the actual read txn after blocking writers */
|
4372
|
-
mdb_txn_reset0(txn, "reset-stage1");
|
4373
4633
|
|
4374
|
-
|
4375
|
-
|
4634
|
+
void ESECT
|
4635
|
+
mdb_env_close(MDB_env *env)
|
4636
|
+
{
|
4637
|
+
MDB_page *dp;
|
4376
4638
|
|
4377
|
-
|
4378
|
-
|
4379
|
-
UNLOCK_MUTEX_W(env);
|
4380
|
-
goto leave;
|
4381
|
-
}
|
4382
|
-
}
|
4639
|
+
if (env == NULL)
|
4640
|
+
return;
|
4383
4641
|
|
4384
|
-
|
4385
|
-
|
4386
|
-
|
4387
|
-
|
4388
|
-
|
4389
|
-
if (!rc) {
|
4390
|
-
rc = ErrCode();
|
4391
|
-
break;
|
4392
|
-
} else if (len > 0) {
|
4393
|
-
rc = MDB_SUCCESS;
|
4394
|
-
ptr += len;
|
4395
|
-
w2 -= len;
|
4396
|
-
continue;
|
4397
|
-
} else {
|
4398
|
-
/* Non-blocking or async handles are not supported */
|
4399
|
-
rc = EIO;
|
4400
|
-
break;
|
4401
|
-
}
|
4642
|
+
VGMEMP_DESTROY(env);
|
4643
|
+
while ((dp = env->me_dpages) != NULL) {
|
4644
|
+
VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
|
4645
|
+
env->me_dpages = dp->mp_next;
|
4646
|
+
free(dp);
|
4402
4647
|
}
|
4403
|
-
if (env->me_txns)
|
4404
|
-
UNLOCK_MUTEX_W(env);
|
4405
|
-
|
4406
|
-
if (rc)
|
4407
|
-
goto leave;
|
4408
4648
|
|
4409
|
-
|
4410
|
-
|
4411
|
-
|
4412
|
-
w2 = MAX_WRITE;
|
4413
|
-
else
|
4414
|
-
w2 = wsize;
|
4415
|
-
DO_WRITE(rc, fd, ptr, w2, len);
|
4416
|
-
if (!rc) {
|
4417
|
-
rc = ErrCode();
|
4418
|
-
break;
|
4419
|
-
} else if (len > 0) {
|
4420
|
-
rc = MDB_SUCCESS;
|
4421
|
-
ptr += len;
|
4422
|
-
wsize -= len;
|
4423
|
-
continue;
|
4424
|
-
} else {
|
4425
|
-
rc = EIO;
|
4426
|
-
break;
|
4427
|
-
}
|
4428
|
-
}
|
4649
|
+
mdb_env_close0(env, 0);
|
4650
|
+
free(env);
|
4651
|
+
}
|
4429
4652
|
|
4430
|
-
|
4431
|
-
|
4432
|
-
|
4653
|
+
/** Compare two items pointing at aligned size_t's */
|
4654
|
+
static int
|
4655
|
+
mdb_cmp_long(const MDB_val *a, const MDB_val *b)
|
4656
|
+
{
|
4657
|
+
return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 :
|
4658
|
+
*(size_t *)a->mv_data > *(size_t *)b->mv_data;
|
4433
4659
|
}
|
4434
4660
|
|
4435
|
-
int
|
4436
|
-
|
4661
|
+
/** Compare two items pointing at aligned unsigned int's */
|
4662
|
+
static int
|
4663
|
+
mdb_cmp_int(const MDB_val *a, const MDB_val *b)
|
4437
4664
|
{
|
4438
|
-
int
|
4439
|
-
|
4440
|
-
|
4441
|
-
|
4442
|
-
if (env->me_flags & MDB_NOSUBDIR) {
|
4443
|
-
lpath = (char *)path;
|
4444
|
-
} else {
|
4445
|
-
len = strlen(path);
|
4446
|
-
len += sizeof(DATANAME);
|
4447
|
-
lpath = malloc(len);
|
4448
|
-
if (!lpath)
|
4449
|
-
return ENOMEM;
|
4450
|
-
sprintf(lpath, "%s" DATANAME, path);
|
4451
|
-
}
|
4452
|
-
|
4453
|
-
/* The destination path must exist, but the destination file must not.
|
4454
|
-
* We don't want the OS to cache the writes, since the source data is
|
4455
|
-
* already in the OS cache.
|
4456
|
-
*/
|
4457
|
-
#ifdef _WIN32
|
4458
|
-
newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
|
4459
|
-
FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
|
4460
|
-
#else
|
4461
|
-
newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
|
4462
|
-
#endif
|
4463
|
-
if (newfd == INVALID_HANDLE_VALUE) {
|
4464
|
-
rc = ErrCode();
|
4465
|
-
goto leave;
|
4466
|
-
}
|
4467
|
-
|
4468
|
-
#ifdef O_DIRECT
|
4469
|
-
/* Set O_DIRECT if the file system supports it */
|
4470
|
-
if ((rc = fcntl(newfd, F_GETFL)) != -1)
|
4471
|
-
(void) fcntl(newfd, F_SETFL, rc | O_DIRECT);
|
4472
|
-
#endif
|
4473
|
-
#ifdef F_NOCACHE /* __APPLE__ */
|
4474
|
-
rc = fcntl(newfd, F_NOCACHE, 1);
|
4475
|
-
if (rc) {
|
4476
|
-
rc = ErrCode();
|
4477
|
-
goto leave;
|
4478
|
-
}
|
4479
|
-
#endif
|
4480
|
-
|
4481
|
-
rc = mdb_env_copyfd(env, newfd);
|
4482
|
-
|
4483
|
-
leave:
|
4484
|
-
if (!(env->me_flags & MDB_NOSUBDIR))
|
4485
|
-
free(lpath);
|
4486
|
-
if (newfd != INVALID_HANDLE_VALUE)
|
4487
|
-
if (close(newfd) < 0 && rc == MDB_SUCCESS)
|
4488
|
-
rc = ErrCode();
|
4489
|
-
|
4490
|
-
return rc;
|
4491
|
-
}
|
4492
|
-
|
4493
|
-
void
|
4494
|
-
mdb_env_close(MDB_env *env)
|
4495
|
-
{
|
4496
|
-
MDB_page *dp;
|
4497
|
-
|
4498
|
-
if (env == NULL)
|
4499
|
-
return;
|
4500
|
-
|
4501
|
-
VGMEMP_DESTROY(env);
|
4502
|
-
while ((dp = env->me_dpages) != NULL) {
|
4503
|
-
VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
|
4504
|
-
env->me_dpages = dp->mp_next;
|
4505
|
-
free(dp);
|
4506
|
-
}
|
4507
|
-
|
4508
|
-
mdb_env_close0(env, 0);
|
4509
|
-
free(env);
|
4510
|
-
}
|
4511
|
-
|
4512
|
-
/** Compare two items pointing at aligned size_t's */
|
4513
|
-
static int
|
4514
|
-
mdb_cmp_long(const MDB_val *a, const MDB_val *b)
|
4515
|
-
{
|
4516
|
-
return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 :
|
4517
|
-
*(size_t *)a->mv_data > *(size_t *)b->mv_data;
|
4518
|
-
}
|
4519
|
-
|
4520
|
-
/** Compare two items pointing at aligned unsigned int's */
|
4521
|
-
static int
|
4522
|
-
mdb_cmp_int(const MDB_val *a, const MDB_val *b)
|
4523
|
-
{
|
4524
|
-
return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 :
|
4525
|
-
*(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data;
|
4526
|
-
}
|
4665
|
+
return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 :
|
4666
|
+
*(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data;
|
4667
|
+
}
|
4527
4668
|
|
4528
4669
|
/** Compare two items pointing at unsigned ints of unknown alignment.
|
4529
4670
|
* Nodes and keys are guaranteed to be 2-byte aligned.
|
@@ -4542,7 +4683,16 @@ mdb_cmp_cint(const MDB_val *a, const MDB_val *b)
|
|
4542
4683
|
} while(!x && u > (unsigned short *)a->mv_data);
|
4543
4684
|
return x;
|
4544
4685
|
#else
|
4545
|
-
|
4686
|
+
unsigned short *u, *c, *end;
|
4687
|
+
int x;
|
4688
|
+
|
4689
|
+
end = (unsigned short *) ((char *) a->mv_data + a->mv_size);
|
4690
|
+
u = (unsigned short *)a->mv_data;
|
4691
|
+
c = (unsigned short *)b->mv_data;
|
4692
|
+
do {
|
4693
|
+
x = *u++ - *c++;
|
4694
|
+
} while(!x && u < end);
|
4695
|
+
return x;
|
4546
4696
|
#endif
|
4547
4697
|
}
|
4548
4698
|
|
@@ -4924,6 +5074,8 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
|
|
4924
5074
|
/* Make sure we're using an up-to-date root */
|
4925
5075
|
if (*mc->mc_dbflag & DB_STALE) {
|
4926
5076
|
MDB_cursor mc2;
|
5077
|
+
if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi))
|
5078
|
+
return MDB_BAD_DBI;
|
4927
5079
|
mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL);
|
4928
5080
|
rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, 0);
|
4929
5081
|
if (rc)
|
@@ -5264,8 +5416,10 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
5264
5416
|
if (op == MDB_PREV || op == MDB_PREV_DUP) {
|
5265
5417
|
rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV);
|
5266
5418
|
if (op != MDB_PREV || rc != MDB_NOTFOUND) {
|
5267
|
-
if (rc == MDB_SUCCESS)
|
5419
|
+
if (rc == MDB_SUCCESS) {
|
5268
5420
|
MDB_GET_KEY(leaf, key);
|
5421
|
+
mc->mc_flags &= ~C_EOF;
|
5422
|
+
}
|
5269
5423
|
return rc;
|
5270
5424
|
}
|
5271
5425
|
} else {
|
@@ -5457,8 +5611,10 @@ set1:
|
|
5457
5611
|
mc->mc_flags &= ~C_EOF;
|
5458
5612
|
|
5459
5613
|
if (IS_LEAF2(mp)) {
|
5460
|
-
|
5461
|
-
|
5614
|
+
if (op == MDB_SET_RANGE || op == MDB_SET_KEY) {
|
5615
|
+
key->mv_size = mc->mc_db->md_pad;
|
5616
|
+
key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
|
5617
|
+
}
|
5462
5618
|
return MDB_SUCCESS;
|
5463
5619
|
}
|
5464
5620
|
|
@@ -5740,6 +5896,14 @@ fetchm:
|
|
5740
5896
|
rc = MDB_INCOMPATIBLE;
|
5741
5897
|
break;
|
5742
5898
|
}
|
5899
|
+
{
|
5900
|
+
MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
5901
|
+
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
5902
|
+
MDB_GET_KEY(leaf, key);
|
5903
|
+
rc = mdb_node_read(mc->mc_txn, leaf, data);
|
5904
|
+
break;
|
5905
|
+
}
|
5906
|
+
}
|
5743
5907
|
if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) {
|
5744
5908
|
rc = EINVAL;
|
5745
5909
|
break;
|
@@ -5776,6 +5940,8 @@ mdb_cursor_touch(MDB_cursor *mc)
|
|
5776
5940
|
if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) {
|
5777
5941
|
MDB_cursor mc2;
|
5778
5942
|
MDB_xcursor mcx;
|
5943
|
+
if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi))
|
5944
|
+
return MDB_BAD_DBI;
|
5779
5945
|
mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, &mcx);
|
5780
5946
|
rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY);
|
5781
5947
|
if (rc)
|
@@ -5932,22 +6098,42 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
5932
6098
|
if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
|
5933
6099
|
LEAFSIZE(key, data) > env->me_nodemax)
|
5934
6100
|
{
|
5935
|
-
/* Too big for a node, insert in sub-DB
|
6101
|
+
/* Too big for a node, insert in sub-DB. Set up an empty
|
6102
|
+
* "old sub-page" for prep_subDB to expand to a full page.
|
6103
|
+
*/
|
5936
6104
|
fp_flags = P_LEAF|P_DIRTY;
|
5937
6105
|
fp = env->me_pbuf;
|
5938
6106
|
fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
|
5939
|
-
fp->mp_lower = fp->mp_upper =
|
6107
|
+
fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE);
|
6108
|
+
olddata.mv_size = PAGEHDRSZ;
|
5940
6109
|
goto prep_subDB;
|
5941
6110
|
}
|
5942
6111
|
} else {
|
5943
6112
|
/* there's only a key anyway, so this is a no-op */
|
5944
6113
|
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
|
6114
|
+
char *ptr;
|
5945
6115
|
unsigned int ksize = mc->mc_db->md_pad;
|
5946
6116
|
if (key->mv_size != ksize)
|
5947
6117
|
return MDB_BAD_VALSIZE;
|
5948
|
-
|
5949
|
-
|
5950
|
-
|
6118
|
+
ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
|
6119
|
+
memcpy(ptr, key->mv_data, ksize);
|
6120
|
+
fix_parent:
|
6121
|
+
/* if overwriting slot 0 of leaf, need to
|
6122
|
+
* update branch key if there is a parent page
|
6123
|
+
*/
|
6124
|
+
if (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
|
6125
|
+
unsigned short top = mc->mc_top;
|
6126
|
+
mc->mc_top--;
|
6127
|
+
/* slot 0 is always an empty key, find real slot */
|
6128
|
+
while (mc->mc_top && !mc->mc_ki[mc->mc_top])
|
6129
|
+
mc->mc_top--;
|
6130
|
+
if (mc->mc_ki[mc->mc_top])
|
6131
|
+
rc2 = mdb_update_key(mc, key);
|
6132
|
+
else
|
6133
|
+
rc2 = MDB_SUCCESS;
|
6134
|
+
mc->mc_top = top;
|
6135
|
+
if (rc2)
|
6136
|
+
return rc2;
|
5951
6137
|
}
|
5952
6138
|
return MDB_SUCCESS;
|
5953
6139
|
}
|
@@ -5978,12 +6164,12 @@ more:
|
|
5978
6164
|
if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
|
5979
6165
|
mc->mc_dbx->md_dcmp = mdb_cmp_clong;
|
5980
6166
|
#endif
|
5981
|
-
/*
|
6167
|
+
/* does data match? */
|
5982
6168
|
if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
|
5983
6169
|
if (flags & MDB_NODUPDATA)
|
5984
6170
|
return MDB_KEYEXIST;
|
5985
|
-
|
5986
|
-
goto
|
6171
|
+
/* overwrite it */
|
6172
|
+
goto current;
|
5987
6173
|
}
|
5988
6174
|
|
5989
6175
|
/* Back up original data item */
|
@@ -5992,7 +6178,7 @@ more:
|
|
5992
6178
|
|
5993
6179
|
/* Make sub-page header for the dup items, with dummy body */
|
5994
6180
|
fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
|
5995
|
-
fp->mp_lower = PAGEHDRSZ;
|
6181
|
+
fp->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
5996
6182
|
xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
|
5997
6183
|
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
5998
6184
|
fp->mp_flags |= P_LEAF2;
|
@@ -6002,8 +6188,8 @@ more:
|
|
6002
6188
|
xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) +
|
6003
6189
|
(dkey.mv_size & 1) + (data->mv_size & 1);
|
6004
6190
|
}
|
6005
|
-
fp->mp_upper = xdata.mv_size;
|
6006
|
-
olddata.mv_size =
|
6191
|
+
fp->mp_upper = xdata.mv_size - PAGEBASE;
|
6192
|
+
olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */
|
6007
6193
|
} else if (leaf->mn_flags & F_SUBDATA) {
|
6008
6194
|
/* Data is on sub-DB, just store it */
|
6009
6195
|
flags |= F_DUPDATA|F_SUBDATA;
|
@@ -6070,8 +6256,8 @@ prep_subDB:
|
|
6070
6256
|
if (fp_flags & P_LEAF2) {
|
6071
6257
|
memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
|
6072
6258
|
} else {
|
6073
|
-
memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper,
|
6074
|
-
olddata.mv_size - fp->mp_upper);
|
6259
|
+
memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE,
|
6260
|
+
olddata.mv_size - fp->mp_upper - PAGEBASE);
|
6075
6261
|
for (i=0; i<NUMKEYS(fp); i++)
|
6076
6262
|
mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset;
|
6077
6263
|
}
|
@@ -6154,8 +6340,10 @@ current:
|
|
6154
6340
|
data->mv_data = olddata.mv_data;
|
6155
6341
|
else if (!(mc->mc_flags & C_SUB))
|
6156
6342
|
memcpy(olddata.mv_data, data->mv_data, data->mv_size);
|
6157
|
-
else
|
6343
|
+
else {
|
6158
6344
|
memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
|
6345
|
+
goto fix_parent;
|
6346
|
+
}
|
6159
6347
|
return MDB_SUCCESS;
|
6160
6348
|
}
|
6161
6349
|
mdb_node_del(mc, 0);
|
@@ -6259,7 +6447,6 @@ put_sub:
|
|
6259
6447
|
*/
|
6260
6448
|
mc->mc_flags |= C_INITIALIZED;
|
6261
6449
|
}
|
6262
|
-
next_sub:
|
6263
6450
|
if (flags & MDB_MULTIPLE) {
|
6264
6451
|
if (!rc) {
|
6265
6452
|
mcount++;
|
@@ -6393,8 +6580,8 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp)
|
|
6393
6580
|
DPRINTF(("allocated new mpage %"Z"u, page size %u",
|
6394
6581
|
np->mp_pgno, mc->mc_txn->mt_env->me_psize));
|
6395
6582
|
np->mp_flags = flags | P_DIRTY;
|
6396
|
-
np->mp_lower = PAGEHDRSZ;
|
6397
|
-
np->mp_upper = mc->mc_txn->mt_env->me_psize;
|
6583
|
+
np->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
6584
|
+
np->mp_upper = mc->mc_txn->mt_env->me_psize - PAGEBASE;
|
6398
6585
|
|
6399
6586
|
if (IS_BRANCH(np))
|
6400
6587
|
mc->mc_db->md_branch_pages++;
|
@@ -6647,7 +6834,7 @@ mdb_node_del(MDB_cursor *mc, int ksize)
|
|
6647
6834
|
}
|
6648
6835
|
}
|
6649
6836
|
|
6650
|
-
base = (char *)mp + mp->mp_upper;
|
6837
|
+
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
6651
6838
|
memmove(base + sz, base, ptr - mp->mp_upper);
|
6652
6839
|
|
6653
6840
|
mp->mp_lower -= sizeof(indx_t);
|
@@ -6701,7 +6888,7 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
|
|
6701
6888
|
mp->mp_ptrs[i] += delta;
|
6702
6889
|
}
|
6703
6890
|
|
6704
|
-
base = (char *)mp + mp->mp_upper;
|
6891
|
+
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
6705
6892
|
memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node));
|
6706
6893
|
mp->mp_upper += delta;
|
6707
6894
|
}
|
@@ -6877,6 +7064,12 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp)
|
|
6877
7064
|
if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
|
6878
7065
|
return MDB_BAD_TXN;
|
6879
7066
|
|
7067
|
+
if (!(mc->mc_flags & C_INITIALIZED))
|
7068
|
+
return EINVAL;
|
7069
|
+
|
7070
|
+
if (!mc->mc_snum || (mc->mc_flags & C_EOF))
|
7071
|
+
return MDB_NOTFOUND;
|
7072
|
+
|
6880
7073
|
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
6881
7074
|
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
6882
7075
|
*countp = 1;
|
@@ -6973,7 +7166,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
|
|
6973
7166
|
mp->mp_ptrs[i] -= delta;
|
6974
7167
|
}
|
6975
7168
|
|
6976
|
-
base = (char *)mp + mp->mp_upper;
|
7169
|
+
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
6977
7170
|
len = ptr - mp->mp_upper + NODESIZE;
|
6978
7171
|
memmove(base - delta, base, len);
|
6979
7172
|
mp->mp_upper -= delta;
|
@@ -7054,20 +7247,20 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7054
7247
|
MDB_node *s2;
|
7055
7248
|
MDB_val bkey;
|
7056
7249
|
/* must find the lowest key below dst */
|
7057
|
-
|
7250
|
+
mdb_cursor_copy(cdst, &mn);
|
7251
|
+
rc = mdb_page_search_lowest(&mn);
|
7058
7252
|
if (rc)
|
7059
7253
|
return rc;
|
7060
|
-
if (IS_LEAF2(
|
7061
|
-
bkey.mv_size =
|
7062
|
-
bkey.mv_data = LEAF2KEY(
|
7254
|
+
if (IS_LEAF2(mn.mc_pg[mn.mc_top])) {
|
7255
|
+
bkey.mv_size = mn.mc_db->md_pad;
|
7256
|
+
bkey.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, bkey.mv_size);
|
7063
7257
|
} else {
|
7064
|
-
s2 = NODEPTR(
|
7258
|
+
s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0);
|
7065
7259
|
bkey.mv_size = NODEKSZ(s2);
|
7066
7260
|
bkey.mv_data = NODEKEY(s2);
|
7067
7261
|
}
|
7068
|
-
|
7069
|
-
|
7070
|
-
mdb_cursor_copy(cdst, &mn);
|
7262
|
+
mn.mc_snum = snum--;
|
7263
|
+
mn.mc_top = snum;
|
7071
7264
|
mn.mc_ki[snum] = 0;
|
7072
7265
|
rc = mdb_update_key(&mn, &bkey);
|
7073
7266
|
if (rc)
|
@@ -7183,14 +7376,17 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7183
7376
|
static int
|
7184
7377
|
mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
7185
7378
|
{
|
7186
|
-
|
7187
|
-
|
7188
|
-
MDB_node *srcnode;
|
7379
|
+
MDB_page *psrc, *pdst;
|
7380
|
+
MDB_node *srcnode;
|
7189
7381
|
MDB_val key, data;
|
7190
|
-
unsigned
|
7382
|
+
unsigned nkeys;
|
7383
|
+
int rc;
|
7384
|
+
indx_t i, j;
|
7191
7385
|
|
7192
|
-
|
7193
|
-
|
7386
|
+
psrc = csrc->mc_pg[csrc->mc_top];
|
7387
|
+
pdst = cdst->mc_pg[cdst->mc_top];
|
7388
|
+
|
7389
|
+
DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno));
|
7194
7390
|
|
7195
7391
|
mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
|
7196
7392
|
mdb_cassert(csrc, cdst->mc_snum > 1);
|
@@ -7201,36 +7397,35 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7201
7397
|
|
7202
7398
|
/* Move all nodes from src to dst.
|
7203
7399
|
*/
|
7204
|
-
j = nkeys = NUMKEYS(
|
7205
|
-
if (IS_LEAF2(
|
7400
|
+
j = nkeys = NUMKEYS(pdst);
|
7401
|
+
if (IS_LEAF2(psrc)) {
|
7206
7402
|
key.mv_size = csrc->mc_db->md_pad;
|
7207
|
-
key.mv_data = METADATA(
|
7208
|
-
for (i = 0; i < NUMKEYS(
|
7403
|
+
key.mv_data = METADATA(psrc);
|
7404
|
+
for (i = 0; i < NUMKEYS(psrc); i++, j++) {
|
7209
7405
|
rc = mdb_node_add(cdst, j, &key, NULL, 0, 0);
|
7210
7406
|
if (rc != MDB_SUCCESS)
|
7211
7407
|
return rc;
|
7212
7408
|
key.mv_data = (char *)key.mv_data + key.mv_size;
|
7213
7409
|
}
|
7214
7410
|
} else {
|
7215
|
-
for (i = 0; i < NUMKEYS(
|
7216
|
-
srcnode = NODEPTR(
|
7217
|
-
if (i == 0 && IS_BRANCH(
|
7218
|
-
|
7411
|
+
for (i = 0; i < NUMKEYS(psrc); i++, j++) {
|
7412
|
+
srcnode = NODEPTR(psrc, i);
|
7413
|
+
if (i == 0 && IS_BRANCH(psrc)) {
|
7414
|
+
MDB_cursor mn;
|
7219
7415
|
MDB_node *s2;
|
7416
|
+
mdb_cursor_copy(csrc, &mn);
|
7220
7417
|
/* must find the lowest key below src */
|
7221
|
-
rc = mdb_page_search_lowest(
|
7418
|
+
rc = mdb_page_search_lowest(&mn);
|
7222
7419
|
if (rc)
|
7223
7420
|
return rc;
|
7224
|
-
if (IS_LEAF2(
|
7225
|
-
key.mv_size =
|
7226
|
-
key.mv_data = LEAF2KEY(
|
7421
|
+
if (IS_LEAF2(mn.mc_pg[mn.mc_top])) {
|
7422
|
+
key.mv_size = mn.mc_db->md_pad;
|
7423
|
+
key.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, key.mv_size);
|
7227
7424
|
} else {
|
7228
|
-
s2 = NODEPTR(
|
7425
|
+
s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0);
|
7229
7426
|
key.mv_size = NODEKSZ(s2);
|
7230
7427
|
key.mv_data = NODEKEY(s2);
|
7231
7428
|
}
|
7232
|
-
csrc->mc_snum = snum--;
|
7233
|
-
csrc->mc_top = snum;
|
7234
7429
|
} else {
|
7235
7430
|
key.mv_size = srcnode->mn_ksize;
|
7236
7431
|
key.mv_data = NODEKEY(srcnode);
|
@@ -7245,8 +7440,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7245
7440
|
}
|
7246
7441
|
|
7247
7442
|
DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)",
|
7248
|
-
|
7249
|
-
(float)PAGEFILL(cdst->mc_txn->mt_env,
|
7443
|
+
pdst->mp_pgno, NUMKEYS(pdst),
|
7444
|
+
(float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10));
|
7250
7445
|
|
7251
7446
|
/* Unlink the src page from parent and add to free list.
|
7252
7447
|
*/
|
@@ -7262,11 +7457,14 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7262
7457
|
}
|
7263
7458
|
csrc->mc_top++;
|
7264
7459
|
|
7265
|
-
|
7266
|
-
|
7460
|
+
psrc = csrc->mc_pg[csrc->mc_top];
|
7461
|
+
/* If not operating on FreeDB, allow this page to be reused
|
7462
|
+
* in this txn. Otherwise just add to free list.
|
7463
|
+
*/
|
7464
|
+
rc = mdb_page_loose(csrc, psrc);
|
7267
7465
|
if (rc)
|
7268
7466
|
return rc;
|
7269
|
-
if (IS_LEAF(
|
7467
|
+
if (IS_LEAF(psrc))
|
7270
7468
|
csrc->mc_db->md_leaf_pages--;
|
7271
7469
|
else
|
7272
7470
|
csrc->mc_db->md_branch_pages--;
|
@@ -7274,7 +7472,6 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7274
7472
|
/* Adjust other cursors pointing to mp */
|
7275
7473
|
MDB_cursor *m2, *m3;
|
7276
7474
|
MDB_dbi dbi = csrc->mc_dbi;
|
7277
|
-
MDB_page *mp = cdst->mc_pg[cdst->mc_top];
|
7278
7475
|
|
7279
7476
|
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
|
7280
7477
|
if (csrc->mc_flags & C_SUB)
|
@@ -7283,8 +7480,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7283
7480
|
m3 = m2;
|
7284
7481
|
if (m3 == csrc) continue;
|
7285
7482
|
if (m3->mc_snum < csrc->mc_snum) continue;
|
7286
|
-
if (m3->mc_pg[csrc->mc_top] ==
|
7287
|
-
m3->mc_pg[csrc->mc_top] =
|
7483
|
+
if (m3->mc_pg[csrc->mc_top] == psrc) {
|
7484
|
+
m3->mc_pg[csrc->mc_top] = pdst;
|
7288
7485
|
m3->mc_ki[csrc->mc_top] += nkeys;
|
7289
7486
|
}
|
7290
7487
|
}
|
@@ -7525,8 +7722,10 @@ mdb_cursor_del0(MDB_cursor *mc)
|
|
7525
7722
|
/* if mc points past last node in page, find next sibling */
|
7526
7723
|
if (mc->mc_ki[mc->mc_top] >= nkeys) {
|
7527
7724
|
rc = mdb_cursor_sibling(mc, 1);
|
7528
|
-
if (rc == MDB_NOTFOUND)
|
7725
|
+
if (rc == MDB_NOTFOUND) {
|
7726
|
+
mc->mc_flags |= C_EOF;
|
7529
7727
|
rc = MDB_SUCCESS;
|
7728
|
+
}
|
7530
7729
|
}
|
7531
7730
|
|
7532
7731
|
/* Adjust other cursors pointing to mp */
|
@@ -7541,11 +7740,15 @@ mdb_cursor_del0(MDB_cursor *mc)
|
|
7541
7740
|
m3->mc_flags |= C_DEL;
|
7542
7741
|
if (m3->mc_ki[mc->mc_top] > ki)
|
7543
7742
|
m3->mc_ki[mc->mc_top]--;
|
7743
|
+
else if (mc->mc_db->md_flags & MDB_DUPSORT)
|
7744
|
+
m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF;
|
7544
7745
|
}
|
7545
7746
|
if (m3->mc_ki[mc->mc_top] >= nkeys) {
|
7546
7747
|
rc = mdb_cursor_sibling(m3, 1);
|
7547
|
-
if (rc == MDB_NOTFOUND)
|
7748
|
+
if (rc == MDB_NOTFOUND) {
|
7749
|
+
m3->mc_flags |= C_EOF;
|
7548
7750
|
rc = MDB_SUCCESS;
|
7751
|
+
}
|
7549
7752
|
}
|
7550
7753
|
}
|
7551
7754
|
}
|
@@ -7760,8 +7963,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
7760
7963
|
}
|
7761
7964
|
copy->mp_pgno = mp->mp_pgno;
|
7762
7965
|
copy->mp_flags = mp->mp_flags;
|
7763
|
-
copy->mp_lower = PAGEHDRSZ;
|
7764
|
-
copy->mp_upper = env->me_psize;
|
7966
|
+
copy->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
7967
|
+
copy->mp_upper = env->me_psize - PAGEBASE;
|
7765
7968
|
|
7766
7969
|
/* prepare to insert */
|
7767
7970
|
for (i=0, j=0; i<nkeys; i++) {
|
@@ -7801,7 +8004,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
7801
8004
|
psize += nsize;
|
7802
8005
|
node = NULL;
|
7803
8006
|
} else {
|
7804
|
-
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]);
|
8007
|
+
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE);
|
7805
8008
|
psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
|
7806
8009
|
if (IS_LEAF(mp)) {
|
7807
8010
|
if (F_ISSET(node->mn_flags, F_BIGDATA))
|
@@ -7821,7 +8024,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
7821
8024
|
sepkey.mv_size = newkey->mv_size;
|
7822
8025
|
sepkey.mv_data = newkey->mv_data;
|
7823
8026
|
} else {
|
7824
|
-
node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx]);
|
8027
|
+
node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + PAGEBASE);
|
7825
8028
|
sepkey.mv_size = node->mn_ksize;
|
7826
8029
|
sepkey.mv_data = NODEKEY(node);
|
7827
8030
|
}
|
@@ -7902,7 +8105,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
7902
8105
|
/* Update index for the new key. */
|
7903
8106
|
mc->mc_ki[mc->mc_top] = j;
|
7904
8107
|
} else {
|
7905
|
-
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]);
|
8108
|
+
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE);
|
7906
8109
|
rkey.mv_data = NODEKEY(node);
|
7907
8110
|
rkey.mv_size = node->mn_ksize;
|
7908
8111
|
if (IS_LEAF(mp)) {
|
@@ -7938,7 +8141,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
7938
8141
|
mp->mp_lower = copy->mp_lower;
|
7939
8142
|
mp->mp_upper = copy->mp_upper;
|
7940
8143
|
memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1),
|
7941
|
-
env->me_psize - copy->mp_upper);
|
8144
|
+
env->me_psize - copy->mp_upper - PAGEBASE);
|
7942
8145
|
|
7943
8146
|
/* reset back to original page */
|
7944
8147
|
if (newindx < split_indx) {
|
@@ -8037,7 +8240,568 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
|
|
8037
8240
|
return mdb_cursor_put(&mc, key, data, flags);
|
8038
8241
|
}
|
8039
8242
|
|
8040
|
-
|
8243
|
+
#ifndef MDB_WBUF
|
8244
|
+
#define MDB_WBUF (1024*1024)
|
8245
|
+
#endif
|
8246
|
+
|
8247
|
+
/** State needed for a compacting copy. */
|
8248
|
+
typedef struct mdb_copy {
|
8249
|
+
pthread_mutex_t mc_mutex;
|
8250
|
+
pthread_cond_t mc_cond;
|
8251
|
+
char *mc_wbuf[2];
|
8252
|
+
char *mc_over[2];
|
8253
|
+
MDB_env *mc_env;
|
8254
|
+
MDB_txn *mc_txn;
|
8255
|
+
int mc_wlen[2];
|
8256
|
+
int mc_olen[2];
|
8257
|
+
pgno_t mc_next_pgno;
|
8258
|
+
HANDLE mc_fd;
|
8259
|
+
int mc_status;
|
8260
|
+
volatile int mc_new;
|
8261
|
+
int mc_toggle;
|
8262
|
+
|
8263
|
+
} mdb_copy;
|
8264
|
+
|
8265
|
+
/** Dedicated writer thread for compacting copy. */
|
8266
|
+
static THREAD_RET ESECT
|
8267
|
+
mdb_env_copythr(void *arg)
|
8268
|
+
{
|
8269
|
+
mdb_copy *my = arg;
|
8270
|
+
char *ptr;
|
8271
|
+
int toggle = 0, wsize, rc;
|
8272
|
+
#ifdef _WIN32
|
8273
|
+
DWORD len;
|
8274
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
8275
|
+
#else
|
8276
|
+
int len;
|
8277
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
8278
|
+
#endif
|
8279
|
+
|
8280
|
+
pthread_mutex_lock(&my->mc_mutex);
|
8281
|
+
my->mc_new = 0;
|
8282
|
+
pthread_cond_signal(&my->mc_cond);
|
8283
|
+
for(;;) {
|
8284
|
+
while (!my->mc_new)
|
8285
|
+
pthread_cond_wait(&my->mc_cond, &my->mc_mutex);
|
8286
|
+
if (my->mc_new < 0) {
|
8287
|
+
my->mc_new = 0;
|
8288
|
+
break;
|
8289
|
+
}
|
8290
|
+
my->mc_new = 0;
|
8291
|
+
wsize = my->mc_wlen[toggle];
|
8292
|
+
ptr = my->mc_wbuf[toggle];
|
8293
|
+
again:
|
8294
|
+
while (wsize > 0) {
|
8295
|
+
DO_WRITE(rc, my->mc_fd, ptr, wsize, len);
|
8296
|
+
if (!rc) {
|
8297
|
+
rc = ErrCode();
|
8298
|
+
break;
|
8299
|
+
} else if (len > 0) {
|
8300
|
+
rc = MDB_SUCCESS;
|
8301
|
+
ptr += len;
|
8302
|
+
wsize -= len;
|
8303
|
+
continue;
|
8304
|
+
} else {
|
8305
|
+
rc = EIO;
|
8306
|
+
break;
|
8307
|
+
}
|
8308
|
+
}
|
8309
|
+
if (rc) {
|
8310
|
+
my->mc_status = rc;
|
8311
|
+
break;
|
8312
|
+
}
|
8313
|
+
/* If there's an overflow page tail, write it too */
|
8314
|
+
if (my->mc_olen[toggle]) {
|
8315
|
+
wsize = my->mc_olen[toggle];
|
8316
|
+
ptr = my->mc_over[toggle];
|
8317
|
+
my->mc_olen[toggle] = 0;
|
8318
|
+
goto again;
|
8319
|
+
}
|
8320
|
+
my->mc_wlen[toggle] = 0;
|
8321
|
+
toggle ^= 1;
|
8322
|
+
pthread_cond_signal(&my->mc_cond);
|
8323
|
+
}
|
8324
|
+
pthread_cond_signal(&my->mc_cond);
|
8325
|
+
pthread_mutex_unlock(&my->mc_mutex);
|
8326
|
+
return (THREAD_RET)0;
|
8327
|
+
#undef DO_WRITE
|
8328
|
+
}
|
8329
|
+
|
8330
|
+
/** Tell the writer thread there's a buffer ready to write */
|
8331
|
+
static int ESECT
|
8332
|
+
mdb_env_cthr_toggle(mdb_copy *my, int st)
|
8333
|
+
{
|
8334
|
+
int toggle = my->mc_toggle ^ 1;
|
8335
|
+
pthread_mutex_lock(&my->mc_mutex);
|
8336
|
+
if (my->mc_status) {
|
8337
|
+
pthread_mutex_unlock(&my->mc_mutex);
|
8338
|
+
return my->mc_status;
|
8339
|
+
}
|
8340
|
+
while (my->mc_new == 1)
|
8341
|
+
pthread_cond_wait(&my->mc_cond, &my->mc_mutex);
|
8342
|
+
my->mc_new = st;
|
8343
|
+
my->mc_toggle = toggle;
|
8344
|
+
pthread_cond_signal(&my->mc_cond);
|
8345
|
+
pthread_mutex_unlock(&my->mc_mutex);
|
8346
|
+
return 0;
|
8347
|
+
}
|
8348
|
+
|
8349
|
+
/** Depth-first tree traversal for compacting copy. */
|
8350
|
+
static int ESECT
|
8351
|
+
mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags)
|
8352
|
+
{
|
8353
|
+
MDB_cursor mc;
|
8354
|
+
MDB_txn *txn = my->mc_txn;
|
8355
|
+
MDB_node *ni;
|
8356
|
+
MDB_page *mo, *mp, *leaf;
|
8357
|
+
char *buf, *ptr;
|
8358
|
+
int rc, toggle;
|
8359
|
+
unsigned int i;
|
8360
|
+
|
8361
|
+
/* Empty DB, nothing to do */
|
8362
|
+
if (*pg == P_INVALID)
|
8363
|
+
return MDB_SUCCESS;
|
8364
|
+
|
8365
|
+
mc.mc_snum = 1;
|
8366
|
+
mc.mc_top = 0;
|
8367
|
+
mc.mc_txn = txn;
|
8368
|
+
|
8369
|
+
rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL);
|
8370
|
+
if (rc)
|
8371
|
+
return rc;
|
8372
|
+
rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST);
|
8373
|
+
if (rc)
|
8374
|
+
return rc;
|
8375
|
+
|
8376
|
+
/* Make cursor pages writable */
|
8377
|
+
buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum);
|
8378
|
+
if (buf == NULL)
|
8379
|
+
return ENOMEM;
|
8380
|
+
|
8381
|
+
for (i=0; i<mc.mc_top; i++) {
|
8382
|
+
mdb_page_copy((MDB_page *)ptr, mc.mc_pg[i], my->mc_env->me_psize);
|
8383
|
+
mc.mc_pg[i] = (MDB_page *)ptr;
|
8384
|
+
ptr += my->mc_env->me_psize;
|
8385
|
+
}
|
8386
|
+
|
8387
|
+
/* This is writable space for a leaf page. Usually not needed. */
|
8388
|
+
leaf = (MDB_page *)ptr;
|
8389
|
+
|
8390
|
+
toggle = my->mc_toggle;
|
8391
|
+
while (mc.mc_snum > 0) {
|
8392
|
+
unsigned n;
|
8393
|
+
mp = mc.mc_pg[mc.mc_top];
|
8394
|
+
n = NUMKEYS(mp);
|
8395
|
+
|
8396
|
+
if (IS_LEAF(mp)) {
|
8397
|
+
if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) {
|
8398
|
+
for (i=0; i<n; i++) {
|
8399
|
+
ni = NODEPTR(mp, i);
|
8400
|
+
if (ni->mn_flags & F_BIGDATA) {
|
8401
|
+
MDB_page *omp;
|
8402
|
+
pgno_t pg;
|
8403
|
+
|
8404
|
+
/* Need writable leaf */
|
8405
|
+
if (mp != leaf) {
|
8406
|
+
mc.mc_pg[mc.mc_top] = leaf;
|
8407
|
+
mdb_page_copy(leaf, mp, my->mc_env->me_psize);
|
8408
|
+
mp = leaf;
|
8409
|
+
ni = NODEPTR(mp, i);
|
8410
|
+
}
|
8411
|
+
|
8412
|
+
memcpy(&pg, NODEDATA(ni), sizeof(pg));
|
8413
|
+
rc = mdb_page_get(txn, pg, &omp, NULL);
|
8414
|
+
if (rc)
|
8415
|
+
goto done;
|
8416
|
+
if (my->mc_wlen[toggle] >= MDB_WBUF) {
|
8417
|
+
rc = mdb_env_cthr_toggle(my, 1);
|
8418
|
+
if (rc)
|
8419
|
+
goto done;
|
8420
|
+
toggle = my->mc_toggle;
|
8421
|
+
}
|
8422
|
+
mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
8423
|
+
memcpy(mo, omp, my->mc_env->me_psize);
|
8424
|
+
mo->mp_pgno = my->mc_next_pgno;
|
8425
|
+
my->mc_next_pgno += omp->mp_pages;
|
8426
|
+
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
8427
|
+
if (omp->mp_pages > 1) {
|
8428
|
+
my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1);
|
8429
|
+
my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize;
|
8430
|
+
rc = mdb_env_cthr_toggle(my, 1);
|
8431
|
+
if (rc)
|
8432
|
+
goto done;
|
8433
|
+
toggle = my->mc_toggle;
|
8434
|
+
}
|
8435
|
+
memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t));
|
8436
|
+
} else if (ni->mn_flags & F_SUBDATA) {
|
8437
|
+
MDB_db db;
|
8438
|
+
|
8439
|
+
/* Need writable leaf */
|
8440
|
+
if (mp != leaf) {
|
8441
|
+
mc.mc_pg[mc.mc_top] = leaf;
|
8442
|
+
mdb_page_copy(leaf, mp, my->mc_env->me_psize);
|
8443
|
+
mp = leaf;
|
8444
|
+
ni = NODEPTR(mp, i);
|
8445
|
+
}
|
8446
|
+
|
8447
|
+
memcpy(&db, NODEDATA(ni), sizeof(db));
|
8448
|
+
my->mc_toggle = toggle;
|
8449
|
+
rc = mdb_env_cwalk(my, &db.md_root, ni->mn_flags & F_DUPDATA);
|
8450
|
+
if (rc)
|
8451
|
+
goto done;
|
8452
|
+
toggle = my->mc_toggle;
|
8453
|
+
memcpy(NODEDATA(ni), &db, sizeof(db));
|
8454
|
+
}
|
8455
|
+
}
|
8456
|
+
}
|
8457
|
+
} else {
|
8458
|
+
mc.mc_ki[mc.mc_top]++;
|
8459
|
+
if (mc.mc_ki[mc.mc_top] < n) {
|
8460
|
+
pgno_t pg;
|
8461
|
+
again:
|
8462
|
+
ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]);
|
8463
|
+
pg = NODEPGNO(ni);
|
8464
|
+
rc = mdb_page_get(txn, pg, &mp, NULL);
|
8465
|
+
if (rc)
|
8466
|
+
goto done;
|
8467
|
+
mc.mc_top++;
|
8468
|
+
mc.mc_snum++;
|
8469
|
+
mc.mc_ki[mc.mc_top] = 0;
|
8470
|
+
if (IS_BRANCH(mp)) {
|
8471
|
+
/* Whenever we advance to a sibling branch page,
|
8472
|
+
* we must proceed all the way down to its first leaf.
|
8473
|
+
*/
|
8474
|
+
mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize);
|
8475
|
+
goto again;
|
8476
|
+
} else
|
8477
|
+
mc.mc_pg[mc.mc_top] = mp;
|
8478
|
+
continue;
|
8479
|
+
}
|
8480
|
+
}
|
8481
|
+
if (my->mc_wlen[toggle] >= MDB_WBUF) {
|
8482
|
+
rc = mdb_env_cthr_toggle(my, 1);
|
8483
|
+
if (rc)
|
8484
|
+
goto done;
|
8485
|
+
toggle = my->mc_toggle;
|
8486
|
+
}
|
8487
|
+
mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
8488
|
+
mdb_page_copy(mo, mp, my->mc_env->me_psize);
|
8489
|
+
mo->mp_pgno = my->mc_next_pgno++;
|
8490
|
+
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
8491
|
+
if (mc.mc_top) {
|
8492
|
+
/* Update parent if there is one */
|
8493
|
+
ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]);
|
8494
|
+
SETPGNO(ni, mo->mp_pgno);
|
8495
|
+
mdb_cursor_pop(&mc);
|
8496
|
+
} else {
|
8497
|
+
/* Otherwise we're done */
|
8498
|
+
*pg = mo->mp_pgno;
|
8499
|
+
break;
|
8500
|
+
}
|
8501
|
+
}
|
8502
|
+
done:
|
8503
|
+
free(buf);
|
8504
|
+
return rc;
|
8505
|
+
}
|
8506
|
+
|
8507
|
+
/** Copy environment with compaction. */
|
8508
|
+
static int ESECT
|
8509
|
+
mdb_env_copyfd1(MDB_env *env, HANDLE fd)
|
8510
|
+
{
|
8511
|
+
MDB_meta *mm;
|
8512
|
+
MDB_page *mp;
|
8513
|
+
mdb_copy my;
|
8514
|
+
MDB_txn *txn = NULL;
|
8515
|
+
pthread_t thr;
|
8516
|
+
int rc;
|
8517
|
+
|
8518
|
+
#ifdef _WIN32
|
8519
|
+
my.mc_mutex = CreateMutex(NULL, FALSE, NULL);
|
8520
|
+
my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL);
|
8521
|
+
my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize);
|
8522
|
+
if (my.mc_wbuf[0] == NULL)
|
8523
|
+
return errno;
|
8524
|
+
#else
|
8525
|
+
pthread_mutex_init(&my.mc_mutex, NULL);
|
8526
|
+
pthread_cond_init(&my.mc_cond, NULL);
|
8527
|
+
#ifdef HAVE_MEMALIGN
|
8528
|
+
my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2);
|
8529
|
+
if (my.mc_wbuf[0] == NULL)
|
8530
|
+
return errno;
|
8531
|
+
#else
|
8532
|
+
rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2);
|
8533
|
+
if (rc)
|
8534
|
+
return rc;
|
8535
|
+
#endif
|
8536
|
+
#endif
|
8537
|
+
memset(my.mc_wbuf[0], 0, MDB_WBUF*2);
|
8538
|
+
my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF;
|
8539
|
+
my.mc_wlen[0] = 0;
|
8540
|
+
my.mc_wlen[1] = 0;
|
8541
|
+
my.mc_olen[0] = 0;
|
8542
|
+
my.mc_olen[1] = 0;
|
8543
|
+
my.mc_next_pgno = 2;
|
8544
|
+
my.mc_status = 0;
|
8545
|
+
my.mc_new = 1;
|
8546
|
+
my.mc_toggle = 0;
|
8547
|
+
my.mc_env = env;
|
8548
|
+
my.mc_fd = fd;
|
8549
|
+
THREAD_CREATE(thr, mdb_env_copythr, &my);
|
8550
|
+
|
8551
|
+
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
8552
|
+
if (rc)
|
8553
|
+
return rc;
|
8554
|
+
|
8555
|
+
mp = (MDB_page *)my.mc_wbuf[0];
|
8556
|
+
memset(mp, 0, 2*env->me_psize);
|
8557
|
+
mp->mp_pgno = 0;
|
8558
|
+
mp->mp_flags = P_META;
|
8559
|
+
mm = (MDB_meta *)METADATA(mp);
|
8560
|
+
mdb_env_init_meta0(env, mm);
|
8561
|
+
mm->mm_address = env->me_metas[0]->mm_address;
|
8562
|
+
|
8563
|
+
mp = (MDB_page *)(my.mc_wbuf[0] + env->me_psize);
|
8564
|
+
mp->mp_pgno = 1;
|
8565
|
+
mp->mp_flags = P_META;
|
8566
|
+
*(MDB_meta *)METADATA(mp) = *mm;
|
8567
|
+
mm = (MDB_meta *)METADATA(mp);
|
8568
|
+
|
8569
|
+
/* Count the number of free pages, subtract from lastpg to find
|
8570
|
+
* number of active pages
|
8571
|
+
*/
|
8572
|
+
{
|
8573
|
+
MDB_ID freecount = 0;
|
8574
|
+
MDB_cursor mc;
|
8575
|
+
MDB_val key, data;
|
8576
|
+
mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
|
8577
|
+
while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
|
8578
|
+
freecount += *(MDB_ID *)data.mv_data;
|
8579
|
+
freecount += txn->mt_dbs[0].md_branch_pages +
|
8580
|
+
txn->mt_dbs[0].md_leaf_pages +
|
8581
|
+
txn->mt_dbs[0].md_overflow_pages;
|
8582
|
+
|
8583
|
+
/* Set metapage 1 */
|
8584
|
+
mm->mm_last_pg = txn->mt_next_pgno - freecount - 1;
|
8585
|
+
mm->mm_dbs[1] = txn->mt_dbs[1];
|
8586
|
+
mm->mm_dbs[1].md_root = mm->mm_last_pg;
|
8587
|
+
mm->mm_txnid = 1;
|
8588
|
+
}
|
8589
|
+
my.mc_wlen[0] = env->me_psize * 2;
|
8590
|
+
my.mc_txn = txn;
|
8591
|
+
pthread_mutex_lock(&my.mc_mutex);
|
8592
|
+
while(my.mc_new)
|
8593
|
+
pthread_cond_wait(&my.mc_cond, &my.mc_mutex);
|
8594
|
+
pthread_mutex_unlock(&my.mc_mutex);
|
8595
|
+
rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0);
|
8596
|
+
if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle])
|
8597
|
+
rc = mdb_env_cthr_toggle(&my, 1);
|
8598
|
+
mdb_env_cthr_toggle(&my, -1);
|
8599
|
+
pthread_mutex_lock(&my.mc_mutex);
|
8600
|
+
while(my.mc_new)
|
8601
|
+
pthread_cond_wait(&my.mc_cond, &my.mc_mutex);
|
8602
|
+
pthread_mutex_unlock(&my.mc_mutex);
|
8603
|
+
THREAD_FINISH(thr);
|
8604
|
+
|
8605
|
+
mdb_txn_abort(txn);
|
8606
|
+
#ifdef _WIN32
|
8607
|
+
CloseHandle(my.mc_cond);
|
8608
|
+
CloseHandle(my.mc_mutex);
|
8609
|
+
_aligned_free(my.mc_wbuf[0]);
|
8610
|
+
#else
|
8611
|
+
pthread_cond_destroy(&my.mc_cond);
|
8612
|
+
pthread_mutex_destroy(&my.mc_mutex);
|
8613
|
+
free(my.mc_wbuf[0]);
|
8614
|
+
#endif
|
8615
|
+
return rc;
|
8616
|
+
}
|
8617
|
+
|
8618
|
+
/** Copy environment as-is. */
|
8619
|
+
static int ESECT
|
8620
|
+
mdb_env_copyfd0(MDB_env *env, HANDLE fd)
|
8621
|
+
{
|
8622
|
+
MDB_txn *txn = NULL;
|
8623
|
+
int rc;
|
8624
|
+
size_t wsize;
|
8625
|
+
char *ptr;
|
8626
|
+
#ifdef _WIN32
|
8627
|
+
DWORD len, w2;
|
8628
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
8629
|
+
#else
|
8630
|
+
ssize_t len;
|
8631
|
+
size_t w2;
|
8632
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
8633
|
+
#endif
|
8634
|
+
|
8635
|
+
/* Do the lock/unlock of the reader mutex before starting the
|
8636
|
+
* write txn. Otherwise other read txns could block writers.
|
8637
|
+
*/
|
8638
|
+
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
8639
|
+
if (rc)
|
8640
|
+
return rc;
|
8641
|
+
|
8642
|
+
if (env->me_txns) {
|
8643
|
+
/* We must start the actual read txn after blocking writers */
|
8644
|
+
mdb_txn_reset0(txn, "reset-stage1");
|
8645
|
+
|
8646
|
+
/* Temporarily block writers until we snapshot the meta pages */
|
8647
|
+
LOCK_MUTEX_W(env);
|
8648
|
+
|
8649
|
+
rc = mdb_txn_renew0(txn);
|
8650
|
+
if (rc) {
|
8651
|
+
UNLOCK_MUTEX_W(env);
|
8652
|
+
goto leave;
|
8653
|
+
}
|
8654
|
+
}
|
8655
|
+
|
8656
|
+
wsize = env->me_psize * 2;
|
8657
|
+
ptr = env->me_map;
|
8658
|
+
w2 = wsize;
|
8659
|
+
while (w2 > 0) {
|
8660
|
+
DO_WRITE(rc, fd, ptr, w2, len);
|
8661
|
+
if (!rc) {
|
8662
|
+
rc = ErrCode();
|
8663
|
+
break;
|
8664
|
+
} else if (len > 0) {
|
8665
|
+
rc = MDB_SUCCESS;
|
8666
|
+
ptr += len;
|
8667
|
+
w2 -= len;
|
8668
|
+
continue;
|
8669
|
+
} else {
|
8670
|
+
/* Non-blocking or async handles are not supported */
|
8671
|
+
rc = EIO;
|
8672
|
+
break;
|
8673
|
+
}
|
8674
|
+
}
|
8675
|
+
if (env->me_txns)
|
8676
|
+
UNLOCK_MUTEX_W(env);
|
8677
|
+
|
8678
|
+
if (rc)
|
8679
|
+
goto leave;
|
8680
|
+
|
8681
|
+
w2 = txn->mt_next_pgno * env->me_psize;
|
8682
|
+
#ifdef WIN32
|
8683
|
+
{
|
8684
|
+
LARGE_INTEGER fsize;
|
8685
|
+
GetFileSizeEx(env->me_fd, &fsize);
|
8686
|
+
if (w2 > fsize.QuadPart)
|
8687
|
+
w2 = fsize.QuadPart;
|
8688
|
+
}
|
8689
|
+
#else
|
8690
|
+
{
|
8691
|
+
struct stat st;
|
8692
|
+
fstat(env->me_fd, &st);
|
8693
|
+
if (w2 > (size_t)st.st_size)
|
8694
|
+
w2 = st.st_size;
|
8695
|
+
}
|
8696
|
+
#endif
|
8697
|
+
wsize = w2 - wsize;
|
8698
|
+
while (wsize > 0) {
|
8699
|
+
if (wsize > MAX_WRITE)
|
8700
|
+
w2 = MAX_WRITE;
|
8701
|
+
else
|
8702
|
+
w2 = wsize;
|
8703
|
+
DO_WRITE(rc, fd, ptr, w2, len);
|
8704
|
+
if (!rc) {
|
8705
|
+
rc = ErrCode();
|
8706
|
+
break;
|
8707
|
+
} else if (len > 0) {
|
8708
|
+
rc = MDB_SUCCESS;
|
8709
|
+
ptr += len;
|
8710
|
+
wsize -= len;
|
8711
|
+
continue;
|
8712
|
+
} else {
|
8713
|
+
rc = EIO;
|
8714
|
+
break;
|
8715
|
+
}
|
8716
|
+
}
|
8717
|
+
|
8718
|
+
leave:
|
8719
|
+
mdb_txn_abort(txn);
|
8720
|
+
return rc;
|
8721
|
+
}
|
8722
|
+
|
8723
|
+
int ESECT
|
8724
|
+
mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags)
|
8725
|
+
{
|
8726
|
+
if (flags & MDB_CP_COMPACT)
|
8727
|
+
return mdb_env_copyfd1(env, fd);
|
8728
|
+
else
|
8729
|
+
return mdb_env_copyfd0(env, fd);
|
8730
|
+
}
|
8731
|
+
|
8732
|
+
int ESECT
|
8733
|
+
mdb_env_copyfd(MDB_env *env, HANDLE fd)
|
8734
|
+
{
|
8735
|
+
return mdb_env_copyfd2(env, fd, 0);
|
8736
|
+
}
|
8737
|
+
|
8738
|
+
int ESECT
|
8739
|
+
mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags)
|
8740
|
+
{
|
8741
|
+
int rc, len;
|
8742
|
+
char *lpath;
|
8743
|
+
HANDLE newfd = INVALID_HANDLE_VALUE;
|
8744
|
+
|
8745
|
+
if (env->me_flags & MDB_NOSUBDIR) {
|
8746
|
+
lpath = (char *)path;
|
8747
|
+
} else {
|
8748
|
+
len = strlen(path);
|
8749
|
+
len += sizeof(DATANAME);
|
8750
|
+
lpath = malloc(len);
|
8751
|
+
if (!lpath)
|
8752
|
+
return ENOMEM;
|
8753
|
+
sprintf(lpath, "%s" DATANAME, path);
|
8754
|
+
}
|
8755
|
+
|
8756
|
+
/* The destination path must exist, but the destination file must not.
|
8757
|
+
* We don't want the OS to cache the writes, since the source data is
|
8758
|
+
* already in the OS cache.
|
8759
|
+
*/
|
8760
|
+
#ifdef _WIN32
|
8761
|
+
newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
|
8762
|
+
FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
|
8763
|
+
#else
|
8764
|
+
newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
|
8765
|
+
#endif
|
8766
|
+
if (newfd == INVALID_HANDLE_VALUE) {
|
8767
|
+
rc = ErrCode();
|
8768
|
+
goto leave;
|
8769
|
+
}
|
8770
|
+
|
8771
|
+
if (env->me_psize >= env->me_os_psize) {
|
8772
|
+
#ifdef O_DIRECT
|
8773
|
+
/* Set O_DIRECT if the file system supports it */
|
8774
|
+
if ((rc = fcntl(newfd, F_GETFL)) != -1)
|
8775
|
+
(void) fcntl(newfd, F_SETFL, rc | O_DIRECT);
|
8776
|
+
#endif
|
8777
|
+
#ifdef F_NOCACHE /* __APPLE__ */
|
8778
|
+
rc = fcntl(newfd, F_NOCACHE, 1);
|
8779
|
+
if (rc) {
|
8780
|
+
rc = ErrCode();
|
8781
|
+
goto leave;
|
8782
|
+
}
|
8783
|
+
#endif
|
8784
|
+
}
|
8785
|
+
|
8786
|
+
rc = mdb_env_copyfd2(env, newfd, flags);
|
8787
|
+
|
8788
|
+
leave:
|
8789
|
+
if (!(env->me_flags & MDB_NOSUBDIR))
|
8790
|
+
free(lpath);
|
8791
|
+
if (newfd != INVALID_HANDLE_VALUE)
|
8792
|
+
if (close(newfd) < 0 && rc == MDB_SUCCESS)
|
8793
|
+
rc = ErrCode();
|
8794
|
+
|
8795
|
+
return rc;
|
8796
|
+
}
|
8797
|
+
|
8798
|
+
int ESECT
|
8799
|
+
mdb_env_copy(MDB_env *env, const char *path)
|
8800
|
+
{
|
8801
|
+
return mdb_env_copy2(env, path, 0);
|
8802
|
+
}
|
8803
|
+
|
8804
|
+
int ESECT
|
8041
8805
|
mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff)
|
8042
8806
|
{
|
8043
8807
|
if ((flag & CHANGEABLE) != flag)
|
@@ -8049,7 +8813,7 @@ mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff)
|
|
8049
8813
|
return MDB_SUCCESS;
|
8050
8814
|
}
|
8051
8815
|
|
8052
|
-
int
|
8816
|
+
int ESECT
|
8053
8817
|
mdb_env_get_flags(MDB_env *env, unsigned int *arg)
|
8054
8818
|
{
|
8055
8819
|
if (!env || !arg)
|
@@ -8059,7 +8823,7 @@ mdb_env_get_flags(MDB_env *env, unsigned int *arg)
|
|
8059
8823
|
return MDB_SUCCESS;
|
8060
8824
|
}
|
8061
8825
|
|
8062
|
-
int
|
8826
|
+
int ESECT
|
8063
8827
|
mdb_env_set_userctx(MDB_env *env, void *ctx)
|
8064
8828
|
{
|
8065
8829
|
if (!env)
|
@@ -8068,13 +8832,13 @@ mdb_env_set_userctx(MDB_env *env, void *ctx)
|
|
8068
8832
|
return MDB_SUCCESS;
|
8069
8833
|
}
|
8070
8834
|
|
8071
|
-
void *
|
8835
|
+
void * ESECT
|
8072
8836
|
mdb_env_get_userctx(MDB_env *env)
|
8073
8837
|
{
|
8074
8838
|
return env ? env->me_userctx : NULL;
|
8075
8839
|
}
|
8076
8840
|
|
8077
|
-
int
|
8841
|
+
int ESECT
|
8078
8842
|
mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
|
8079
8843
|
{
|
8080
8844
|
if (!env)
|
@@ -8085,7 +8849,7 @@ mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
|
|
8085
8849
|
return MDB_SUCCESS;
|
8086
8850
|
}
|
8087
8851
|
|
8088
|
-
int
|
8852
|
+
int ESECT
|
8089
8853
|
mdb_env_get_path(MDB_env *env, const char **arg)
|
8090
8854
|
{
|
8091
8855
|
if (!env || !arg)
|
@@ -8095,7 +8859,7 @@ mdb_env_get_path(MDB_env *env, const char **arg)
|
|
8095
8859
|
return MDB_SUCCESS;
|
8096
8860
|
}
|
8097
8861
|
|
8098
|
-
int
|
8862
|
+
int ESECT
|
8099
8863
|
mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg)
|
8100
8864
|
{
|
8101
8865
|
if (!env || !arg)
|
@@ -8111,7 +8875,7 @@ mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg)
|
|
8111
8875
|
* @param[out] arg the address of an #MDB_stat structure to receive the stats.
|
8112
8876
|
* @return 0, this function always succeeds.
|
8113
8877
|
*/
|
8114
|
-
static int
|
8878
|
+
static int ESECT
|
8115
8879
|
mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg)
|
8116
8880
|
{
|
8117
8881
|
arg->ms_psize = env->me_psize;
|
@@ -8123,7 +8887,8 @@ mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg)
|
|
8123
8887
|
|
8124
8888
|
return MDB_SUCCESS;
|
8125
8889
|
}
|
8126
|
-
|
8890
|
+
|
8891
|
+
int ESECT
|
8127
8892
|
mdb_env_stat(MDB_env *env, MDB_stat *arg)
|
8128
8893
|
{
|
8129
8894
|
int toggle;
|
@@ -8136,7 +8901,7 @@ mdb_env_stat(MDB_env *env, MDB_stat *arg)
|
|
8136
8901
|
return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg);
|
8137
8902
|
}
|
8138
8903
|
|
8139
|
-
int
|
8904
|
+
int ESECT
|
8140
8905
|
mdb_env_info(MDB_env *env, MDB_envinfo *arg)
|
8141
8906
|
{
|
8142
8907
|
int toggle;
|
@@ -8145,7 +8910,7 @@ mdb_env_info(MDB_env *env, MDB_envinfo *arg)
|
|
8145
8910
|
return EINVAL;
|
8146
8911
|
|
8147
8912
|
toggle = mdb_env_pick_meta(env);
|
8148
|
-
arg->me_mapaddr =
|
8913
|
+
arg->me_mapaddr = env->me_metas[toggle]->mm_address;
|
8149
8914
|
arg->me_mapsize = env->me_mapsize;
|
8150
8915
|
arg->me_maxreaders = env->me_maxreaders;
|
8151
8916
|
|
@@ -8187,8 +8952,9 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
8187
8952
|
MDB_val key, data;
|
8188
8953
|
MDB_dbi i;
|
8189
8954
|
MDB_cursor mc;
|
8955
|
+
MDB_db dummy;
|
8190
8956
|
int rc, dbflag, exact;
|
8191
|
-
unsigned int unused = 0;
|
8957
|
+
unsigned int unused = 0, seq;
|
8192
8958
|
size_t len;
|
8193
8959
|
|
8194
8960
|
if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
|
@@ -8256,7 +9022,6 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
8256
9022
|
return MDB_INCOMPATIBLE;
|
8257
9023
|
} else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
|
8258
9024
|
/* Create if requested */
|
8259
|
-
MDB_db dummy;
|
8260
9025
|
data.mv_size = sizeof(MDB_db);
|
8261
9026
|
data.mv_data = &dummy;
|
8262
9027
|
memset(&dummy, 0, sizeof(dummy));
|
@@ -8273,6 +9038,12 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
8273
9038
|
txn->mt_dbxs[slot].md_name.mv_size = len;
|
8274
9039
|
txn->mt_dbxs[slot].md_rel = NULL;
|
8275
9040
|
txn->mt_dbflags[slot] = dbflag;
|
9041
|
+
/* txn-> and env-> are the same in read txns, use
|
9042
|
+
* tmp variable to avoid undefined assignment
|
9043
|
+
*/
|
9044
|
+
seq = ++txn->mt_env->me_dbiseqs[slot];
|
9045
|
+
txn->mt_dbiseqs[slot] = seq;
|
9046
|
+
|
8276
9047
|
memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
|
8277
9048
|
*dbi = slot;
|
8278
9049
|
mdb_default_cmp(txn, slot);
|
@@ -8307,10 +9078,14 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
|
|
8307
9078
|
if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs)
|
8308
9079
|
return;
|
8309
9080
|
ptr = env->me_dbxs[dbi].md_name.mv_data;
|
8310
|
-
|
8311
|
-
|
8312
|
-
|
8313
|
-
|
9081
|
+
/* If there was no name, this was already closed */
|
9082
|
+
if (ptr) {
|
9083
|
+
env->me_dbxs[dbi].md_name.mv_data = NULL;
|
9084
|
+
env->me_dbxs[dbi].md_name.mv_size = 0;
|
9085
|
+
env->me_dbflags[dbi] = 0;
|
9086
|
+
env->me_dbiseqs[dbi]++;
|
9087
|
+
free(ptr);
|
9088
|
+
}
|
8314
9089
|
}
|
8315
9090
|
|
8316
9091
|
int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags)
|
@@ -8420,6 +9195,9 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
|
|
8420
9195
|
if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
|
8421
9196
|
return EACCES;
|
8422
9197
|
|
9198
|
+
if (dbi > MAIN_DBI && TXN_DBI_CHANGED(txn, dbi))
|
9199
|
+
return MDB_BAD_DBI;
|
9200
|
+
|
8423
9201
|
rc = mdb_cursor_open(txn, dbi, &mc);
|
8424
9202
|
if (rc)
|
8425
9203
|
return rc;
|
@@ -8493,12 +9271,14 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
|
|
8493
9271
|
return MDB_SUCCESS;
|
8494
9272
|
}
|
8495
9273
|
|
8496
|
-
int
|
9274
|
+
int ESECT
|
9275
|
+
mdb_env_get_maxkeysize(MDB_env *env)
|
8497
9276
|
{
|
8498
9277
|
return ENV_MAXKEY(env);
|
8499
9278
|
}
|
8500
9279
|
|
8501
|
-
int
|
9280
|
+
int ESECT
|
9281
|
+
mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
8502
9282
|
{
|
8503
9283
|
unsigned int i, rdrs;
|
8504
9284
|
MDB_reader *mr;
|
@@ -8538,7 +9318,8 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
|
8538
9318
|
/** Insert pid into list if not already present.
|
8539
9319
|
* return -1 if already present.
|
8540
9320
|
*/
|
8541
|
-
static int
|
9321
|
+
static int ESECT
|
9322
|
+
mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid)
|
8542
9323
|
{
|
8543
9324
|
/* binary search of pid in list */
|
8544
9325
|
unsigned base = 0;
|
@@ -8574,7 +9355,8 @@ static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid)
|
|
8574
9355
|
return 0;
|
8575
9356
|
}
|
8576
9357
|
|
8577
|
-
int
|
9358
|
+
int ESECT
|
9359
|
+
mdb_reader_check(MDB_env *env, int *dead)
|
8578
9360
|
{
|
8579
9361
|
unsigned int i, j, rdrs;
|
8580
9362
|
MDB_reader *mr;
|