lmdb 0.4.5 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/lmdb_ext/liblmdb/CHANGES +22 -0
- data/ext/lmdb_ext/liblmdb/lmdb.h +111 -39
- data/ext/lmdb_ext/liblmdb/mdb.c +1165 -383
- data/ext/lmdb_ext/liblmdb/midl.c +16 -2
- data/ext/lmdb_ext/liblmdb/midl.h +12 -3
- data/ext/lmdb_ext/lmdb_ext.c +88 -4
- data/lib/lmdb/version.rb +1 -1
- data/spec/lmdb_spec.rb +36 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cfbe0e0fc20cfe471e48ab16b7a6f03a868f5fa0
|
|
4
|
+
data.tar.gz: cd1e90a95ee5eef3bf2bfe0f0db3e389feb22a51
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 23b7d820ead899db18c95d87e819b9d8a533a118a86bffe2bd996cd1352ca73bf8c3399150ed7b4fdaea7a5e4ab1901bc3844e14d25672874744572d383a1985
|
|
7
|
+
data.tar.gz: e3d13fe0515fd9ca626ed81526b3be34a4b9f0affe73afef4862033bb8b124c1c84687115f8f5f40a3524a4308d44ac86e877113362a6b08e87f002f081052b5
|
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
LMDB 0.9 Change Log
|
|
2
2
|
|
|
3
|
+
LMDB 0.9.14 Release (2014/09/15)
|
|
4
|
+
Fix to support 64K page size (ITS#7713)
|
|
5
|
+
Fix to persist decreased as well as increased mapsizes (ITS#7789)
|
|
6
|
+
Fix cursor bug when deleting last node of a DUPSORT key
|
|
7
|
+
Fix mdb_env_info to return FIXEDMAP address
|
|
8
|
+
Fix ambiguous error code from writing to closed DBI (ITS#7825)
|
|
9
|
+
Fix mdb_copy copying past end of file (ITS#7886)
|
|
10
|
+
Fix cursor bugs from page_merge/rebalance
|
|
11
|
+
Fix to dirty fewer pages in deletes (mdb_page_loose())
|
|
12
|
+
Fix mdb_dbi_open creating subDBs (ITS#7917)
|
|
13
|
+
Fix mdb_cursor_get(_DUP) with single value (ITS#7913)
|
|
14
|
+
Fix Windows compat issues in mtests (ITS#7879)
|
|
15
|
+
Add compacting variant of mdb_copy
|
|
16
|
+
Add BigEndian integer key compare code
|
|
17
|
+
Add mdb_dump/mdb_load utilities
|
|
18
|
+
|
|
19
|
+
LMDB 0.9.13 Release (2014/06/18)
|
|
20
|
+
Fix mdb_page_alloc unlimited overflow page search
|
|
21
|
+
Documentation
|
|
22
|
+
Re-fix MDB_CURRENT doc (ITS#7793)
|
|
23
|
+
Fix MDB_GET_MULTIPLE/MDB_NEXT_MULTIPLE doc
|
|
24
|
+
|
|
3
25
|
LMDB 0.9.12 Release (2014/06/13)
|
|
4
26
|
Fix MDB_GET_BOTH regression (ITS#7875,#7681)
|
|
5
27
|
Fix MDB_MULTIPLE writing multiple keys (ITS#7834)
|
data/ext/lmdb_ext/liblmdb/lmdb.h
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
/** @file lmdb.h
|
|
2
2
|
* @brief Lightning memory-mapped database library
|
|
3
3
|
*
|
|
4
|
-
* @mainpage Lightning Memory-Mapped Database Manager (
|
|
4
|
+
* @mainpage Lightning Memory-Mapped Database Manager (LMDB)
|
|
5
5
|
*
|
|
6
6
|
* @section intro_sec Introduction
|
|
7
|
-
*
|
|
7
|
+
* LMDB is a Btree-based database management library modeled loosely on the
|
|
8
8
|
* BerkeleyDB API, but much simplified. The entire database is exposed
|
|
9
9
|
* in a memory map, and all data fetches return data directly
|
|
10
10
|
* from the mapped memory, so no malloc's or memcpy's occur during
|
|
@@ -26,10 +26,10 @@
|
|
|
26
26
|
* readers, and readers don't block writers.
|
|
27
27
|
*
|
|
28
28
|
* Unlike other well-known database mechanisms which use either write-ahead
|
|
29
|
-
* transaction logs or append-only data writes,
|
|
29
|
+
* transaction logs or append-only data writes, LMDB requires no maintenance
|
|
30
30
|
* during operation. Both write-ahead loggers and append-only databases
|
|
31
31
|
* require periodic checkpointing and/or compaction of their log or database
|
|
32
|
-
* files otherwise they grow without bound.
|
|
32
|
+
* files otherwise they grow without bound. LMDB tracks free pages within
|
|
33
33
|
* the database and re-uses them for new write operations, so the database
|
|
34
34
|
* size does not grow without bound in normal use.
|
|
35
35
|
*
|
|
@@ -49,7 +49,7 @@
|
|
|
49
49
|
* stale locks can block further operation.
|
|
50
50
|
*
|
|
51
51
|
* Fix: Check for stale readers periodically, using the
|
|
52
|
-
* #mdb_reader_check function or the mdb_stat tool. Or just
|
|
52
|
+
* #mdb_reader_check function or the \ref mdb_stat_1 "mdb_stat" tool. Or just
|
|
53
53
|
* make all programs using the database close it; the lockfile
|
|
54
54
|
* is always reset on first open of the environment.
|
|
55
55
|
*
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
*
|
|
87
87
|
* - Use an MDB_env* in the process which opened it, without fork()ing.
|
|
88
88
|
*
|
|
89
|
-
* - Do not have open an
|
|
89
|
+
* - Do not have an LMDB database open twice in the same process at
|
|
90
90
|
* the same time. Not even from a plain open() call - close()ing it
|
|
91
91
|
* breaks flock() advisory locking.
|
|
92
92
|
*
|
|
@@ -109,7 +109,7 @@
|
|
|
109
109
|
* - If you do that anyway, do a periodic check for stale readers. Or
|
|
110
110
|
* close the environment once in a while, so the lockfile can get reset.
|
|
111
111
|
*
|
|
112
|
-
* - Do not use
|
|
112
|
+
* - Do not use LMDB databases on remote filesystems, even between
|
|
113
113
|
* processes on the same host. This breaks flock() on some OSes,
|
|
114
114
|
* possibly memory map sync, and certainly sync between programs
|
|
115
115
|
* on different hosts.
|
|
@@ -172,7 +172,7 @@ typedef void *mdb_filehandle_t;
|
|
|
172
172
|
typedef int mdb_filehandle_t;
|
|
173
173
|
#endif
|
|
174
174
|
|
|
175
|
-
/** @defgroup mdb
|
|
175
|
+
/** @defgroup mdb LMDB API
|
|
176
176
|
* @{
|
|
177
177
|
* @brief OpenLDAP Lightning Memory-Mapped Database Manager
|
|
178
178
|
*/
|
|
@@ -184,7 +184,7 @@ typedef int mdb_filehandle_t;
|
|
|
184
184
|
/** Library minor version */
|
|
185
185
|
#define MDB_VERSION_MINOR 9
|
|
186
186
|
/** Library patch version */
|
|
187
|
-
#define MDB_VERSION_PATCH
|
|
187
|
+
#define MDB_VERSION_PATCH 14
|
|
188
188
|
|
|
189
189
|
/** Combine args a,b,c into a single integer for easy version comparisons */
|
|
190
190
|
#define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
|
|
@@ -194,10 +194,10 @@ typedef int mdb_filehandle_t;
|
|
|
194
194
|
MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
|
|
195
195
|
|
|
196
196
|
/** The release date of this library version */
|
|
197
|
-
#define MDB_VERSION_DATE "
|
|
197
|
+
#define MDB_VERSION_DATE "September 15, 2014"
|
|
198
198
|
|
|
199
199
|
/** A stringifier for the version info */
|
|
200
|
-
#define MDB_VERSTR(a,b,c,d) "
|
|
200
|
+
#define MDB_VERSTR(a,b,c,d) "LMDB " #a "." #b "." #c ": (" d ")"
|
|
201
201
|
|
|
202
202
|
/** A helper for the stringifier macro */
|
|
203
203
|
#define MDB_VERFOO(a,b,c,d) MDB_VERSTR(a,b,c,d)
|
|
@@ -333,6 +333,15 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
|
|
|
333
333
|
#define MDB_MULTIPLE 0x80000
|
|
334
334
|
/* @} */
|
|
335
335
|
|
|
336
|
+
/** @defgroup mdb_copy Copy Flags
|
|
337
|
+
* @{
|
|
338
|
+
*/
|
|
339
|
+
/** Compacting copy: Omit free space from copy, and renumber all
|
|
340
|
+
* pages sequentially.
|
|
341
|
+
*/
|
|
342
|
+
#define MDB_CP_COMPACT 0x01
|
|
343
|
+
/* @} */
|
|
344
|
+
|
|
336
345
|
/** @brief Cursor Get operations.
|
|
337
346
|
*
|
|
338
347
|
* This is the set of all operations for retrieving data
|
|
@@ -345,16 +354,18 @@ typedef enum MDB_cursor_op {
|
|
|
345
354
|
MDB_GET_BOTH, /**< Position at key/data pair. Only for #MDB_DUPSORT */
|
|
346
355
|
MDB_GET_BOTH_RANGE, /**< position at key, nearest data. Only for #MDB_DUPSORT */
|
|
347
356
|
MDB_GET_CURRENT, /**< Return key/data at current cursor position */
|
|
348
|
-
MDB_GET_MULTIPLE, /**< Return
|
|
349
|
-
|
|
357
|
+
MDB_GET_MULTIPLE, /**< Return key and up to a page of duplicate data items
|
|
358
|
+
from current cursor position. Move cursor to prepare
|
|
359
|
+
for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
|
|
350
360
|
MDB_LAST, /**< Position at last key/data item */
|
|
351
361
|
MDB_LAST_DUP, /**< Position at last data item of current key.
|
|
352
362
|
Only for #MDB_DUPSORT */
|
|
353
363
|
MDB_NEXT, /**< Position at next data item */
|
|
354
364
|
MDB_NEXT_DUP, /**< Position at next data item of current key.
|
|
355
365
|
Only for #MDB_DUPSORT */
|
|
356
|
-
MDB_NEXT_MULTIPLE, /**< Return
|
|
357
|
-
cursor position.
|
|
366
|
+
MDB_NEXT_MULTIPLE, /**< Return key and up to a page of duplicate data items
|
|
367
|
+
from next cursor position. Move cursor to prepare
|
|
368
|
+
for #MDB_NEXT_MULTIPLE. Only for #MDB_DUPFIXED */
|
|
358
369
|
MDB_NEXT_NODUP, /**< Position at first data item of next key */
|
|
359
370
|
MDB_PREV, /**< Position at previous data item */
|
|
360
371
|
MDB_PREV_DUP, /**< Position at previous data item of current key.
|
|
@@ -384,7 +395,7 @@ typedef enum MDB_cursor_op {
|
|
|
384
395
|
#define MDB_PANIC (-30795)
|
|
385
396
|
/** Environment version mismatch */
|
|
386
397
|
#define MDB_VERSION_MISMATCH (-30794)
|
|
387
|
-
/** File is not a valid
|
|
398
|
+
/** File is not a valid LMDB file */
|
|
388
399
|
#define MDB_INVALID (-30793)
|
|
389
400
|
/** Environment mapsize reached */
|
|
390
401
|
#define MDB_MAP_FULL (-30792)
|
|
@@ -410,7 +421,10 @@ typedef enum MDB_cursor_op {
|
|
|
410
421
|
#define MDB_BAD_TXN (-30782)
|
|
411
422
|
/** Unsupported size of key/DB name/data, or wrong DUPFIXED size */
|
|
412
423
|
#define MDB_BAD_VALSIZE (-30781)
|
|
413
|
-
|
|
424
|
+
/** The specified DBI was changed unexpectedly */
|
|
425
|
+
#define MDB_BAD_DBI (-30780)
|
|
426
|
+
/** The last defined error code */
|
|
427
|
+
#define MDB_LAST_ERRCODE MDB_BAD_DBI
|
|
414
428
|
/** @} */
|
|
415
429
|
|
|
416
430
|
/** @brief Statistics for a database in the environment */
|
|
@@ -434,7 +448,7 @@ typedef struct MDB_envinfo {
|
|
|
434
448
|
unsigned int me_numreaders; /**< max reader slots used in the environment */
|
|
435
449
|
} MDB_envinfo;
|
|
436
450
|
|
|
437
|
-
/** @brief Return the
|
|
451
|
+
/** @brief Return the LMDB library version information.
|
|
438
452
|
*
|
|
439
453
|
* @param[out] major if non-NULL, the library major version number is copied here
|
|
440
454
|
* @param[out] minor if non-NULL, the library minor version number is copied here
|
|
@@ -448,14 +462,14 @@ char *mdb_version(int *major, int *minor, int *patch);
|
|
|
448
462
|
* This function is a superset of the ANSI C X3.159-1989 (ANSI C) strerror(3)
|
|
449
463
|
* function. If the error code is greater than or equal to 0, then the string
|
|
450
464
|
* returned by the system function strerror(3) is returned. If the error code
|
|
451
|
-
* is less than 0, an error string corresponding to the
|
|
452
|
-
* returned. See @ref errors for a list of
|
|
465
|
+
* is less than 0, an error string corresponding to the LMDB library error is
|
|
466
|
+
* returned. See @ref errors for a list of LMDB-specific error codes.
|
|
453
467
|
* @param[in] err The error code
|
|
454
468
|
* @retval "error message" The description of the error
|
|
455
469
|
*/
|
|
456
470
|
char *mdb_strerror(int err);
|
|
457
471
|
|
|
458
|
-
/** @brief Create an
|
|
472
|
+
/** @brief Create an LMDB environment handle.
|
|
459
473
|
*
|
|
460
474
|
* This function allocates memory for a #MDB_env structure. To release
|
|
461
475
|
* the allocated memory and discard the handle, call #mdb_env_close().
|
|
@@ -488,15 +502,15 @@ int mdb_env_create(MDB_env **env);
|
|
|
488
502
|
* how the operating system has allocated memory to shared libraries and other uses.
|
|
489
503
|
* The feature is highly experimental.
|
|
490
504
|
* <li>#MDB_NOSUBDIR
|
|
491
|
-
* By default,
|
|
505
|
+
* By default, LMDB creates its environment in a directory whose
|
|
492
506
|
* pathname is given in \b path, and creates its data and lock files
|
|
493
507
|
* under that directory. With this option, \b path is used as-is for
|
|
494
508
|
* the database main data file. The database lock file is the \b path
|
|
495
509
|
* with "-lock" appended.
|
|
496
510
|
* <li>#MDB_RDONLY
|
|
497
511
|
* Open the environment in read-only mode. No write operations will be
|
|
498
|
-
* allowed.
|
|
499
|
-
* filesystems, where
|
|
512
|
+
* allowed. LMDB will still modify the lock file - except on read-only
|
|
513
|
+
* filesystems, where LMDB does not use locks.
|
|
500
514
|
* <li>#MDB_WRITEMAP
|
|
501
515
|
* Use a writeable memory map unless MDB_RDONLY is set. This is faster
|
|
502
516
|
* and uses fewer mallocs, but loses protection from application bugs
|
|
@@ -540,7 +554,7 @@ int mdb_env_create(MDB_env **env);
|
|
|
540
554
|
* the user synchronizes its use. Applications that multiplex many
|
|
541
555
|
* user threads over individual OS threads need this option. Such an
|
|
542
556
|
* application must also serialize the write transactions in an OS
|
|
543
|
-
* thread, since
|
|
557
|
+
* thread, since LMDB's write locking is unaware of the user threads.
|
|
544
558
|
* <li>#MDB_NOLOCK
|
|
545
559
|
* Don't do any locking. If concurrent access is anticipated, the
|
|
546
560
|
* caller must manage all concurrency itself. For proper operation
|
|
@@ -579,7 +593,7 @@ int mdb_env_create(MDB_env **env);
|
|
|
579
593
|
* @return A non-zero error value on failure and 0 on success. Some possible
|
|
580
594
|
* errors are:
|
|
581
595
|
* <ul>
|
|
582
|
-
* <li>#MDB_VERSION_MISMATCH - the version of the
|
|
596
|
+
* <li>#MDB_VERSION_MISMATCH - the version of the LMDB library doesn't match the
|
|
583
597
|
* version that created the database environment.
|
|
584
598
|
* <li>#MDB_INVALID - the environment file headers are corrupted.
|
|
585
599
|
* <li>ENOENT - the directory specified by the path parameter doesn't exist.
|
|
@@ -589,7 +603,7 @@ int mdb_env_create(MDB_env **env);
|
|
|
589
603
|
*/
|
|
590
604
|
int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode);
|
|
591
605
|
|
|
592
|
-
/** @brief Copy an
|
|
606
|
+
/** @brief Copy an LMDB environment to the specified path.
|
|
593
607
|
*
|
|
594
608
|
* This function may be used to make a backup of an existing environment.
|
|
595
609
|
* No lockfile is created, since it gets recreated at need.
|
|
@@ -605,7 +619,7 @@ int mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t
|
|
|
605
619
|
*/
|
|
606
620
|
int mdb_env_copy(MDB_env *env, const char *path);
|
|
607
621
|
|
|
608
|
-
/** @brief Copy an
|
|
622
|
+
/** @brief Copy an LMDB environment to the specified file descriptor.
|
|
609
623
|
*
|
|
610
624
|
* This function may be used to make a backup of an existing environment.
|
|
611
625
|
* No lockfile is created, since it gets recreated at need.
|
|
@@ -620,7 +634,50 @@ int mdb_env_copy(MDB_env *env, const char *path);
|
|
|
620
634
|
*/
|
|
621
635
|
int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
|
|
622
636
|
|
|
623
|
-
/** @brief
|
|
637
|
+
/** @brief Copy an LMDB environment to the specified path, with options.
|
|
638
|
+
*
|
|
639
|
+
* This function may be used to make a backup of an existing environment.
|
|
640
|
+
* No lockfile is created, since it gets recreated at need.
|
|
641
|
+
* @note This call can trigger significant file size growth if run in
|
|
642
|
+
* parallel with write transactions, because it employs a read-only
|
|
643
|
+
* transaction. See long-lived transactions under @ref caveats_sec.
|
|
644
|
+
* @param[in] env An environment handle returned by #mdb_env_create(). It
|
|
645
|
+
* must have already been opened successfully.
|
|
646
|
+
* @param[in] path The directory in which the copy will reside. This
|
|
647
|
+
* directory must already exist and be writable but must otherwise be
|
|
648
|
+
* empty.
|
|
649
|
+
* @param[in] flags Special options for this operation. This parameter
|
|
650
|
+
* must be set to 0 or by bitwise OR'ing together one or more of the
|
|
651
|
+
* values described here.
|
|
652
|
+
* <ul>
|
|
653
|
+
* <li>#MDB_CP_COMPACT - Perform compaction while copying: omit free
|
|
654
|
+
* pages and sequentially renumber all pages in output. This option
|
|
655
|
+
* consumes more CPU and runs more slowly than the default.
|
|
656
|
+
* </ul>
|
|
657
|
+
* @return A non-zero error value on failure and 0 on success.
|
|
658
|
+
*/
|
|
659
|
+
int mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags);
|
|
660
|
+
|
|
661
|
+
/** @brief Copy an LMDB environment to the specified file descriptor,
|
|
662
|
+
* with options.
|
|
663
|
+
*
|
|
664
|
+
* This function may be used to make a backup of an existing environment.
|
|
665
|
+
* No lockfile is created, since it gets recreated at need. See
|
|
666
|
+
* #mdb_env_copy2() for further details.
|
|
667
|
+
* @note This call can trigger significant file size growth if run in
|
|
668
|
+
* parallel with write transactions, because it employs a read-only
|
|
669
|
+
* transaction. See long-lived transactions under @ref caveats_sec.
|
|
670
|
+
* @param[in] env An environment handle returned by #mdb_env_create(). It
|
|
671
|
+
* must have already been opened successfully.
|
|
672
|
+
* @param[in] fd The file descriptor to write the copy to. It must
|
|
673
|
+
* have already been opened for Write access.
|
|
674
|
+
* @param[in] flags Special options for this operation.
|
|
675
|
+
* See #mdb_env_copy2() for options.
|
|
676
|
+
* @return A non-zero error value on failure and 0 on success.
|
|
677
|
+
*/
|
|
678
|
+
int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned int flags);
|
|
679
|
+
|
|
680
|
+
/** @brief Return statistics about the LMDB environment.
|
|
624
681
|
*
|
|
625
682
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
|
626
683
|
* @param[out] stat The address of an #MDB_stat structure
|
|
@@ -628,7 +685,7 @@ int mdb_env_copyfd(MDB_env *env, mdb_filehandle_t fd);
|
|
|
628
685
|
*/
|
|
629
686
|
int mdb_env_stat(MDB_env *env, MDB_stat *stat);
|
|
630
687
|
|
|
631
|
-
/** @brief Return information about the
|
|
688
|
+
/** @brief Return information about the LMDB environment.
|
|
632
689
|
*
|
|
633
690
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
|
634
691
|
* @param[out] stat The address of an #MDB_envinfo structure
|
|
@@ -639,7 +696,7 @@ int mdb_env_info(MDB_env *env, MDB_envinfo *stat);
|
|
|
639
696
|
/** @brief Flush the data buffers to disk.
|
|
640
697
|
*
|
|
641
698
|
* Data is always written to disk when #mdb_txn_commit() is called,
|
|
642
|
-
* but the operating system may keep it buffered.
|
|
699
|
+
* but the operating system may keep it buffered. LMDB always flushes
|
|
643
700
|
* the OS buffers upon commit as well, unless the environment was
|
|
644
701
|
* opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
|
|
645
702
|
* @param[in] env An environment handle returned by #mdb_env_create()
|
|
@@ -730,7 +787,13 @@ int mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *fd);
|
|
|
730
787
|
* this process. Note that the library does not check for this condition,
|
|
731
788
|
* the caller must ensure it explicitly.
|
|
732
789
|
*
|
|
733
|
-
*
|
|
790
|
+
* The new size takes effect immediately for the current process but
|
|
791
|
+
* will not be persisted to any others until a write transaction has been
|
|
792
|
+
* committed by the current process. Also, only mapsize increases are
|
|
793
|
+
* persisted into the environment.
|
|
794
|
+
*
|
|
795
|
+
* If the mapsize is increased by another process, and data has grown
|
|
796
|
+
* beyond the range of the current mapsize, #mdb_txn_begin() will
|
|
734
797
|
* return #MDB_MAP_RESIZED. This function may be called with a size
|
|
735
798
|
* of zero to adopt the new size.
|
|
736
799
|
*
|
|
@@ -822,7 +885,7 @@ int mdb_env_set_userctx(MDB_env *env, void *ctx);
|
|
|
822
885
|
*/
|
|
823
886
|
void *mdb_env_get_userctx(MDB_env *env);
|
|
824
887
|
|
|
825
|
-
/** @brief A callback function for most
|
|
888
|
+
/** @brief A callback function for most LMDB assert() failures,
|
|
826
889
|
* called before printing the message and aborting.
|
|
827
890
|
*
|
|
828
891
|
* @param[in] env An environment handle returned by #mdb_env_create().
|
|
@@ -1204,7 +1267,7 @@ int mdb_get(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data);
|
|
|
1204
1267
|
* reserved space, which the caller can fill in later - before
|
|
1205
1268
|
* the next update operation or the transaction ends. This saves
|
|
1206
1269
|
* an extra memcpy if the data is being generated later.
|
|
1207
|
-
*
|
|
1270
|
+
* LMDB does nothing else with this memory; the caller is expected
|
|
1208
1271
|
* to modify all of the space requested.
|
|
1209
1272
|
* <li>#MDB_APPEND - append the given key/data pair to the end of the
|
|
1210
1273
|
* database. No key comparisons are performed. This option allows
|
|
@@ -1345,11 +1408,12 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
|
|
|
1345
1408
|
* @param[in] flags Options for this operation. This parameter
|
|
1346
1409
|
* must be set to 0 or one of the values described here.
|
|
1347
1410
|
* <ul>
|
|
1348
|
-
* <li>#MDB_CURRENT -
|
|
1349
|
-
*
|
|
1350
|
-
*
|
|
1351
|
-
*
|
|
1352
|
-
*
|
|
1411
|
+
* <li>#MDB_CURRENT - replace the item at the current cursor position.
|
|
1412
|
+
* The \b key parameter must still be provided, and must match it.
|
|
1413
|
+
* If using sorted duplicates (#MDB_DUPSORT) the data item must still
|
|
1414
|
+
* sort into the same place. This is intended to be used when the
|
|
1415
|
+
* new data is the same size as the old. Otherwise it will simply
|
|
1416
|
+
* perform a delete of the old record followed by an insert.
|
|
1353
1417
|
* <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
|
|
1354
1418
|
* already appear in the database. This flag may only be specified
|
|
1355
1419
|
* if the database was opened with #MDB_DUPSORT. The function will
|
|
@@ -1478,4 +1542,12 @@ int mdb_reader_check(MDB_env *env, int *dead);
|
|
|
1478
1542
|
#ifdef __cplusplus
|
|
1479
1543
|
}
|
|
1480
1544
|
#endif
|
|
1545
|
+
/** @page tools LMDB Command Line Tools
|
|
1546
|
+
The following describes the command line tools that are available for LMDB.
|
|
1547
|
+
\li \ref mdb_copy_1
|
|
1548
|
+
\li \ref mdb_dump_1
|
|
1549
|
+
\li \ref mdb_load_1
|
|
1550
|
+
\li \ref mdb_stat_1
|
|
1551
|
+
*/
|
|
1552
|
+
|
|
1481
1553
|
#endif /* _LMDB_H_ */
|
data/ext/lmdb_ext/liblmdb/mdb.c
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
/** @file mdb.c
|
|
2
|
-
* @brief memory-mapped database library
|
|
2
|
+
* @brief Lightning memory-mapped database library
|
|
3
3
|
*
|
|
4
4
|
* A Btree-based database management library modeled loosely on the
|
|
5
5
|
* BerkeleyDB API, but much simplified.
|
|
6
6
|
*/
|
|
7
7
|
/*
|
|
8
|
-
* Copyright 2011-
|
|
8
|
+
* Copyright 2011-2014 Howard Chu, Symas Corp.
|
|
9
9
|
* All rights reserved.
|
|
10
10
|
*
|
|
11
11
|
* Redistribution and use in source and binary forms, with or without
|
|
@@ -35,15 +35,17 @@
|
|
|
35
35
|
#ifndef _GNU_SOURCE
|
|
36
36
|
#define _GNU_SOURCE 1
|
|
37
37
|
#endif
|
|
38
|
-
#include <sys/types.h>
|
|
39
|
-
#include <sys/stat.h>
|
|
40
38
|
#ifdef _WIN32
|
|
39
|
+
#include <malloc.h>
|
|
41
40
|
#include <windows.h>
|
|
42
41
|
/** getpid() returns int; MinGW defines pid_t but MinGW64 typedefs it
|
|
43
42
|
* as int64 which is wrong. MSVC doesn't define it at all, so just
|
|
44
43
|
* don't use it.
|
|
45
44
|
*/
|
|
46
45
|
#define MDB_PID_T int
|
|
46
|
+
#define MDB_THR_T DWORD
|
|
47
|
+
#include <sys/types.h>
|
|
48
|
+
#include <sys/stat.h>
|
|
47
49
|
#ifdef __GNUC__
|
|
48
50
|
# include <sys/param.h>
|
|
49
51
|
#else
|
|
@@ -55,7 +57,10 @@
|
|
|
55
57
|
# endif
|
|
56
58
|
#endif
|
|
57
59
|
#else
|
|
60
|
+
#include <sys/types.h>
|
|
61
|
+
#include <sys/stat.h>
|
|
58
62
|
#define MDB_PID_T pid_t
|
|
63
|
+
#define MDB_THR_T pthread_t
|
|
59
64
|
#include <sys/param.h>
|
|
60
65
|
#include <sys/uio.h>
|
|
61
66
|
#include <sys/mman.h>
|
|
@@ -65,6 +70,16 @@
|
|
|
65
70
|
#include <fcntl.h>
|
|
66
71
|
#endif
|
|
67
72
|
|
|
73
|
+
#if defined(__mips) && defined(__linux)
|
|
74
|
+
/* MIPS has cache coherency issues, requires explicit cache control */
|
|
75
|
+
#include <asm/cachectl.h>
|
|
76
|
+
extern int cacheflush(char *addr, int nbytes, int cache);
|
|
77
|
+
#define CACHEFLUSH(addr, bytes, cache) cacheflush(addr, bytes, cache)
|
|
78
|
+
#else
|
|
79
|
+
#define CACHEFLUSH(addr, bytes, cache)
|
|
80
|
+
#endif
|
|
81
|
+
|
|
82
|
+
|
|
68
83
|
#include <errno.h>
|
|
69
84
|
#include <limits.h>
|
|
70
85
|
#include <stddef.h>
|
|
@@ -75,6 +90,12 @@
|
|
|
75
90
|
#include <time.h>
|
|
76
91
|
#include <unistd.h>
|
|
77
92
|
|
|
93
|
+
#if defined(__sun)
|
|
94
|
+
/* Most platforms have posix_memalign; older ones may only have memalign */
|
|
95
|
+
#define HAVE_MEMALIGN 1
|
|
96
|
+
#include <malloc.h>
|
|
97
|
+
#endif
|
|
98
|
+
|
|
78
99
|
#if !(defined(BYTE_ORDER) || defined(__BYTE_ORDER))
|
|
79
100
|
#include <netinet/in.h>
|
|
80
101
|
#include <resolv.h> /* defines BYTE_ORDER on HPUX and Solaris */
|
|
@@ -145,7 +166,18 @@
|
|
|
145
166
|
# error "Two's complement, reasonably sized integer types, please"
|
|
146
167
|
#endif
|
|
147
168
|
|
|
148
|
-
|
|
169
|
+
#ifdef __GNUC__
|
|
170
|
+
/** Put infrequently used env functions in separate section */
|
|
171
|
+
# ifdef __APPLE__
|
|
172
|
+
# define ESECT __attribute__ ((section("__TEXT,text_env")))
|
|
173
|
+
# else
|
|
174
|
+
# define ESECT __attribute__ ((section("text_env")))
|
|
175
|
+
# endif
|
|
176
|
+
#else
|
|
177
|
+
#define ESECT
|
|
178
|
+
#endif
|
|
179
|
+
|
|
180
|
+
/** @defgroup internal LMDB Internals
|
|
149
181
|
* @{
|
|
150
182
|
*/
|
|
151
183
|
/** @defgroup compat Compatibility Macros
|
|
@@ -156,6 +188,11 @@
|
|
|
156
188
|
* @{
|
|
157
189
|
*/
|
|
158
190
|
|
|
191
|
+
/** Features under development */
|
|
192
|
+
#ifndef MDB_DEVEL
|
|
193
|
+
#define MDB_DEVEL 0
|
|
194
|
+
#endif
|
|
195
|
+
|
|
159
196
|
/** Wrapper around __func__, which is a C99 feature */
|
|
160
197
|
#if __STDC_VERSION__ >= 199901L
|
|
161
198
|
# define mdb_func_ __func__
|
|
@@ -169,8 +206,10 @@
|
|
|
169
206
|
#ifdef _WIN32
|
|
170
207
|
#define MDB_USE_HASH 1
|
|
171
208
|
#define MDB_PIDLOCK 0
|
|
172
|
-
#define
|
|
209
|
+
#define THREAD_RET DWORD
|
|
210
|
+
#define pthread_t HANDLE
|
|
173
211
|
#define pthread_mutex_t HANDLE
|
|
212
|
+
#define pthread_cond_t HANDLE
|
|
174
213
|
#define pthread_key_t DWORD
|
|
175
214
|
#define pthread_self() GetCurrentThreadId()
|
|
176
215
|
#define pthread_key_create(x,y) \
|
|
@@ -178,12 +217,16 @@
|
|
|
178
217
|
#define pthread_key_delete(x) TlsFree(x)
|
|
179
218
|
#define pthread_getspecific(x) TlsGetValue(x)
|
|
180
219
|
#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode())
|
|
181
|
-
#define pthread_mutex_unlock(x) ReleaseMutex(x)
|
|
182
|
-
#define pthread_mutex_lock(x) WaitForSingleObject(x, INFINITE)
|
|
183
|
-
#define
|
|
184
|
-
#define
|
|
185
|
-
#define
|
|
186
|
-
#define
|
|
220
|
+
#define pthread_mutex_unlock(x) ReleaseMutex(*x)
|
|
221
|
+
#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE)
|
|
222
|
+
#define pthread_cond_signal(x) SetEvent(*x)
|
|
223
|
+
#define pthread_cond_wait(cond,mutex) do{SignalObjectAndWait(*mutex, *cond, INFINITE, FALSE); WaitForSingleObject(*mutex, INFINITE);}while(0)
|
|
224
|
+
#define THREAD_CREATE(thr,start,arg) thr=CreateThread(NULL,0,start,arg,0,NULL)
|
|
225
|
+
#define THREAD_FINISH(thr) WaitForSingleObject(thr, INFINITE)
|
|
226
|
+
#define LOCK_MUTEX_R(env) pthread_mutex_lock(&(env)->me_rmutex)
|
|
227
|
+
#define UNLOCK_MUTEX_R(env) pthread_mutex_unlock(&(env)->me_rmutex)
|
|
228
|
+
#define LOCK_MUTEX_W(env) pthread_mutex_lock(&(env)->me_wmutex)
|
|
229
|
+
#define UNLOCK_MUTEX_W(env) pthread_mutex_unlock(&(env)->me_wmutex)
|
|
187
230
|
#define getpid() GetCurrentProcessId()
|
|
188
231
|
#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd))
|
|
189
232
|
#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len))
|
|
@@ -198,7 +241,9 @@
|
|
|
198
241
|
#endif
|
|
199
242
|
#define Z "I"
|
|
200
243
|
#else
|
|
201
|
-
|
|
244
|
+
#define THREAD_RET void *
|
|
245
|
+
#define THREAD_CREATE(thr,start,arg) pthread_create(&thr,NULL,start,arg)
|
|
246
|
+
#define THREAD_FINISH(thr) pthread_join(thr,NULL)
|
|
202
247
|
#define Z "z" /**< printf format modifier for size_t */
|
|
203
248
|
|
|
204
249
|
/** For MDB_LOCK_FORMAT: True if readers take a pid lock in the lockfile */
|
|
@@ -352,7 +397,8 @@ static txnid_t mdb_debug_start;
|
|
|
352
397
|
|
|
353
398
|
/** @brief The maximum size of a database page.
|
|
354
399
|
*
|
|
355
|
-
*
|
|
400
|
+
* It is 32k or 64k, since value-PAGEBASE must fit in
|
|
401
|
+
* #MDB_page.%mp_upper.
|
|
356
402
|
*
|
|
357
403
|
* LMDB will use database pages < OS pages if needed.
|
|
358
404
|
* That causes more I/O in write transactions: The OS must
|
|
@@ -365,7 +411,7 @@ static txnid_t mdb_debug_start;
|
|
|
365
411
|
* pressure from other processes is high. So until OSs have
|
|
366
412
|
* actual paging support for Huge pages, they're not viable.
|
|
367
413
|
*/
|
|
368
|
-
#define MAX_PAGESIZE 0x8000
|
|
414
|
+
#define MAX_PAGESIZE (PAGEBASE ? 0x10000 : 0x8000)
|
|
369
415
|
|
|
370
416
|
/** The minimum number of keys required in a database page.
|
|
371
417
|
* Setting this to a larger value will place a smaller bound on the
|
|
@@ -381,14 +427,14 @@ static txnid_t mdb_debug_start;
|
|
|
381
427
|
*/
|
|
382
428
|
#define MDB_MINKEYS 2
|
|
383
429
|
|
|
384
|
-
/** A stamp that identifies a file as an
|
|
430
|
+
/** A stamp that identifies a file as an LMDB file.
|
|
385
431
|
* There's nothing special about this value other than that it is easily
|
|
386
432
|
* recognizable, and it will reflect any byte order mismatches.
|
|
387
433
|
*/
|
|
388
434
|
#define MDB_MAGIC 0xBEEFC0DE
|
|
389
435
|
|
|
390
436
|
/** The version number for a database's datafile format. */
|
|
391
|
-
#define MDB_DATA_VERSION 1
|
|
437
|
+
#define MDB_DATA_VERSION ((MDB_DEVEL) ? 999 : 1)
|
|
392
438
|
/** The version number for a database's lockfile format. */
|
|
393
439
|
#define MDB_LOCK_VERSION 1
|
|
394
440
|
|
|
@@ -397,13 +443,14 @@ static txnid_t mdb_debug_start;
|
|
|
397
443
|
* Define this as 0 to compute the max from the page size. 511
|
|
398
444
|
* is default for backwards compat: liblmdb <= 0.9.10 can break
|
|
399
445
|
* when modifying a DB with keys/dupsort data bigger than its max.
|
|
446
|
+
* #MDB_DEVEL sets the default to 0.
|
|
400
447
|
*
|
|
401
448
|
* Data items in an #MDB_DUPSORT database are also limited to
|
|
402
449
|
* this size, since they're actually keys of a sub-DB. Keys and
|
|
403
450
|
* #MDB_DUPSORT data items must fit on a node in a regular page.
|
|
404
451
|
*/
|
|
405
452
|
#ifndef MDB_MAXKEYSIZE
|
|
406
|
-
#define MDB_MAXKEYSIZE 511
|
|
453
|
+
#define MDB_MAXKEYSIZE ((MDB_DEVEL) ? 0 : 511)
|
|
407
454
|
#endif
|
|
408
455
|
|
|
409
456
|
/** The maximum size of a key we can write to the environment. */
|
|
@@ -537,7 +584,7 @@ typedef struct MDB_rxbody {
|
|
|
537
584
|
/** The process ID of the process owning this reader txn. */
|
|
538
585
|
MDB_PID_T mrb_pid;
|
|
539
586
|
/** The thread ID of the thread owning this txn. */
|
|
540
|
-
|
|
587
|
+
MDB_THR_T mrb_tid;
|
|
541
588
|
} MDB_rxbody;
|
|
542
589
|
|
|
543
590
|
/** The actual reader record, with cacheline padding. */
|
|
@@ -568,7 +615,7 @@ typedef struct MDB_reader {
|
|
|
568
615
|
* unlikely. If a collision occurs, the results are unpredictable.
|
|
569
616
|
*/
|
|
570
617
|
typedef struct MDB_txbody {
|
|
571
|
-
/** Stamp identifying this as an
|
|
618
|
+
/** Stamp identifying this as an LMDB file. It must be set
|
|
572
619
|
* to #MDB_MAGIC. */
|
|
573
620
|
uint32_t mtb_magic;
|
|
574
621
|
/** Format of this lock file. Must be set to #MDB_LOCK_FORMAT. */
|
|
@@ -635,7 +682,7 @@ typedef struct MDB_page {
|
|
|
635
682
|
#define mp_next mp_p.p_next
|
|
636
683
|
union {
|
|
637
684
|
pgno_t p_pgno; /**< page number */
|
|
638
|
-
|
|
685
|
+
struct MDB_page *p_next; /**< for in-memory list of freed pages */
|
|
639
686
|
} mp_p;
|
|
640
687
|
uint16_t mp_pad;
|
|
641
688
|
/** @defgroup mdb_page Page Flags
|
|
@@ -650,6 +697,7 @@ typedef struct MDB_page {
|
|
|
650
697
|
#define P_DIRTY 0x10 /**< dirty page, also set for #P_SUBP pages */
|
|
651
698
|
#define P_LEAF2 0x20 /**< for #MDB_DUPFIXED records */
|
|
652
699
|
#define P_SUBP 0x40 /**< for #MDB_DUPSORT sub-pages */
|
|
700
|
+
#define P_LOOSE 0x4000 /**< page was dirtied then freed, can be reused */
|
|
653
701
|
#define P_KEEP 0x8000 /**< leave this page alone during spill */
|
|
654
702
|
/** @} */
|
|
655
703
|
uint16_t mp_flags; /**< @ref mdb_page */
|
|
@@ -672,8 +720,11 @@ typedef struct MDB_page {
|
|
|
672
720
|
/** Address of first usable data byte in a page, after the header */
|
|
673
721
|
#define METADATA(p) ((void *)((char *)(p) + PAGEHDRSZ))
|
|
674
722
|
|
|
723
|
+
/** ITS#7713, change PAGEBASE to handle 65536 byte pages */
|
|
724
|
+
#define PAGEBASE ((MDB_DEVEL) ? PAGEHDRSZ : 0)
|
|
725
|
+
|
|
675
726
|
/** Number of nodes on a page */
|
|
676
|
-
#define NUMKEYS(p) (((p)->mp_lower - PAGEHDRSZ) >> 1)
|
|
727
|
+
#define NUMKEYS(p) (((p)->mp_lower - (PAGEHDRSZ-PAGEBASE)) >> 1)
|
|
677
728
|
|
|
678
729
|
/** The amount of space remaining in the page */
|
|
679
730
|
#define SIZELEFT(p) (indx_t)((p)->mp_upper - (p)->mp_lower)
|
|
@@ -700,6 +751,9 @@ typedef struct MDB_page {
|
|
|
700
751
|
/** The number of overflow pages needed to store the given size. */
|
|
701
752
|
#define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
|
|
702
753
|
|
|
754
|
+
/** Link in #MDB_txn.%mt_loose_pgs list */
|
|
755
|
+
#define NEXT_LOOSE_PAGE(p) (*(MDB_page **)((p) + 2))
|
|
756
|
+
|
|
703
757
|
/** Header for a single key/data pair within a page.
|
|
704
758
|
* Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2.
|
|
705
759
|
* We guarantee 2-byte alignment for 'MDB_node's.
|
|
@@ -751,7 +805,7 @@ typedef struct MDB_node {
|
|
|
751
805
|
#define LEAFSIZE(k, d) (NODESIZE + (k)->mv_size + (d)->mv_size)
|
|
752
806
|
|
|
753
807
|
/** Address of node \b i in page \b p */
|
|
754
|
-
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i]))
|
|
808
|
+
#define NODEPTR(p, i) ((MDB_node *)((char *)(p) + (p)->mp_ptrs[i] + PAGEBASE))
|
|
755
809
|
|
|
756
810
|
/** Address of the key for the node */
|
|
757
811
|
#define NODEKEY(node) (void *)((node)->mn_data)
|
|
@@ -841,7 +895,7 @@ typedef struct MDB_db {
|
|
|
841
895
|
* Pages 0-1 are meta pages. Transaction N writes meta page #(N % 2).
|
|
842
896
|
*/
|
|
843
897
|
typedef struct MDB_meta {
|
|
844
|
-
/** Stamp identifying this as an
|
|
898
|
+
/** Stamp identifying this as an LMDB file. It must be set
|
|
845
899
|
* to #MDB_MAGIC. */
|
|
846
900
|
uint32_t mm_magic;
|
|
847
901
|
/** Version number of this data file. Must be set to #MDB_DATA_VERSION. */
|
|
@@ -898,6 +952,12 @@ struct MDB_txn {
|
|
|
898
952
|
/** The list of pages that became unused during this transaction.
|
|
899
953
|
*/
|
|
900
954
|
MDB_IDL mt_free_pgs;
|
|
955
|
+
/** The list of loose pages that became unused and may be reused
|
|
956
|
+
* in this transaction, linked through #NEXT_LOOSE_PAGE(page).
|
|
957
|
+
*/
|
|
958
|
+
MDB_page *mt_loose_pgs;
|
|
959
|
+
/** Number of loose pages (#mt_loose_pgs) */
|
|
960
|
+
int mt_loose_count;
|
|
901
961
|
/** The sorted list of dirty pages we temporarily wrote to disk
|
|
902
962
|
* because the dirty list was full. page numbers in here are
|
|
903
963
|
* shifted left by 1, deleted slots have the LSB set.
|
|
@@ -913,6 +973,8 @@ struct MDB_txn {
|
|
|
913
973
|
MDB_dbx *mt_dbxs;
|
|
914
974
|
/** Array of MDB_db records for each known DB */
|
|
915
975
|
MDB_db *mt_dbs;
|
|
976
|
+
/** Array of sequence numbers for each DB handle */
|
|
977
|
+
unsigned int *mt_dbiseqs;
|
|
916
978
|
/** @defgroup mt_dbflag Transaction DB Flags
|
|
917
979
|
* @ingroup internal
|
|
918
980
|
* @{
|
|
@@ -1048,12 +1110,15 @@ struct MDB_env {
|
|
|
1048
1110
|
MDB_meta *me_metas[2]; /**< pointers to the two meta pages */
|
|
1049
1111
|
void *me_pbuf; /**< scratch area for DUPSORT put() */
|
|
1050
1112
|
MDB_txn *me_txn; /**< current write transaction */
|
|
1113
|
+
MDB_txn *me_txn0; /**< prealloc'd write transaction */
|
|
1051
1114
|
size_t me_mapsize; /**< size of the data memory map */
|
|
1052
1115
|
off_t me_size; /**< current file size */
|
|
1053
1116
|
pgno_t me_maxpg; /**< me_mapsize / me_psize */
|
|
1054
1117
|
MDB_dbx *me_dbxs; /**< array of static DB info */
|
|
1055
1118
|
uint16_t *me_dbflags; /**< array of flags from MDB_db.md_flags */
|
|
1119
|
+
unsigned int *me_dbiseqs; /**< array of dbi sequence numbers */
|
|
1056
1120
|
pthread_key_t me_txkey; /**< thread-key for readers */
|
|
1121
|
+
txnid_t me_pgoldest; /**< ID of oldest reader last time we looked */
|
|
1057
1122
|
MDB_pgstate me_pgstate; /**< state of old pages from freeDB */
|
|
1058
1123
|
# define me_pglast me_pgstate.mf_pglast
|
|
1059
1124
|
# define me_pghead me_pgstate.mf_pghead
|
|
@@ -1102,6 +1167,10 @@ typedef struct MDB_ntxn {
|
|
|
1102
1167
|
#define TXN_DBI_EXIST(txn, dbi) \
|
|
1103
1168
|
((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID))
|
|
1104
1169
|
|
|
1170
|
+
/** Check for misused \b dbi handles */
|
|
1171
|
+
#define TXN_DBI_CHANGED(txn, dbi) \
|
|
1172
|
+
((txn)->mt_dbiseqs[dbi] != (txn)->mt_env->me_dbiseqs[dbi])
|
|
1173
|
+
|
|
1105
1174
|
static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp);
|
|
1106
1175
|
static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp);
|
|
1107
1176
|
static int mdb_page_touch(MDB_cursor *mc);
|
|
@@ -1182,7 +1251,7 @@ mdb_version(int *major, int *minor, int *patch)
|
|
|
1182
1251
|
return MDB_VERSION_STRING;
|
|
1183
1252
|
}
|
|
1184
1253
|
|
|
1185
|
-
/** Table of descriptions for
|
|
1254
|
+
/** Table of descriptions for LMDB @ref errors */
|
|
1186
1255
|
static char *const mdb_errstr[] = {
|
|
1187
1256
|
"MDB_KEYEXIST: Key/data pair already exists",
|
|
1188
1257
|
"MDB_NOTFOUND: No matching key/data pair found",
|
|
@@ -1190,7 +1259,7 @@ static char *const mdb_errstr[] = {
|
|
|
1190
1259
|
"MDB_CORRUPTED: Located page was wrong type",
|
|
1191
1260
|
"MDB_PANIC: Update of meta page failed",
|
|
1192
1261
|
"MDB_VERSION_MISMATCH: Database environment version mismatch",
|
|
1193
|
-
"MDB_INVALID: File is not an
|
|
1262
|
+
"MDB_INVALID: File is not an LMDB file",
|
|
1194
1263
|
"MDB_MAP_FULL: Environment mapsize limit reached",
|
|
1195
1264
|
"MDB_DBS_FULL: Environment maxdbs limit reached",
|
|
1196
1265
|
"MDB_READERS_FULL: Environment maxreaders limit reached",
|
|
@@ -1203,11 +1272,20 @@ static char *const mdb_errstr[] = {
|
|
|
1203
1272
|
"MDB_BAD_RSLOT: Invalid reuse of reader locktable slot",
|
|
1204
1273
|
"MDB_BAD_TXN: Transaction cannot recover - it must be aborted",
|
|
1205
1274
|
"MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
|
|
1275
|
+
"MDB_BAD_DBI: The specified DBI handle was closed/changed unexpectedly",
|
|
1206
1276
|
};
|
|
1207
1277
|
|
|
1208
1278
|
char *
|
|
1209
1279
|
mdb_strerror(int err)
|
|
1210
1280
|
{
|
|
1281
|
+
#ifdef _WIN32
|
|
1282
|
+
/** HACK: pad 4KB on stack over the buf. Return system msgs in buf.
|
|
1283
|
+
* This works as long as no function between the call to mdb_strerror
|
|
1284
|
+
* and the actual use of the message uses more than 4K of stack.
|
|
1285
|
+
*/
|
|
1286
|
+
char pad[4096];
|
|
1287
|
+
char buf[1024], *ptr = buf;
|
|
1288
|
+
#endif
|
|
1211
1289
|
int i;
|
|
1212
1290
|
if (!err)
|
|
1213
1291
|
return ("Successful return: 0");
|
|
@@ -1217,7 +1295,32 @@ mdb_strerror(int err)
|
|
|
1217
1295
|
return mdb_errstr[i];
|
|
1218
1296
|
}
|
|
1219
1297
|
|
|
1298
|
+
#ifdef _WIN32
|
|
1299
|
+
/* These are the C-runtime error codes we use. The comment indicates
|
|
1300
|
+
* their numeric value, and the Win32 error they would correspond to
|
|
1301
|
+
* if the error actually came from a Win32 API. A major mess, we should
|
|
1302
|
+
* have used LMDB-specific error codes for everything.
|
|
1303
|
+
*/
|
|
1304
|
+
switch(err) {
|
|
1305
|
+
case ENOENT: /* 2, FILE_NOT_FOUND */
|
|
1306
|
+
case EIO: /* 5, ACCESS_DENIED */
|
|
1307
|
+
case ENOMEM: /* 12, INVALID_ACCESS */
|
|
1308
|
+
case EACCES: /* 13, INVALID_DATA */
|
|
1309
|
+
case EBUSY: /* 16, CURRENT_DIRECTORY */
|
|
1310
|
+
case EINVAL: /* 22, BAD_COMMAND */
|
|
1311
|
+
case ENOSPC: /* 28, OUT_OF_PAPER */
|
|
1312
|
+
return strerror(err);
|
|
1313
|
+
default:
|
|
1314
|
+
;
|
|
1315
|
+
}
|
|
1316
|
+
buf[0] = 0;
|
|
1317
|
+
FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM |
|
|
1318
|
+
FORMAT_MESSAGE_IGNORE_INSERTS,
|
|
1319
|
+
NULL, err, 0, ptr, sizeof(buf), pad);
|
|
1320
|
+
return ptr;
|
|
1321
|
+
#else
|
|
1220
1322
|
return strerror(err);
|
|
1323
|
+
#endif
|
|
1221
1324
|
}
|
|
1222
1325
|
|
|
1223
1326
|
/** assert(3) variant in cursor context */
|
|
@@ -1357,7 +1460,7 @@ mdb_page_list(MDB_page *mp)
|
|
|
1357
1460
|
total = EVEN(total);
|
|
1358
1461
|
}
|
|
1359
1462
|
fprintf(stderr, "Total: header %d + contents %d + unused %d\n",
|
|
1360
|
-
IS_LEAF2(mp) ? PAGEHDRSZ : mp->mp_lower, total, SIZELEFT(mp));
|
|
1463
|
+
IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower, total, SIZELEFT(mp));
|
|
1361
1464
|
}
|
|
1362
1465
|
|
|
1363
1466
|
void
|
|
@@ -1485,7 +1588,6 @@ mdb_page_malloc(MDB_txn *txn, unsigned num)
|
|
|
1485
1588
|
}
|
|
1486
1589
|
return ret;
|
|
1487
1590
|
}
|
|
1488
|
-
|
|
1489
1591
|
/** Free a single page.
|
|
1490
1592
|
* Saves single pages to a list, for future reuse.
|
|
1491
1593
|
* (This is not used for multi-page overflow pages.)
|
|
@@ -1525,6 +1627,62 @@ mdb_dlist_free(MDB_txn *txn)
|
|
|
1525
1627
|
dl[0].mid = 0;
|
|
1526
1628
|
}
|
|
1527
1629
|
|
|
1630
|
+
/** Loosen or free a single page.
|
|
1631
|
+
* Saves single pages to a list for future reuse
|
|
1632
|
+
* in this same txn. It has been pulled from the freeDB
|
|
1633
|
+
* and already resides on the dirty list, but has been
|
|
1634
|
+
* deleted. Use these pages first before pulling again
|
|
1635
|
+
* from the freeDB.
|
|
1636
|
+
*
|
|
1637
|
+
* If the page wasn't dirtied in this txn, just add it
|
|
1638
|
+
* to this txn's free list.
|
|
1639
|
+
*/
|
|
1640
|
+
static int
|
|
1641
|
+
mdb_page_loose(MDB_cursor *mc, MDB_page *mp)
|
|
1642
|
+
{
|
|
1643
|
+
int loose = 0;
|
|
1644
|
+
pgno_t pgno = mp->mp_pgno;
|
|
1645
|
+
MDB_txn *txn = mc->mc_txn;
|
|
1646
|
+
|
|
1647
|
+
if ((mp->mp_flags & P_DIRTY) && mc->mc_dbi != FREE_DBI) {
|
|
1648
|
+
if (txn->mt_parent) {
|
|
1649
|
+
MDB_ID2 *dl = txn->mt_u.dirty_list;
|
|
1650
|
+
/* If txn has a parent, make sure the page is in our
|
|
1651
|
+
* dirty list.
|
|
1652
|
+
*/
|
|
1653
|
+
if (dl[0].mid) {
|
|
1654
|
+
unsigned x = mdb_mid2l_search(dl, pgno);
|
|
1655
|
+
if (x <= dl[0].mid && dl[x].mid == pgno) {
|
|
1656
|
+
if (mp != dl[x].mptr) { /* bad cursor? */
|
|
1657
|
+
mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
|
|
1658
|
+
txn->mt_flags |= MDB_TXN_ERROR;
|
|
1659
|
+
return MDB_CORRUPTED;
|
|
1660
|
+
}
|
|
1661
|
+
/* ok, it's ours */
|
|
1662
|
+
loose = 1;
|
|
1663
|
+
}
|
|
1664
|
+
}
|
|
1665
|
+
} else {
|
|
1666
|
+
/* no parent txn, so it's just ours */
|
|
1667
|
+
loose = 1;
|
|
1668
|
+
}
|
|
1669
|
+
}
|
|
1670
|
+
if (loose) {
|
|
1671
|
+
DPRINTF(("loosen db %d page %"Z"u", DDBI(mc),
|
|
1672
|
+
mp->mp_pgno));
|
|
1673
|
+
NEXT_LOOSE_PAGE(mp) = txn->mt_loose_pgs;
|
|
1674
|
+
txn->mt_loose_pgs = mp;
|
|
1675
|
+
txn->mt_loose_count++;
|
|
1676
|
+
mp->mp_flags |= P_LOOSE;
|
|
1677
|
+
} else {
|
|
1678
|
+
int rc = mdb_midl_append(&txn->mt_free_pgs, pgno);
|
|
1679
|
+
if (rc)
|
|
1680
|
+
return rc;
|
|
1681
|
+
}
|
|
1682
|
+
|
|
1683
|
+
return MDB_SUCCESS;
|
|
1684
|
+
}
|
|
1685
|
+
|
|
1528
1686
|
/** Set or clear P_KEEP in dirty, non-overflow, non-sub pages watched by txn.
|
|
1529
1687
|
* @param[in] mc A cursor handle for the current operation.
|
|
1530
1688
|
* @param[in] pflags Flags of the pages to update:
|
|
@@ -1535,7 +1693,7 @@ mdb_dlist_free(MDB_txn *txn)
|
|
|
1535
1693
|
static int
|
|
1536
1694
|
mdb_pages_xkeep(MDB_cursor *mc, unsigned pflags, int all)
|
|
1537
1695
|
{
|
|
1538
|
-
enum { Mask = P_SUBP|P_DIRTY|P_KEEP };
|
|
1696
|
+
enum { Mask = P_SUBP|P_DIRTY|P_LOOSE|P_KEEP };
|
|
1539
1697
|
MDB_txn *txn = mc->mc_txn;
|
|
1540
1698
|
MDB_cursor *m3;
|
|
1541
1699
|
MDB_xcursor *mx;
|
|
@@ -1686,7 +1844,7 @@ mdb_page_spill(MDB_cursor *m0, MDB_val *key, MDB_val *data)
|
|
|
1686
1844
|
for (i=dl[0].mid; i && need; i--) {
|
|
1687
1845
|
MDB_ID pn = dl[i].mid << 1;
|
|
1688
1846
|
dp = dl[i].mptr;
|
|
1689
|
-
if (dp->mp_flags & P_KEEP)
|
|
1847
|
+
if (dp->mp_flags & (P_LOOSE|P_KEEP))
|
|
1690
1848
|
continue;
|
|
1691
1849
|
/* Can't spill twice, make sure it's not already in a parent's
|
|
1692
1850
|
* spill list.
|
|
@@ -1790,15 +1948,27 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
1790
1948
|
#else
|
|
1791
1949
|
enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
|
|
1792
1950
|
#endif
|
|
1793
|
-
int rc, retry =
|
|
1951
|
+
int rc, retry = num * 60;
|
|
1794
1952
|
MDB_txn *txn = mc->mc_txn;
|
|
1795
1953
|
MDB_env *env = txn->mt_env;
|
|
1796
1954
|
pgno_t pgno, *mop = env->me_pghead;
|
|
1797
|
-
unsigned i, j,
|
|
1955
|
+
unsigned i, j, mop_len = mop ? mop[0] : 0, n2 = num-1;
|
|
1798
1956
|
MDB_page *np;
|
|
1799
1957
|
txnid_t oldest = 0, last;
|
|
1800
1958
|
MDB_cursor_op op;
|
|
1801
1959
|
MDB_cursor m2;
|
|
1960
|
+
int found_old = 0;
|
|
1961
|
+
|
|
1962
|
+
/* If there are any loose pages, just use them */
|
|
1963
|
+
if (num == 1 && txn->mt_loose_pgs) {
|
|
1964
|
+
np = txn->mt_loose_pgs;
|
|
1965
|
+
txn->mt_loose_pgs = NEXT_LOOSE_PAGE(np);
|
|
1966
|
+
txn->mt_loose_count--;
|
|
1967
|
+
DPRINTF(("db %d use loose page %"Z"u", DDBI(mc),
|
|
1968
|
+
np->mp_pgno));
|
|
1969
|
+
*mp = np;
|
|
1970
|
+
return MDB_SUCCESS;
|
|
1971
|
+
}
|
|
1802
1972
|
|
|
1803
1973
|
*mp = NULL;
|
|
1804
1974
|
|
|
@@ -1811,7 +1981,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
1811
1981
|
for (op = MDB_FIRST;; op = MDB_NEXT) {
|
|
1812
1982
|
MDB_val key, data;
|
|
1813
1983
|
MDB_node *leaf;
|
|
1814
|
-
pgno_t *idl
|
|
1984
|
+
pgno_t *idl;
|
|
1815
1985
|
|
|
1816
1986
|
/* Seek a big enough contiguous page range. Prefer
|
|
1817
1987
|
* pages at the tail, just truncating the list.
|
|
@@ -1823,14 +1993,14 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
1823
1993
|
if (mop[i-n2] == pgno+n2)
|
|
1824
1994
|
goto search_done;
|
|
1825
1995
|
} while (--i > n2);
|
|
1826
|
-
if (
|
|
1996
|
+
if (--retry < 0)
|
|
1827
1997
|
break;
|
|
1828
1998
|
}
|
|
1829
1999
|
|
|
1830
2000
|
if (op == MDB_FIRST) { /* 1st iteration */
|
|
1831
2001
|
/* Prepare to fetch more and coalesce */
|
|
1832
|
-
oldest = mdb_find_oldest(txn);
|
|
1833
2002
|
last = env->me_pglast;
|
|
2003
|
+
oldest = env->me_pgoldest;
|
|
1834
2004
|
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
|
|
1835
2005
|
if (last) {
|
|
1836
2006
|
op = MDB_SET_RANGE;
|
|
@@ -1845,8 +2015,15 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
1845
2015
|
|
|
1846
2016
|
last++;
|
|
1847
2017
|
/* Do not fetch more if the record will be too recent */
|
|
1848
|
-
if (oldest <= last)
|
|
1849
|
-
|
|
2018
|
+
if (oldest <= last) {
|
|
2019
|
+
if (!found_old) {
|
|
2020
|
+
oldest = mdb_find_oldest(txn);
|
|
2021
|
+
env->me_pgoldest = oldest;
|
|
2022
|
+
found_old = 1;
|
|
2023
|
+
}
|
|
2024
|
+
if (oldest <= last)
|
|
2025
|
+
break;
|
|
2026
|
+
}
|
|
1850
2027
|
rc = mdb_cursor_get(&m2, &key, NULL, op);
|
|
1851
2028
|
if (rc) {
|
|
1852
2029
|
if (rc == MDB_NOTFOUND)
|
|
@@ -1854,8 +2031,15 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
1854
2031
|
goto fail;
|
|
1855
2032
|
}
|
|
1856
2033
|
last = *(txnid_t*)key.mv_data;
|
|
1857
|
-
if (oldest <= last)
|
|
1858
|
-
|
|
2034
|
+
if (oldest <= last) {
|
|
2035
|
+
if (!found_old) {
|
|
2036
|
+
oldest = mdb_find_oldest(txn);
|
|
2037
|
+
env->me_pgoldest = oldest;
|
|
2038
|
+
found_old = 1;
|
|
2039
|
+
}
|
|
2040
|
+
if (oldest <= last)
|
|
2041
|
+
break;
|
|
2042
|
+
}
|
|
1859
2043
|
np = m2.mc_pg[m2.mc_top];
|
|
1860
2044
|
leaf = NODEPTR(np, m2.mc_ki[m2.mc_top]);
|
|
1861
2045
|
if ((rc = mdb_node_read(txn, leaf, &data)) != MDB_SUCCESS)
|
|
@@ -1877,21 +2061,12 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
|
|
|
1877
2061
|
#if (MDB_DEBUG) > 1
|
|
1878
2062
|
DPRINTF(("IDL read txn %"Z"u root %"Z"u num %u",
|
|
1879
2063
|
last, txn->mt_dbs[FREE_DBI].md_root, i));
|
|
1880
|
-
for (
|
|
1881
|
-
DPRINTF(("IDL %"Z"u", idl[
|
|
2064
|
+
for (j = i; j; j--)
|
|
2065
|
+
DPRINTF(("IDL %"Z"u", idl[j]));
|
|
1882
2066
|
#endif
|
|
1883
2067
|
/* Merge in descending sorted order */
|
|
1884
|
-
|
|
1885
|
-
|
|
1886
|
-
mop[0] = (pgno_t)-1;
|
|
1887
|
-
old_id = mop[j];
|
|
1888
|
-
while (i) {
|
|
1889
|
-
new_id = idl[i--];
|
|
1890
|
-
for (; old_id < new_id; old_id = mop[--j])
|
|
1891
|
-
mop[k--] = old_id;
|
|
1892
|
-
mop[k--] = new_id;
|
|
1893
|
-
}
|
|
1894
|
-
mop[0] = mop_len;
|
|
2068
|
+
mdb_midl_xmerge(mop, idl);
|
|
2069
|
+
mop_len = mop[0];
|
|
1895
2070
|
}
|
|
1896
2071
|
|
|
1897
2072
|
/* Use new pages from the map when nothing suitable in the freeDB */
|
|
@@ -1946,8 +2121,8 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize)
|
|
|
1946
2121
|
* alignment so memcpy may copy words instead of bytes.
|
|
1947
2122
|
*/
|
|
1948
2123
|
if ((unused &= -Align) && !IS_LEAF2(src)) {
|
|
1949
|
-
upper
|
|
1950
|
-
memcpy(dst, src, (lower + (Align-1)) & -Align);
|
|
2124
|
+
upper = (upper + PAGEBASE) & -Align;
|
|
2125
|
+
memcpy(dst, src, (lower + PAGEBASE + (Align-1)) & -Align);
|
|
1951
2126
|
memcpy((pgno_t *)((char *)dst+upper), (pgno_t *)((char *)src+upper),
|
|
1952
2127
|
psize - upper);
|
|
1953
2128
|
} else {
|
|
@@ -2314,7 +2489,7 @@ mdb_txn_renew0(MDB_txn *txn)
|
|
|
2314
2489
|
return MDB_BAD_RSLOT;
|
|
2315
2490
|
} else {
|
|
2316
2491
|
MDB_PID_T pid = env->me_pid;
|
|
2317
|
-
|
|
2492
|
+
MDB_THR_T tid = pthread_self();
|
|
2318
2493
|
|
|
2319
2494
|
if (!env->me_live_reader) {
|
|
2320
2495
|
rc = mdb_reader_pid(env, Pidset, pid);
|
|
@@ -2373,6 +2548,7 @@ mdb_txn_renew0(MDB_txn *txn)
|
|
|
2373
2548
|
txn->mt_free_pgs[0] = 0;
|
|
2374
2549
|
txn->mt_spill_pgs = NULL;
|
|
2375
2550
|
env->me_txn = txn;
|
|
2551
|
+
memcpy(txn->mt_dbiseqs, env->me_dbiseqs, env->me_maxdbs * sizeof(unsigned int));
|
|
2376
2552
|
}
|
|
2377
2553
|
|
|
2378
2554
|
/* Copy the DB info and flags */
|
|
@@ -2447,23 +2623,39 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
|
|
|
2447
2623
|
tsize = sizeof(MDB_ntxn);
|
|
2448
2624
|
}
|
|
2449
2625
|
size = tsize + env->me_maxdbs * (sizeof(MDB_db)+1);
|
|
2450
|
-
if (!(flags & MDB_RDONLY))
|
|
2626
|
+
if (!(flags & MDB_RDONLY)) {
|
|
2627
|
+
if (!parent) {
|
|
2628
|
+
txn = env->me_txn0;
|
|
2629
|
+
goto ok;
|
|
2630
|
+
}
|
|
2451
2631
|
size += env->me_maxdbs * sizeof(MDB_cursor *);
|
|
2632
|
+
/* child txns use parent's dbiseqs */
|
|
2633
|
+
if (!parent)
|
|
2634
|
+
size += env->me_maxdbs * sizeof(unsigned int);
|
|
2635
|
+
}
|
|
2452
2636
|
|
|
2453
2637
|
if ((txn = calloc(1, size)) == NULL) {
|
|
2454
|
-
DPRINTF(("calloc: %s", strerror(
|
|
2638
|
+
DPRINTF(("calloc: %s", strerror(errno)));
|
|
2455
2639
|
return ENOMEM;
|
|
2456
2640
|
}
|
|
2457
2641
|
txn->mt_dbs = (MDB_db *) ((char *)txn + tsize);
|
|
2458
2642
|
if (flags & MDB_RDONLY) {
|
|
2459
2643
|
txn->mt_flags |= MDB_TXN_RDONLY;
|
|
2460
2644
|
txn->mt_dbflags = (unsigned char *)(txn->mt_dbs + env->me_maxdbs);
|
|
2645
|
+
txn->mt_dbiseqs = env->me_dbiseqs;
|
|
2461
2646
|
} else {
|
|
2462
2647
|
txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
|
|
2463
|
-
|
|
2648
|
+
if (parent) {
|
|
2649
|
+
txn->mt_dbiseqs = parent->mt_dbiseqs;
|
|
2650
|
+
txn->mt_dbflags = (unsigned char *)(txn->mt_cursors + env->me_maxdbs);
|
|
2651
|
+
} else {
|
|
2652
|
+
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
|
|
2653
|
+
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
|
|
2654
|
+
}
|
|
2464
2655
|
}
|
|
2465
2656
|
txn->mt_env = env;
|
|
2466
2657
|
|
|
2658
|
+
ok:
|
|
2467
2659
|
if (parent) {
|
|
2468
2660
|
unsigned int i;
|
|
2469
2661
|
txn->mt_u.dirty_list = malloc(sizeof(MDB_ID2)*MDB_IDL_UM_SIZE);
|
|
@@ -2506,9 +2698,10 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned int flags, MDB_txn **ret)
|
|
|
2506
2698
|
} else {
|
|
2507
2699
|
rc = mdb_txn_renew0(txn);
|
|
2508
2700
|
}
|
|
2509
|
-
if (rc)
|
|
2510
|
-
|
|
2511
|
-
|
|
2701
|
+
if (rc) {
|
|
2702
|
+
if (txn != env->me_txn0)
|
|
2703
|
+
free(txn);
|
|
2704
|
+
} else {
|
|
2512
2705
|
*ret = txn;
|
|
2513
2706
|
DPRINTF(("begin txn %"Z"u%c %p on mdbenv %p, root page %"Z"u",
|
|
2514
2707
|
txn->mt_txnid, (txn->mt_flags & MDB_TXN_RDONLY) ? 'r' : 'w',
|
|
@@ -2540,10 +2733,13 @@ mdb_dbis_update(MDB_txn *txn, int keep)
|
|
|
2540
2733
|
env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
|
|
2541
2734
|
} else {
|
|
2542
2735
|
char *ptr = env->me_dbxs[i].md_name.mv_data;
|
|
2543
|
-
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2736
|
+
if (ptr) {
|
|
2737
|
+
env->me_dbxs[i].md_name.mv_data = NULL;
|
|
2738
|
+
env->me_dbxs[i].md_name.mv_size = 0;
|
|
2739
|
+
env->me_dbflags[i] = 0;
|
|
2740
|
+
env->me_dbiseqs[i]++;
|
|
2741
|
+
free(ptr);
|
|
2742
|
+
}
|
|
2547
2743
|
}
|
|
2548
2744
|
}
|
|
2549
2745
|
}
|
|
@@ -2632,7 +2828,8 @@ mdb_txn_abort(MDB_txn *txn)
|
|
|
2632
2828
|
if ((txn->mt_flags & MDB_TXN_RDONLY) && txn->mt_u.reader)
|
|
2633
2829
|
txn->mt_u.reader->mr_pid = 0;
|
|
2634
2830
|
|
|
2635
|
-
|
|
2831
|
+
if (txn != txn->mt_env->me_txn0)
|
|
2832
|
+
free(txn);
|
|
2636
2833
|
}
|
|
2637
2834
|
|
|
2638
2835
|
/** Save the freelist as of this transaction to the freeDB.
|
|
@@ -2661,6 +2858,19 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
2661
2858
|
return rc;
|
|
2662
2859
|
}
|
|
2663
2860
|
|
|
2861
|
+
if (!env->me_pghead && txn->mt_loose_pgs) {
|
|
2862
|
+
/* Put loose page numbers in mt_free_pgs, since
|
|
2863
|
+
* we may be unable to return them to me_pghead.
|
|
2864
|
+
*/
|
|
2865
|
+
MDB_page *mp = txn->mt_loose_pgs;
|
|
2866
|
+
if ((rc = mdb_midl_need(&txn->mt_free_pgs, txn->mt_loose_count)) != 0)
|
|
2867
|
+
return rc;
|
|
2868
|
+
for (; mp; mp = NEXT_LOOSE_PAGE(mp))
|
|
2869
|
+
mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
|
|
2870
|
+
txn->mt_loose_pgs = NULL;
|
|
2871
|
+
txn->mt_loose_count = 0;
|
|
2872
|
+
}
|
|
2873
|
+
|
|
2664
2874
|
/* MDB_RESERVE cancels meminit in ovpage malloc (when no WRITEMAP) */
|
|
2665
2875
|
clean_limit = (env->me_flags & (MDB_NOMEMINIT|MDB_WRITEMAP))
|
|
2666
2876
|
? SSIZE_MAX : maxfree_1pg;
|
|
@@ -2722,7 +2932,7 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
2722
2932
|
}
|
|
2723
2933
|
|
|
2724
2934
|
mop = env->me_pghead;
|
|
2725
|
-
mop_len = mop ? mop[0] : 0;
|
|
2935
|
+
mop_len = (mop ? mop[0] : 0) + txn->mt_loose_count;
|
|
2726
2936
|
|
|
2727
2937
|
/* Reserve records for me_pghead[]. Split it if multi-page,
|
|
2728
2938
|
* to avoid searching freeDB for a page range. Use keys in
|
|
@@ -2762,6 +2972,28 @@ mdb_freelist_save(MDB_txn *txn)
|
|
|
2762
2972
|
total_room += head_room;
|
|
2763
2973
|
}
|
|
2764
2974
|
|
|
2975
|
+
/* Return loose page numbers to me_pghead, though usually none are
|
|
2976
|
+
* left at this point. The pages themselves remain in dirty_list.
|
|
2977
|
+
*/
|
|
2978
|
+
if (txn->mt_loose_pgs) {
|
|
2979
|
+
MDB_page *mp = txn->mt_loose_pgs;
|
|
2980
|
+
unsigned count = txn->mt_loose_count;
|
|
2981
|
+
MDB_IDL loose;
|
|
2982
|
+
/* Room for loose pages + temp IDL with same */
|
|
2983
|
+
if ((rc = mdb_midl_need(&env->me_pghead, 2*count+1)) != 0)
|
|
2984
|
+
return rc;
|
|
2985
|
+
mop = env->me_pghead;
|
|
2986
|
+
loose = mop + MDB_IDL_ALLOCLEN(mop) - count;
|
|
2987
|
+
for (count = 0; mp; mp = NEXT_LOOSE_PAGE(mp))
|
|
2988
|
+
loose[ ++count ] = mp->mp_pgno;
|
|
2989
|
+
loose[0] = count;
|
|
2990
|
+
mdb_midl_sort(loose);
|
|
2991
|
+
mdb_midl_xmerge(mop, loose);
|
|
2992
|
+
txn->mt_loose_pgs = NULL;
|
|
2993
|
+
txn->mt_loose_count = 0;
|
|
2994
|
+
mop_len = mop[0];
|
|
2995
|
+
}
|
|
2996
|
+
|
|
2765
2997
|
/* Fill in the reserved me_pghead records */
|
|
2766
2998
|
rc = MDB_SUCCESS;
|
|
2767
2999
|
if (mop_len) {
|
|
@@ -2823,8 +3055,8 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
|
2823
3055
|
while (++i <= pagecount) {
|
|
2824
3056
|
dp = dl[i].mptr;
|
|
2825
3057
|
/* Don't flush this page yet */
|
|
2826
|
-
if (dp->mp_flags & P_KEEP) {
|
|
2827
|
-
dp->mp_flags
|
|
3058
|
+
if (dp->mp_flags & (P_LOOSE|P_KEEP)) {
|
|
3059
|
+
dp->mp_flags &= ~P_KEEP;
|
|
2828
3060
|
dl[++j] = dl[i];
|
|
2829
3061
|
continue;
|
|
2830
3062
|
}
|
|
@@ -2838,8 +3070,8 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
|
2838
3070
|
if (++i <= pagecount) {
|
|
2839
3071
|
dp = dl[i].mptr;
|
|
2840
3072
|
/* Don't flush this page yet */
|
|
2841
|
-
if (dp->mp_flags & P_KEEP) {
|
|
2842
|
-
dp->mp_flags
|
|
3073
|
+
if (dp->mp_flags & (P_LOOSE|P_KEEP)) {
|
|
3074
|
+
dp->mp_flags &= ~P_KEEP;
|
|
2843
3075
|
dl[i].mid = 0;
|
|
2844
3076
|
continue;
|
|
2845
3077
|
}
|
|
@@ -2914,6 +3146,12 @@ mdb_page_flush(MDB_txn *txn, int keep)
|
|
|
2914
3146
|
#endif /* _WIN32 */
|
|
2915
3147
|
}
|
|
2916
3148
|
|
|
3149
|
+
/* MIPS has cache coherency issues, this is a no-op everywhere else
|
|
3150
|
+
* Note: for any size >= on-chip cache size, entire on-chip cache is
|
|
3151
|
+
* flushed.
|
|
3152
|
+
*/
|
|
3153
|
+
CACHEFLUSH(env->me_map, txn->mt_next_pgno * env->me_psize, DCACHE);
|
|
3154
|
+
|
|
2917
3155
|
for (i = keep; ++i <= pagecount; ) {
|
|
2918
3156
|
dp = dl[i].mptr;
|
|
2919
3157
|
/* This is a page we skipped above */
|
|
@@ -2968,6 +3206,7 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
2968
3206
|
|
|
2969
3207
|
if (txn->mt_parent) {
|
|
2970
3208
|
MDB_txn *parent = txn->mt_parent;
|
|
3209
|
+
MDB_page **lp;
|
|
2971
3210
|
MDB_ID2L dst, src;
|
|
2972
3211
|
MDB_IDL pspill;
|
|
2973
3212
|
unsigned x, y, len, ps_len;
|
|
@@ -3065,6 +3304,12 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
3065
3304
|
}
|
|
3066
3305
|
}
|
|
3067
3306
|
|
|
3307
|
+
/* Append our loose page list to parent's */
|
|
3308
|
+
for (lp = &parent->mt_loose_pgs; *lp; lp = &NEXT_LOOSE_PAGE(lp))
|
|
3309
|
+
;
|
|
3310
|
+
*lp = txn->mt_loose_pgs;
|
|
3311
|
+
parent->mt_loose_count += txn->mt_loose_count;
|
|
3312
|
+
|
|
3068
3313
|
parent->mt_child = NULL;
|
|
3069
3314
|
mdb_midl_free(((MDB_ntxn *)txn)->mnt_pgstate.mf_pghead);
|
|
3070
3315
|
free(txn);
|
|
@@ -3096,6 +3341,10 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
3096
3341
|
mdb_cursor_init(&mc, txn, MAIN_DBI, NULL);
|
|
3097
3342
|
for (i = 2; i < txn->mt_numdbs; i++) {
|
|
3098
3343
|
if (txn->mt_dbflags[i] & DB_DIRTY) {
|
|
3344
|
+
if (TXN_DBI_CHANGED(txn, i)) {
|
|
3345
|
+
rc = MDB_BAD_DBI;
|
|
3346
|
+
goto fail;
|
|
3347
|
+
}
|
|
3099
3348
|
data.mv_data = &txn->mt_dbs[i];
|
|
3100
3349
|
rc = mdb_cursor_put(&mc, &txn->mt_dbxs[i].md_name, &data, 0);
|
|
3101
3350
|
if (rc)
|
|
@@ -3122,6 +3371,10 @@ mdb_txn_commit(MDB_txn *txn)
|
|
|
3122
3371
|
(rc = mdb_env_write_meta(txn)))
|
|
3123
3372
|
goto fail;
|
|
3124
3373
|
|
|
3374
|
+
/* Free P_LOOSE pages left behind in dirty_list */
|
|
3375
|
+
if (!(env->me_flags & MDB_WRITEMAP))
|
|
3376
|
+
mdb_dlist_free(txn);
|
|
3377
|
+
|
|
3125
3378
|
done:
|
|
3126
3379
|
env->me_pglast = 0;
|
|
3127
3380
|
env->me_txn = NULL;
|
|
@@ -3129,7 +3382,8 @@ done:
|
|
|
3129
3382
|
|
|
3130
3383
|
if (env->me_txns)
|
|
3131
3384
|
UNLOCK_MUTEX_W(env);
|
|
3132
|
-
|
|
3385
|
+
if (txn != env->me_txn0)
|
|
3386
|
+
free(txn);
|
|
3133
3387
|
|
|
3134
3388
|
return MDB_SUCCESS;
|
|
3135
3389
|
|
|
@@ -3144,7 +3398,7 @@ fail:
|
|
|
3144
3398
|
* @param[out] meta address of where to store the meta information
|
|
3145
3399
|
* @return 0 on success, non-zero on failure.
|
|
3146
3400
|
*/
|
|
3147
|
-
static int
|
|
3401
|
+
static int ESECT
|
|
3148
3402
|
mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
|
3149
3403
|
{
|
|
3150
3404
|
MDB_metabuf pbuf;
|
|
@@ -3202,12 +3456,26 @@ mdb_env_read_header(MDB_env *env, MDB_meta *meta)
|
|
|
3202
3456
|
return 0;
|
|
3203
3457
|
}
|
|
3204
3458
|
|
|
3459
|
+
static void ESECT
|
|
3460
|
+
mdb_env_init_meta0(MDB_env *env, MDB_meta *meta)
|
|
3461
|
+
{
|
|
3462
|
+
meta->mm_magic = MDB_MAGIC;
|
|
3463
|
+
meta->mm_version = MDB_DATA_VERSION;
|
|
3464
|
+
meta->mm_mapsize = env->me_mapsize;
|
|
3465
|
+
meta->mm_psize = env->me_psize;
|
|
3466
|
+
meta->mm_last_pg = 1;
|
|
3467
|
+
meta->mm_flags = env->me_flags & 0xffff;
|
|
3468
|
+
meta->mm_flags |= MDB_INTEGERKEY;
|
|
3469
|
+
meta->mm_dbs[0].md_root = P_INVALID;
|
|
3470
|
+
meta->mm_dbs[1].md_root = P_INVALID;
|
|
3471
|
+
}
|
|
3472
|
+
|
|
3205
3473
|
/** Write the environment parameters of a freshly created DB environment.
|
|
3206
3474
|
* @param[in] env the environment handle
|
|
3207
3475
|
* @param[out] meta address of where to store the meta information
|
|
3208
3476
|
* @return 0 on success, non-zero on failure.
|
|
3209
3477
|
*/
|
|
3210
|
-
static int
|
|
3478
|
+
static int ESECT
|
|
3211
3479
|
mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
|
3212
3480
|
{
|
|
3213
3481
|
MDB_page *p, *q;
|
|
@@ -3231,15 +3499,7 @@ mdb_env_init_meta(MDB_env *env, MDB_meta *meta)
|
|
|
3231
3499
|
|
|
3232
3500
|
psize = env->me_psize;
|
|
3233
3501
|
|
|
3234
|
-
meta
|
|
3235
|
-
meta->mm_version = MDB_DATA_VERSION;
|
|
3236
|
-
meta->mm_mapsize = env->me_mapsize;
|
|
3237
|
-
meta->mm_psize = psize;
|
|
3238
|
-
meta->mm_last_pg = 1;
|
|
3239
|
-
meta->mm_flags = env->me_flags & 0xffff;
|
|
3240
|
-
meta->mm_flags |= MDB_INTEGERKEY;
|
|
3241
|
-
meta->mm_dbs[0].md_root = P_INVALID;
|
|
3242
|
-
meta->mm_dbs[1].md_root = P_INVALID;
|
|
3502
|
+
mdb_env_init_meta0(env, meta);
|
|
3243
3503
|
|
|
3244
3504
|
p = calloc(2, psize);
|
|
3245
3505
|
p->mp_pgno = 0;
|
|
@@ -3271,6 +3531,7 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
|
3271
3531
|
{
|
|
3272
3532
|
MDB_env *env;
|
|
3273
3533
|
MDB_meta meta, metab, *mp;
|
|
3534
|
+
size_t mapsize;
|
|
3274
3535
|
off_t off;
|
|
3275
3536
|
int rc, len, toggle;
|
|
3276
3537
|
char *ptr;
|
|
@@ -3287,11 +3548,13 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
|
3287
3548
|
|
|
3288
3549
|
env = txn->mt_env;
|
|
3289
3550
|
mp = env->me_metas[toggle];
|
|
3551
|
+
mapsize = env->me_metas[toggle ^ 1]->mm_mapsize;
|
|
3552
|
+
/* Persist any increases of mapsize config */
|
|
3553
|
+
if (mapsize < env->me_mapsize)
|
|
3554
|
+
mapsize = env->me_mapsize;
|
|
3290
3555
|
|
|
3291
3556
|
if (env->me_flags & MDB_WRITEMAP) {
|
|
3292
|
-
|
|
3293
|
-
if (env->me_mapsize > mp->mm_mapsize)
|
|
3294
|
-
mp->mm_mapsize = env->me_mapsize;
|
|
3557
|
+
mp->mm_mapsize = mapsize;
|
|
3295
3558
|
mp->mm_dbs[0] = txn->mt_dbs[0];
|
|
3296
3559
|
mp->mm_dbs[1] = txn->mt_dbs[1];
|
|
3297
3560
|
mp->mm_last_pg = txn->mt_next_pgno - 1;
|
|
@@ -3318,22 +3581,15 @@ mdb_env_write_meta(MDB_txn *txn)
|
|
|
3318
3581
|
metab.mm_txnid = env->me_metas[toggle]->mm_txnid;
|
|
3319
3582
|
metab.mm_last_pg = env->me_metas[toggle]->mm_last_pg;
|
|
3320
3583
|
|
|
3321
|
-
|
|
3322
|
-
if (env->me_mapsize > mp->mm_mapsize) {
|
|
3323
|
-
/* Persist any increases of mapsize config */
|
|
3324
|
-
meta.mm_mapsize = env->me_mapsize;
|
|
3325
|
-
off = offsetof(MDB_meta, mm_mapsize);
|
|
3326
|
-
} else {
|
|
3327
|
-
off = offsetof(MDB_meta, mm_dbs[0].md_depth);
|
|
3328
|
-
}
|
|
3329
|
-
len = sizeof(MDB_meta) - off;
|
|
3330
|
-
|
|
3331
|
-
ptr += off;
|
|
3584
|
+
meta.mm_mapsize = mapsize;
|
|
3332
3585
|
meta.mm_dbs[0] = txn->mt_dbs[0];
|
|
3333
3586
|
meta.mm_dbs[1] = txn->mt_dbs[1];
|
|
3334
3587
|
meta.mm_last_pg = txn->mt_next_pgno - 1;
|
|
3335
3588
|
meta.mm_txnid = txn->mt_txnid;
|
|
3336
3589
|
|
|
3590
|
+
off = offsetof(MDB_meta, mm_mapsize);
|
|
3591
|
+
ptr = (char *)&meta + off;
|
|
3592
|
+
len = sizeof(MDB_meta) - off;
|
|
3337
3593
|
if (toggle)
|
|
3338
3594
|
off += env->me_psize;
|
|
3339
3595
|
off += PAGEHDRSZ;
|
|
@@ -3372,6 +3628,8 @@ fail:
|
|
|
3372
3628
|
env->me_flags |= MDB_FATAL_ERROR;
|
|
3373
3629
|
return rc;
|
|
3374
3630
|
}
|
|
3631
|
+
/* MIPS has cache coherency issues, this is a no-op everywhere else */
|
|
3632
|
+
CACHEFLUSH(env->me_map + off, len, DCACHE);
|
|
3375
3633
|
done:
|
|
3376
3634
|
/* Memory ordering issues are irrelevant; since the entire writer
|
|
3377
3635
|
* is wrapped by wmutex, all of these changes will become visible
|
|
@@ -3395,7 +3653,7 @@ mdb_env_pick_meta(const MDB_env *env)
|
|
|
3395
3653
|
return (env->me_metas[0]->mm_txnid < env->me_metas[1]->mm_txnid);
|
|
3396
3654
|
}
|
|
3397
3655
|
|
|
3398
|
-
int
|
|
3656
|
+
int ESECT
|
|
3399
3657
|
mdb_env_create(MDB_env **env)
|
|
3400
3658
|
{
|
|
3401
3659
|
MDB_env *e;
|
|
@@ -3420,8 +3678,8 @@ mdb_env_create(MDB_env **env)
|
|
|
3420
3678
|
return MDB_SUCCESS;
|
|
3421
3679
|
}
|
|
3422
3680
|
|
|
3423
|
-
static int
|
|
3424
|
-
mdb_env_map(MDB_env *env, void *addr
|
|
3681
|
+
static int ESECT
|
|
3682
|
+
mdb_env_map(MDB_env *env, void *addr)
|
|
3425
3683
|
{
|
|
3426
3684
|
MDB_page *p;
|
|
3427
3685
|
unsigned int flags = env->me_flags;
|
|
@@ -3429,18 +3687,28 @@ mdb_env_map(MDB_env *env, void *addr, int newsize)
|
|
|
3429
3687
|
int rc;
|
|
3430
3688
|
HANDLE mh;
|
|
3431
3689
|
LONG sizelo, sizehi;
|
|
3432
|
-
|
|
3433
|
-
sizehi = env->me_mapsize >> 16 >> 16; /* only needed on Win64 */
|
|
3690
|
+
size_t msize;
|
|
3434
3691
|
|
|
3435
|
-
|
|
3436
|
-
|
|
3437
|
-
|
|
3438
|
-
|
|
3692
|
+
if (flags & MDB_RDONLY) {
|
|
3693
|
+
/* Don't set explicit map size, use whatever exists */
|
|
3694
|
+
msize = 0;
|
|
3695
|
+
sizelo = 0;
|
|
3696
|
+
sizehi = 0;
|
|
3697
|
+
} else {
|
|
3698
|
+
msize = env->me_mapsize;
|
|
3699
|
+
sizelo = msize & 0xffffffff;
|
|
3700
|
+
sizehi = msize >> 16 >> 16; /* only needed on Win64 */
|
|
3701
|
+
|
|
3702
|
+
/* Windows won't create mappings for zero length files,
|
|
3703
|
+
* and won't map more than the file size.
|
|
3704
|
+
* Just set the maxsize right now.
|
|
3705
|
+
*/
|
|
3439
3706
|
if (SetFilePointer(env->me_fd, sizelo, &sizehi, 0) != (DWORD)sizelo
|
|
3440
3707
|
|| !SetEndOfFile(env->me_fd)
|
|
3441
3708
|
|| SetFilePointer(env->me_fd, 0, NULL, 0) != 0)
|
|
3442
3709
|
return ErrCode();
|
|
3443
3710
|
}
|
|
3711
|
+
|
|
3444
3712
|
mh = CreateFileMapping(env->me_fd, NULL, flags & MDB_WRITEMAP ?
|
|
3445
3713
|
PAGE_READWRITE : PAGE_READONLY,
|
|
3446
3714
|
sizehi, sizelo, NULL);
|
|
@@ -3448,7 +3716,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize)
|
|
|
3448
3716
|
return ErrCode();
|
|
3449
3717
|
env->me_map = MapViewOfFileEx(mh, flags & MDB_WRITEMAP ?
|
|
3450
3718
|
FILE_MAP_WRITE : FILE_MAP_READ,
|
|
3451
|
-
0, 0,
|
|
3719
|
+
0, 0, msize, addr);
|
|
3452
3720
|
rc = env->me_map ? 0 : ErrCode();
|
|
3453
3721
|
CloseHandle(mh);
|
|
3454
3722
|
if (rc)
|
|
@@ -3494,7 +3762,7 @@ mdb_env_map(MDB_env *env, void *addr, int newsize)
|
|
|
3494
3762
|
return MDB_SUCCESS;
|
|
3495
3763
|
}
|
|
3496
3764
|
|
|
3497
|
-
int
|
|
3765
|
+
int ESECT
|
|
3498
3766
|
mdb_env_set_mapsize(MDB_env *env, size_t size)
|
|
3499
3767
|
{
|
|
3500
3768
|
/* If env is already open, caller is responsible for making
|
|
@@ -3518,7 +3786,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
|
|
|
3518
3786
|
munmap(env->me_map, env->me_mapsize);
|
|
3519
3787
|
env->me_mapsize = size;
|
|
3520
3788
|
old = (env->me_flags & MDB_FIXEDMAP) ? env->me_map : NULL;
|
|
3521
|
-
rc = mdb_env_map(env, old
|
|
3789
|
+
rc = mdb_env_map(env, old);
|
|
3522
3790
|
if (rc)
|
|
3523
3791
|
return rc;
|
|
3524
3792
|
}
|
|
@@ -3528,7 +3796,7 @@ mdb_env_set_mapsize(MDB_env *env, size_t size)
|
|
|
3528
3796
|
return MDB_SUCCESS;
|
|
3529
3797
|
}
|
|
3530
3798
|
|
|
3531
|
-
int
|
|
3799
|
+
int ESECT
|
|
3532
3800
|
mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
|
|
3533
3801
|
{
|
|
3534
3802
|
if (env->me_map)
|
|
@@ -3537,7 +3805,7 @@ mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs)
|
|
|
3537
3805
|
return MDB_SUCCESS;
|
|
3538
3806
|
}
|
|
3539
3807
|
|
|
3540
|
-
int
|
|
3808
|
+
int ESECT
|
|
3541
3809
|
mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
|
|
3542
3810
|
{
|
|
3543
3811
|
if (env->me_map || readers < 1)
|
|
@@ -3546,7 +3814,7 @@ mdb_env_set_maxreaders(MDB_env *env, unsigned int readers)
|
|
|
3546
3814
|
return MDB_SUCCESS;
|
|
3547
3815
|
}
|
|
3548
3816
|
|
|
3549
|
-
int
|
|
3817
|
+
int ESECT
|
|
3550
3818
|
mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
|
|
3551
3819
|
{
|
|
3552
3820
|
if (!env || !readers)
|
|
@@ -3555,9 +3823,9 @@ mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers)
|
|
|
3555
3823
|
return MDB_SUCCESS;
|
|
3556
3824
|
}
|
|
3557
3825
|
|
|
3558
|
-
/** Further setup required for opening an
|
|
3826
|
+
/** Further setup required for opening an LMDB environment
|
|
3559
3827
|
*/
|
|
3560
|
-
static int
|
|
3828
|
+
static int ESECT
|
|
3561
3829
|
mdb_env_open2(MDB_env *env)
|
|
3562
3830
|
{
|
|
3563
3831
|
unsigned int flags = env->me_flags;
|
|
@@ -3602,7 +3870,7 @@ mdb_env_open2(MDB_env *env)
|
|
|
3602
3870
|
env->me_mapsize = minsize;
|
|
3603
3871
|
}
|
|
3604
3872
|
|
|
3605
|
-
rc = mdb_env_map(env,
|
|
3873
|
+
rc = mdb_env_map(env, (flags & MDB_FIXEDMAP) ? meta.mm_address : NULL);
|
|
3606
3874
|
if (rc)
|
|
3607
3875
|
return rc;
|
|
3608
3876
|
|
|
@@ -3714,7 +3982,7 @@ PIMAGE_TLS_CALLBACK mdb_tls_cbp = mdb_tls_callback;
|
|
|
3714
3982
|
#endif
|
|
3715
3983
|
|
|
3716
3984
|
/** Downgrade the exclusive lock on the region back to shared */
|
|
3717
|
-
static int
|
|
3985
|
+
static int ESECT
|
|
3718
3986
|
mdb_env_share_locks(MDB_env *env, int *excl)
|
|
3719
3987
|
{
|
|
3720
3988
|
int rc = 0, toggle = mdb_env_pick_meta(env);
|
|
@@ -3756,7 +4024,7 @@ mdb_env_share_locks(MDB_env *env, int *excl)
|
|
|
3756
4024
|
/** Try to get exclusive lock, otherwise shared.
|
|
3757
4025
|
* Maintain *excl = -1: no/unknown lock, 0: shared, 1: exclusive.
|
|
3758
4026
|
*/
|
|
3759
|
-
static int
|
|
4027
|
+
static int ESECT
|
|
3760
4028
|
mdb_env_excl_lock(MDB_env *env, int *excl)
|
|
3761
4029
|
{
|
|
3762
4030
|
int rc = 0;
|
|
@@ -3891,14 +4159,14 @@ mdb_hash_enc(MDB_val *val, char *encbuf)
|
|
|
3891
4159
|
#endif
|
|
3892
4160
|
|
|
3893
4161
|
/** Open and/or initialize the lock region for the environment.
|
|
3894
|
-
* @param[in] env The
|
|
4162
|
+
* @param[in] env The LMDB environment.
|
|
3895
4163
|
* @param[in] lpath The pathname of the file used for the lock region.
|
|
3896
4164
|
* @param[in] mode The Unix permissions for the file, if we create it.
|
|
3897
4165
|
* @param[out] excl Resulting file lock type: -1 none, 0 shared, 1 exclusive
|
|
3898
4166
|
* @param[in,out] excl In -1, out lock type: -1 none, 0 shared, 1 exclusive
|
|
3899
4167
|
* @return 0 on success, non-zero on failure.
|
|
3900
4168
|
*/
|
|
3901
|
-
static int
|
|
4169
|
+
static int ESECT
|
|
3902
4170
|
mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
|
|
3903
4171
|
{
|
|
3904
4172
|
#ifdef _WIN32
|
|
@@ -4128,7 +4396,7 @@ fail:
|
|
|
4128
4396
|
# error "Persistent DB flags & env flags overlap, but both go in mm_flags"
|
|
4129
4397
|
#endif
|
|
4130
4398
|
|
|
4131
|
-
int
|
|
4399
|
+
int ESECT
|
|
4132
4400
|
mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
|
|
4133
4401
|
{
|
|
4134
4402
|
int oflags, rc, len, excl = -1;
|
|
@@ -4173,7 +4441,8 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
4173
4441
|
env->me_path = strdup(path);
|
|
4174
4442
|
env->me_dbxs = calloc(env->me_maxdbs, sizeof(MDB_dbx));
|
|
4175
4443
|
env->me_dbflags = calloc(env->me_maxdbs, sizeof(uint16_t));
|
|
4176
|
-
|
|
4444
|
+
env->me_dbiseqs = calloc(env->me_maxdbs, sizeof(unsigned int));
|
|
4445
|
+
if (!(env->me_dbxs && env->me_path && env->me_dbflags && env->me_dbiseqs)) {
|
|
4177
4446
|
rc = ENOMEM;
|
|
4178
4447
|
goto leave;
|
|
4179
4448
|
}
|
|
@@ -4245,6 +4514,22 @@ mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode
|
|
|
4245
4514
|
if (!((flags & MDB_RDONLY) ||
|
|
4246
4515
|
(env->me_pbuf = calloc(1, env->me_psize))))
|
|
4247
4516
|
rc = ENOMEM;
|
|
4517
|
+
if (!(flags & MDB_RDONLY)) {
|
|
4518
|
+
MDB_txn *txn;
|
|
4519
|
+
int tsize = sizeof(MDB_txn), size = tsize + env->me_maxdbs *
|
|
4520
|
+
(sizeof(MDB_db)+sizeof(MDB_cursor)+sizeof(unsigned int)+1);
|
|
4521
|
+
txn = calloc(1, size);
|
|
4522
|
+
if (txn) {
|
|
4523
|
+
txn->mt_dbs = (MDB_db *)((char *)txn + tsize);
|
|
4524
|
+
txn->mt_cursors = (MDB_cursor **)(txn->mt_dbs + env->me_maxdbs);
|
|
4525
|
+
txn->mt_dbiseqs = (unsigned int *)(txn->mt_cursors + env->me_maxdbs);
|
|
4526
|
+
txn->mt_dbflags = (unsigned char *)(txn->mt_dbiseqs + env->me_maxdbs);
|
|
4527
|
+
txn->mt_env = env;
|
|
4528
|
+
env->me_txn0 = txn;
|
|
4529
|
+
} else {
|
|
4530
|
+
rc = ENOMEM;
|
|
4531
|
+
}
|
|
4532
|
+
}
|
|
4248
4533
|
}
|
|
4249
4534
|
|
|
4250
4535
|
leave:
|
|
@@ -4256,7 +4541,7 @@ leave:
|
|
|
4256
4541
|
}
|
|
4257
4542
|
|
|
4258
4543
|
/** Destroy resources from mdb_env_open(), clear our readers & DBIs */
|
|
4259
|
-
static void
|
|
4544
|
+
static void ESECT
|
|
4260
4545
|
mdb_env_close0(MDB_env *env, int excl)
|
|
4261
4546
|
{
|
|
4262
4547
|
int i;
|
|
@@ -4269,6 +4554,7 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
|
4269
4554
|
free(env->me_dbxs[i].md_name.mv_data);
|
|
4270
4555
|
|
|
4271
4556
|
free(env->me_pbuf);
|
|
4557
|
+
free(env->me_dbiseqs);
|
|
4272
4558
|
free(env->me_dbflags);
|
|
4273
4559
|
free(env->me_dbxs);
|
|
4274
4560
|
free(env->me_path);
|
|
@@ -4344,186 +4630,41 @@ mdb_env_close0(MDB_env *env, int excl)
|
|
|
4344
4630
|
env->me_flags &= ~(MDB_ENV_ACTIVE|MDB_ENV_TXKEY);
|
|
4345
4631
|
}
|
|
4346
4632
|
|
|
4347
|
-
int
|
|
4348
|
-
mdb_env_copyfd(MDB_env *env, HANDLE fd)
|
|
4349
|
-
{
|
|
4350
|
-
MDB_txn *txn = NULL;
|
|
4351
|
-
int rc;
|
|
4352
|
-
size_t wsize;
|
|
4353
|
-
char *ptr;
|
|
4354
|
-
#ifdef _WIN32
|
|
4355
|
-
DWORD len, w2;
|
|
4356
|
-
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
|
4357
|
-
#else
|
|
4358
|
-
ssize_t len;
|
|
4359
|
-
size_t w2;
|
|
4360
|
-
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
|
4361
|
-
#endif
|
|
4362
|
-
|
|
4363
|
-
/* Do the lock/unlock of the reader mutex before starting the
|
|
4364
|
-
* write txn. Otherwise other read txns could block writers.
|
|
4365
|
-
*/
|
|
4366
|
-
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
|
4367
|
-
if (rc)
|
|
4368
|
-
return rc;
|
|
4369
|
-
|
|
4370
|
-
if (env->me_txns) {
|
|
4371
|
-
/* We must start the actual read txn after blocking writers */
|
|
4372
|
-
mdb_txn_reset0(txn, "reset-stage1");
|
|
4373
4633
|
|
|
4374
|
-
|
|
4375
|
-
|
|
4634
|
+
void ESECT
|
|
4635
|
+
mdb_env_close(MDB_env *env)
|
|
4636
|
+
{
|
|
4637
|
+
MDB_page *dp;
|
|
4376
4638
|
|
|
4377
|
-
|
|
4378
|
-
|
|
4379
|
-
UNLOCK_MUTEX_W(env);
|
|
4380
|
-
goto leave;
|
|
4381
|
-
}
|
|
4382
|
-
}
|
|
4639
|
+
if (env == NULL)
|
|
4640
|
+
return;
|
|
4383
4641
|
|
|
4384
|
-
|
|
4385
|
-
|
|
4386
|
-
|
|
4387
|
-
|
|
4388
|
-
|
|
4389
|
-
if (!rc) {
|
|
4390
|
-
rc = ErrCode();
|
|
4391
|
-
break;
|
|
4392
|
-
} else if (len > 0) {
|
|
4393
|
-
rc = MDB_SUCCESS;
|
|
4394
|
-
ptr += len;
|
|
4395
|
-
w2 -= len;
|
|
4396
|
-
continue;
|
|
4397
|
-
} else {
|
|
4398
|
-
/* Non-blocking or async handles are not supported */
|
|
4399
|
-
rc = EIO;
|
|
4400
|
-
break;
|
|
4401
|
-
}
|
|
4642
|
+
VGMEMP_DESTROY(env);
|
|
4643
|
+
while ((dp = env->me_dpages) != NULL) {
|
|
4644
|
+
VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
|
|
4645
|
+
env->me_dpages = dp->mp_next;
|
|
4646
|
+
free(dp);
|
|
4402
4647
|
}
|
|
4403
|
-
if (env->me_txns)
|
|
4404
|
-
UNLOCK_MUTEX_W(env);
|
|
4405
|
-
|
|
4406
|
-
if (rc)
|
|
4407
|
-
goto leave;
|
|
4408
4648
|
|
|
4409
|
-
|
|
4410
|
-
|
|
4411
|
-
|
|
4412
|
-
w2 = MAX_WRITE;
|
|
4413
|
-
else
|
|
4414
|
-
w2 = wsize;
|
|
4415
|
-
DO_WRITE(rc, fd, ptr, w2, len);
|
|
4416
|
-
if (!rc) {
|
|
4417
|
-
rc = ErrCode();
|
|
4418
|
-
break;
|
|
4419
|
-
} else if (len > 0) {
|
|
4420
|
-
rc = MDB_SUCCESS;
|
|
4421
|
-
ptr += len;
|
|
4422
|
-
wsize -= len;
|
|
4423
|
-
continue;
|
|
4424
|
-
} else {
|
|
4425
|
-
rc = EIO;
|
|
4426
|
-
break;
|
|
4427
|
-
}
|
|
4428
|
-
}
|
|
4649
|
+
mdb_env_close0(env, 0);
|
|
4650
|
+
free(env);
|
|
4651
|
+
}
|
|
4429
4652
|
|
|
4430
|
-
|
|
4431
|
-
|
|
4432
|
-
|
|
4653
|
+
/** Compare two items pointing at aligned size_t's */
|
|
4654
|
+
static int
|
|
4655
|
+
mdb_cmp_long(const MDB_val *a, const MDB_val *b)
|
|
4656
|
+
{
|
|
4657
|
+
return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 :
|
|
4658
|
+
*(size_t *)a->mv_data > *(size_t *)b->mv_data;
|
|
4433
4659
|
}
|
|
4434
4660
|
|
|
4435
|
-
int
|
|
4436
|
-
|
|
4661
|
+
/** Compare two items pointing at aligned unsigned int's */
|
|
4662
|
+
static int
|
|
4663
|
+
mdb_cmp_int(const MDB_val *a, const MDB_val *b)
|
|
4437
4664
|
{
|
|
4438
|
-
int
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
|
|
4442
|
-
if (env->me_flags & MDB_NOSUBDIR) {
|
|
4443
|
-
lpath = (char *)path;
|
|
4444
|
-
} else {
|
|
4445
|
-
len = strlen(path);
|
|
4446
|
-
len += sizeof(DATANAME);
|
|
4447
|
-
lpath = malloc(len);
|
|
4448
|
-
if (!lpath)
|
|
4449
|
-
return ENOMEM;
|
|
4450
|
-
sprintf(lpath, "%s" DATANAME, path);
|
|
4451
|
-
}
|
|
4452
|
-
|
|
4453
|
-
/* The destination path must exist, but the destination file must not.
|
|
4454
|
-
* We don't want the OS to cache the writes, since the source data is
|
|
4455
|
-
* already in the OS cache.
|
|
4456
|
-
*/
|
|
4457
|
-
#ifdef _WIN32
|
|
4458
|
-
newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
|
|
4459
|
-
FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
|
|
4460
|
-
#else
|
|
4461
|
-
newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
|
|
4462
|
-
#endif
|
|
4463
|
-
if (newfd == INVALID_HANDLE_VALUE) {
|
|
4464
|
-
rc = ErrCode();
|
|
4465
|
-
goto leave;
|
|
4466
|
-
}
|
|
4467
|
-
|
|
4468
|
-
#ifdef O_DIRECT
|
|
4469
|
-
/* Set O_DIRECT if the file system supports it */
|
|
4470
|
-
if ((rc = fcntl(newfd, F_GETFL)) != -1)
|
|
4471
|
-
(void) fcntl(newfd, F_SETFL, rc | O_DIRECT);
|
|
4472
|
-
#endif
|
|
4473
|
-
#ifdef F_NOCACHE /* __APPLE__ */
|
|
4474
|
-
rc = fcntl(newfd, F_NOCACHE, 1);
|
|
4475
|
-
if (rc) {
|
|
4476
|
-
rc = ErrCode();
|
|
4477
|
-
goto leave;
|
|
4478
|
-
}
|
|
4479
|
-
#endif
|
|
4480
|
-
|
|
4481
|
-
rc = mdb_env_copyfd(env, newfd);
|
|
4482
|
-
|
|
4483
|
-
leave:
|
|
4484
|
-
if (!(env->me_flags & MDB_NOSUBDIR))
|
|
4485
|
-
free(lpath);
|
|
4486
|
-
if (newfd != INVALID_HANDLE_VALUE)
|
|
4487
|
-
if (close(newfd) < 0 && rc == MDB_SUCCESS)
|
|
4488
|
-
rc = ErrCode();
|
|
4489
|
-
|
|
4490
|
-
return rc;
|
|
4491
|
-
}
|
|
4492
|
-
|
|
4493
|
-
void
|
|
4494
|
-
mdb_env_close(MDB_env *env)
|
|
4495
|
-
{
|
|
4496
|
-
MDB_page *dp;
|
|
4497
|
-
|
|
4498
|
-
if (env == NULL)
|
|
4499
|
-
return;
|
|
4500
|
-
|
|
4501
|
-
VGMEMP_DESTROY(env);
|
|
4502
|
-
while ((dp = env->me_dpages) != NULL) {
|
|
4503
|
-
VGMEMP_DEFINED(&dp->mp_next, sizeof(dp->mp_next));
|
|
4504
|
-
env->me_dpages = dp->mp_next;
|
|
4505
|
-
free(dp);
|
|
4506
|
-
}
|
|
4507
|
-
|
|
4508
|
-
mdb_env_close0(env, 0);
|
|
4509
|
-
free(env);
|
|
4510
|
-
}
|
|
4511
|
-
|
|
4512
|
-
/** Compare two items pointing at aligned size_t's */
|
|
4513
|
-
static int
|
|
4514
|
-
mdb_cmp_long(const MDB_val *a, const MDB_val *b)
|
|
4515
|
-
{
|
|
4516
|
-
return (*(size_t *)a->mv_data < *(size_t *)b->mv_data) ? -1 :
|
|
4517
|
-
*(size_t *)a->mv_data > *(size_t *)b->mv_data;
|
|
4518
|
-
}
|
|
4519
|
-
|
|
4520
|
-
/** Compare two items pointing at aligned unsigned int's */
|
|
4521
|
-
static int
|
|
4522
|
-
mdb_cmp_int(const MDB_val *a, const MDB_val *b)
|
|
4523
|
-
{
|
|
4524
|
-
return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 :
|
|
4525
|
-
*(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data;
|
|
4526
|
-
}
|
|
4665
|
+
return (*(unsigned int *)a->mv_data < *(unsigned int *)b->mv_data) ? -1 :
|
|
4666
|
+
*(unsigned int *)a->mv_data > *(unsigned int *)b->mv_data;
|
|
4667
|
+
}
|
|
4527
4668
|
|
|
4528
4669
|
/** Compare two items pointing at unsigned ints of unknown alignment.
|
|
4529
4670
|
* Nodes and keys are guaranteed to be 2-byte aligned.
|
|
@@ -4542,7 +4683,16 @@ mdb_cmp_cint(const MDB_val *a, const MDB_val *b)
|
|
|
4542
4683
|
} while(!x && u > (unsigned short *)a->mv_data);
|
|
4543
4684
|
return x;
|
|
4544
4685
|
#else
|
|
4545
|
-
|
|
4686
|
+
unsigned short *u, *c, *end;
|
|
4687
|
+
int x;
|
|
4688
|
+
|
|
4689
|
+
end = (unsigned short *) ((char *) a->mv_data + a->mv_size);
|
|
4690
|
+
u = (unsigned short *)a->mv_data;
|
|
4691
|
+
c = (unsigned short *)b->mv_data;
|
|
4692
|
+
do {
|
|
4693
|
+
x = *u++ - *c++;
|
|
4694
|
+
} while(!x && u < end);
|
|
4695
|
+
return x;
|
|
4546
4696
|
#endif
|
|
4547
4697
|
}
|
|
4548
4698
|
|
|
@@ -4924,6 +5074,8 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
|
|
|
4924
5074
|
/* Make sure we're using an up-to-date root */
|
|
4925
5075
|
if (*mc->mc_dbflag & DB_STALE) {
|
|
4926
5076
|
MDB_cursor mc2;
|
|
5077
|
+
if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi))
|
|
5078
|
+
return MDB_BAD_DBI;
|
|
4927
5079
|
mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, NULL);
|
|
4928
5080
|
rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, 0);
|
|
4929
5081
|
if (rc)
|
|
@@ -5264,8 +5416,10 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
|
|
|
5264
5416
|
if (op == MDB_PREV || op == MDB_PREV_DUP) {
|
|
5265
5417
|
rc = mdb_cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDB_PREV);
|
|
5266
5418
|
if (op != MDB_PREV || rc != MDB_NOTFOUND) {
|
|
5267
|
-
if (rc == MDB_SUCCESS)
|
|
5419
|
+
if (rc == MDB_SUCCESS) {
|
|
5268
5420
|
MDB_GET_KEY(leaf, key);
|
|
5421
|
+
mc->mc_flags &= ~C_EOF;
|
|
5422
|
+
}
|
|
5269
5423
|
return rc;
|
|
5270
5424
|
}
|
|
5271
5425
|
} else {
|
|
@@ -5457,8 +5611,10 @@ set1:
|
|
|
5457
5611
|
mc->mc_flags &= ~C_EOF;
|
|
5458
5612
|
|
|
5459
5613
|
if (IS_LEAF2(mp)) {
|
|
5460
|
-
|
|
5461
|
-
|
|
5614
|
+
if (op == MDB_SET_RANGE || op == MDB_SET_KEY) {
|
|
5615
|
+
key->mv_size = mc->mc_db->md_pad;
|
|
5616
|
+
key->mv_data = LEAF2KEY(mp, mc->mc_ki[mc->mc_top], key->mv_size);
|
|
5617
|
+
}
|
|
5462
5618
|
return MDB_SUCCESS;
|
|
5463
5619
|
}
|
|
5464
5620
|
|
|
@@ -5740,6 +5896,14 @@ fetchm:
|
|
|
5740
5896
|
rc = MDB_INCOMPATIBLE;
|
|
5741
5897
|
break;
|
|
5742
5898
|
}
|
|
5899
|
+
{
|
|
5900
|
+
MDB_node *leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
|
5901
|
+
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
5902
|
+
MDB_GET_KEY(leaf, key);
|
|
5903
|
+
rc = mdb_node_read(mc->mc_txn, leaf, data);
|
|
5904
|
+
break;
|
|
5905
|
+
}
|
|
5906
|
+
}
|
|
5743
5907
|
if (!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) {
|
|
5744
5908
|
rc = EINVAL;
|
|
5745
5909
|
break;
|
|
@@ -5776,6 +5940,8 @@ mdb_cursor_touch(MDB_cursor *mc)
|
|
|
5776
5940
|
if (mc->mc_dbi > MAIN_DBI && !(*mc->mc_dbflag & DB_DIRTY)) {
|
|
5777
5941
|
MDB_cursor mc2;
|
|
5778
5942
|
MDB_xcursor mcx;
|
|
5943
|
+
if (TXN_DBI_CHANGED(mc->mc_txn, mc->mc_dbi))
|
|
5944
|
+
return MDB_BAD_DBI;
|
|
5779
5945
|
mdb_cursor_init(&mc2, mc->mc_txn, MAIN_DBI, &mcx);
|
|
5780
5946
|
rc = mdb_page_search(&mc2, &mc->mc_dbx->md_name, MDB_PS_MODIFY);
|
|
5781
5947
|
if (rc)
|
|
@@ -5932,22 +6098,42 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
|
|
|
5932
6098
|
if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
|
|
5933
6099
|
LEAFSIZE(key, data) > env->me_nodemax)
|
|
5934
6100
|
{
|
|
5935
|
-
/* Too big for a node, insert in sub-DB
|
|
6101
|
+
/* Too big for a node, insert in sub-DB. Set up an empty
|
|
6102
|
+
* "old sub-page" for prep_subDB to expand to a full page.
|
|
6103
|
+
*/
|
|
5936
6104
|
fp_flags = P_LEAF|P_DIRTY;
|
|
5937
6105
|
fp = env->me_pbuf;
|
|
5938
6106
|
fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
|
|
5939
|
-
fp->mp_lower = fp->mp_upper =
|
|
6107
|
+
fp->mp_lower = fp->mp_upper = (PAGEHDRSZ-PAGEBASE);
|
|
6108
|
+
olddata.mv_size = PAGEHDRSZ;
|
|
5940
6109
|
goto prep_subDB;
|
|
5941
6110
|
}
|
|
5942
6111
|
} else {
|
|
5943
6112
|
/* there's only a key anyway, so this is a no-op */
|
|
5944
6113
|
if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
|
|
6114
|
+
char *ptr;
|
|
5945
6115
|
unsigned int ksize = mc->mc_db->md_pad;
|
|
5946
6116
|
if (key->mv_size != ksize)
|
|
5947
6117
|
return MDB_BAD_VALSIZE;
|
|
5948
|
-
|
|
5949
|
-
|
|
5950
|
-
|
|
6118
|
+
ptr = LEAF2KEY(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], ksize);
|
|
6119
|
+
memcpy(ptr, key->mv_data, ksize);
|
|
6120
|
+
fix_parent:
|
|
6121
|
+
/* if overwriting slot 0 of leaf, need to
|
|
6122
|
+
* update branch key if there is a parent page
|
|
6123
|
+
*/
|
|
6124
|
+
if (mc->mc_top && !mc->mc_ki[mc->mc_top]) {
|
|
6125
|
+
unsigned short top = mc->mc_top;
|
|
6126
|
+
mc->mc_top--;
|
|
6127
|
+
/* slot 0 is always an empty key, find real slot */
|
|
6128
|
+
while (mc->mc_top && !mc->mc_ki[mc->mc_top])
|
|
6129
|
+
mc->mc_top--;
|
|
6130
|
+
if (mc->mc_ki[mc->mc_top])
|
|
6131
|
+
rc2 = mdb_update_key(mc, key);
|
|
6132
|
+
else
|
|
6133
|
+
rc2 = MDB_SUCCESS;
|
|
6134
|
+
mc->mc_top = top;
|
|
6135
|
+
if (rc2)
|
|
6136
|
+
return rc2;
|
|
5951
6137
|
}
|
|
5952
6138
|
return MDB_SUCCESS;
|
|
5953
6139
|
}
|
|
@@ -5978,12 +6164,12 @@ more:
|
|
|
5978
6164
|
if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
|
|
5979
6165
|
mc->mc_dbx->md_dcmp = mdb_cmp_clong;
|
|
5980
6166
|
#endif
|
|
5981
|
-
/*
|
|
6167
|
+
/* does data match? */
|
|
5982
6168
|
if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
|
|
5983
6169
|
if (flags & MDB_NODUPDATA)
|
|
5984
6170
|
return MDB_KEYEXIST;
|
|
5985
|
-
|
|
5986
|
-
goto
|
|
6171
|
+
/* overwrite it */
|
|
6172
|
+
goto current;
|
|
5987
6173
|
}
|
|
5988
6174
|
|
|
5989
6175
|
/* Back up original data item */
|
|
@@ -5992,7 +6178,7 @@ more:
|
|
|
5992
6178
|
|
|
5993
6179
|
/* Make sub-page header for the dup items, with dummy body */
|
|
5994
6180
|
fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
|
|
5995
|
-
fp->mp_lower = PAGEHDRSZ;
|
|
6181
|
+
fp->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
|
5996
6182
|
xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
|
|
5997
6183
|
if (mc->mc_db->md_flags & MDB_DUPFIXED) {
|
|
5998
6184
|
fp->mp_flags |= P_LEAF2;
|
|
@@ -6002,8 +6188,8 @@ more:
|
|
|
6002
6188
|
xdata.mv_size += 2 * (sizeof(indx_t) + NODESIZE) +
|
|
6003
6189
|
(dkey.mv_size & 1) + (data->mv_size & 1);
|
|
6004
6190
|
}
|
|
6005
|
-
fp->mp_upper = xdata.mv_size;
|
|
6006
|
-
olddata.mv_size =
|
|
6191
|
+
fp->mp_upper = xdata.mv_size - PAGEBASE;
|
|
6192
|
+
olddata.mv_size = xdata.mv_size; /* pretend olddata is fp */
|
|
6007
6193
|
} else if (leaf->mn_flags & F_SUBDATA) {
|
|
6008
6194
|
/* Data is on sub-DB, just store it */
|
|
6009
6195
|
flags |= F_DUPDATA|F_SUBDATA;
|
|
@@ -6070,8 +6256,8 @@ prep_subDB:
|
|
|
6070
6256
|
if (fp_flags & P_LEAF2) {
|
|
6071
6257
|
memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
|
|
6072
6258
|
} else {
|
|
6073
|
-
memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper,
|
|
6074
|
-
olddata.mv_size - fp->mp_upper);
|
|
6259
|
+
memcpy((char *)mp + mp->mp_upper + PAGEBASE, (char *)fp + fp->mp_upper + PAGEBASE,
|
|
6260
|
+
olddata.mv_size - fp->mp_upper - PAGEBASE);
|
|
6075
6261
|
for (i=0; i<NUMKEYS(fp); i++)
|
|
6076
6262
|
mp->mp_ptrs[i] = fp->mp_ptrs[i] + offset;
|
|
6077
6263
|
}
|
|
@@ -6154,8 +6340,10 @@ current:
|
|
|
6154
6340
|
data->mv_data = olddata.mv_data;
|
|
6155
6341
|
else if (!(mc->mc_flags & C_SUB))
|
|
6156
6342
|
memcpy(olddata.mv_data, data->mv_data, data->mv_size);
|
|
6157
|
-
else
|
|
6343
|
+
else {
|
|
6158
6344
|
memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
|
|
6345
|
+
goto fix_parent;
|
|
6346
|
+
}
|
|
6159
6347
|
return MDB_SUCCESS;
|
|
6160
6348
|
}
|
|
6161
6349
|
mdb_node_del(mc, 0);
|
|
@@ -6259,7 +6447,6 @@ put_sub:
|
|
|
6259
6447
|
*/
|
|
6260
6448
|
mc->mc_flags |= C_INITIALIZED;
|
|
6261
6449
|
}
|
|
6262
|
-
next_sub:
|
|
6263
6450
|
if (flags & MDB_MULTIPLE) {
|
|
6264
6451
|
if (!rc) {
|
|
6265
6452
|
mcount++;
|
|
@@ -6393,8 +6580,8 @@ mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp)
|
|
|
6393
6580
|
DPRINTF(("allocated new mpage %"Z"u, page size %u",
|
|
6394
6581
|
np->mp_pgno, mc->mc_txn->mt_env->me_psize));
|
|
6395
6582
|
np->mp_flags = flags | P_DIRTY;
|
|
6396
|
-
np->mp_lower = PAGEHDRSZ;
|
|
6397
|
-
np->mp_upper = mc->mc_txn->mt_env->me_psize;
|
|
6583
|
+
np->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
|
6584
|
+
np->mp_upper = mc->mc_txn->mt_env->me_psize - PAGEBASE;
|
|
6398
6585
|
|
|
6399
6586
|
if (IS_BRANCH(np))
|
|
6400
6587
|
mc->mc_db->md_branch_pages++;
|
|
@@ -6647,7 +6834,7 @@ mdb_node_del(MDB_cursor *mc, int ksize)
|
|
|
6647
6834
|
}
|
|
6648
6835
|
}
|
|
6649
6836
|
|
|
6650
|
-
base = (char *)mp + mp->mp_upper;
|
|
6837
|
+
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
|
6651
6838
|
memmove(base + sz, base, ptr - mp->mp_upper);
|
|
6652
6839
|
|
|
6653
6840
|
mp->mp_lower -= sizeof(indx_t);
|
|
@@ -6701,7 +6888,7 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
|
|
|
6701
6888
|
mp->mp_ptrs[i] += delta;
|
|
6702
6889
|
}
|
|
6703
6890
|
|
|
6704
|
-
base = (char *)mp + mp->mp_upper;
|
|
6891
|
+
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
|
6705
6892
|
memmove(base + delta, base, ptr - mp->mp_upper + NODESIZE + NODEKSZ(node));
|
|
6706
6893
|
mp->mp_upper += delta;
|
|
6707
6894
|
}
|
|
@@ -6877,6 +7064,12 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp)
|
|
|
6877
7064
|
if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
|
|
6878
7065
|
return MDB_BAD_TXN;
|
|
6879
7066
|
|
|
7067
|
+
if (!(mc->mc_flags & C_INITIALIZED))
|
|
7068
|
+
return EINVAL;
|
|
7069
|
+
|
|
7070
|
+
if (!mc->mc_snum || (mc->mc_flags & C_EOF))
|
|
7071
|
+
return MDB_NOTFOUND;
|
|
7072
|
+
|
|
6880
7073
|
leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
|
|
6881
7074
|
if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
|
|
6882
7075
|
*countp = 1;
|
|
@@ -6973,7 +7166,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
|
|
|
6973
7166
|
mp->mp_ptrs[i] -= delta;
|
|
6974
7167
|
}
|
|
6975
7168
|
|
|
6976
|
-
base = (char *)mp + mp->mp_upper;
|
|
7169
|
+
base = (char *)mp + mp->mp_upper + PAGEBASE;
|
|
6977
7170
|
len = ptr - mp->mp_upper + NODESIZE;
|
|
6978
7171
|
memmove(base - delta, base, len);
|
|
6979
7172
|
mp->mp_upper -= delta;
|
|
@@ -7054,20 +7247,20 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
7054
7247
|
MDB_node *s2;
|
|
7055
7248
|
MDB_val bkey;
|
|
7056
7249
|
/* must find the lowest key below dst */
|
|
7057
|
-
|
|
7250
|
+
mdb_cursor_copy(cdst, &mn);
|
|
7251
|
+
rc = mdb_page_search_lowest(&mn);
|
|
7058
7252
|
if (rc)
|
|
7059
7253
|
return rc;
|
|
7060
|
-
if (IS_LEAF2(
|
|
7061
|
-
bkey.mv_size =
|
|
7062
|
-
bkey.mv_data = LEAF2KEY(
|
|
7254
|
+
if (IS_LEAF2(mn.mc_pg[mn.mc_top])) {
|
|
7255
|
+
bkey.mv_size = mn.mc_db->md_pad;
|
|
7256
|
+
bkey.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, bkey.mv_size);
|
|
7063
7257
|
} else {
|
|
7064
|
-
s2 = NODEPTR(
|
|
7258
|
+
s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0);
|
|
7065
7259
|
bkey.mv_size = NODEKSZ(s2);
|
|
7066
7260
|
bkey.mv_data = NODEKEY(s2);
|
|
7067
7261
|
}
|
|
7068
|
-
|
|
7069
|
-
|
|
7070
|
-
mdb_cursor_copy(cdst, &mn);
|
|
7262
|
+
mn.mc_snum = snum--;
|
|
7263
|
+
mn.mc_top = snum;
|
|
7071
7264
|
mn.mc_ki[snum] = 0;
|
|
7072
7265
|
rc = mdb_update_key(&mn, &bkey);
|
|
7073
7266
|
if (rc)
|
|
@@ -7183,14 +7376,17 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
7183
7376
|
static int
|
|
7184
7377
|
mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
7185
7378
|
{
|
|
7186
|
-
|
|
7187
|
-
|
|
7188
|
-
MDB_node *srcnode;
|
|
7379
|
+
MDB_page *psrc, *pdst;
|
|
7380
|
+
MDB_node *srcnode;
|
|
7189
7381
|
MDB_val key, data;
|
|
7190
|
-
unsigned
|
|
7382
|
+
unsigned nkeys;
|
|
7383
|
+
int rc;
|
|
7384
|
+
indx_t i, j;
|
|
7191
7385
|
|
|
7192
|
-
|
|
7193
|
-
|
|
7386
|
+
psrc = csrc->mc_pg[csrc->mc_top];
|
|
7387
|
+
pdst = cdst->mc_pg[cdst->mc_top];
|
|
7388
|
+
|
|
7389
|
+
DPRINTF(("merging page %"Z"u into %"Z"u", psrc->mp_pgno, pdst->mp_pgno));
|
|
7194
7390
|
|
|
7195
7391
|
mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
|
|
7196
7392
|
mdb_cassert(csrc, cdst->mc_snum > 1);
|
|
@@ -7201,36 +7397,35 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
7201
7397
|
|
|
7202
7398
|
/* Move all nodes from src to dst.
|
|
7203
7399
|
*/
|
|
7204
|
-
j = nkeys = NUMKEYS(
|
|
7205
|
-
if (IS_LEAF2(
|
|
7400
|
+
j = nkeys = NUMKEYS(pdst);
|
|
7401
|
+
if (IS_LEAF2(psrc)) {
|
|
7206
7402
|
key.mv_size = csrc->mc_db->md_pad;
|
|
7207
|
-
key.mv_data = METADATA(
|
|
7208
|
-
for (i = 0; i < NUMKEYS(
|
|
7403
|
+
key.mv_data = METADATA(psrc);
|
|
7404
|
+
for (i = 0; i < NUMKEYS(psrc); i++, j++) {
|
|
7209
7405
|
rc = mdb_node_add(cdst, j, &key, NULL, 0, 0);
|
|
7210
7406
|
if (rc != MDB_SUCCESS)
|
|
7211
7407
|
return rc;
|
|
7212
7408
|
key.mv_data = (char *)key.mv_data + key.mv_size;
|
|
7213
7409
|
}
|
|
7214
7410
|
} else {
|
|
7215
|
-
for (i = 0; i < NUMKEYS(
|
|
7216
|
-
srcnode = NODEPTR(
|
|
7217
|
-
if (i == 0 && IS_BRANCH(
|
|
7218
|
-
|
|
7411
|
+
for (i = 0; i < NUMKEYS(psrc); i++, j++) {
|
|
7412
|
+
srcnode = NODEPTR(psrc, i);
|
|
7413
|
+
if (i == 0 && IS_BRANCH(psrc)) {
|
|
7414
|
+
MDB_cursor mn;
|
|
7219
7415
|
MDB_node *s2;
|
|
7416
|
+
mdb_cursor_copy(csrc, &mn);
|
|
7220
7417
|
/* must find the lowest key below src */
|
|
7221
|
-
rc = mdb_page_search_lowest(
|
|
7418
|
+
rc = mdb_page_search_lowest(&mn);
|
|
7222
7419
|
if (rc)
|
|
7223
7420
|
return rc;
|
|
7224
|
-
if (IS_LEAF2(
|
|
7225
|
-
key.mv_size =
|
|
7226
|
-
key.mv_data = LEAF2KEY(
|
|
7421
|
+
if (IS_LEAF2(mn.mc_pg[mn.mc_top])) {
|
|
7422
|
+
key.mv_size = mn.mc_db->md_pad;
|
|
7423
|
+
key.mv_data = LEAF2KEY(mn.mc_pg[mn.mc_top], 0, key.mv_size);
|
|
7227
7424
|
} else {
|
|
7228
|
-
s2 = NODEPTR(
|
|
7425
|
+
s2 = NODEPTR(mn.mc_pg[mn.mc_top], 0);
|
|
7229
7426
|
key.mv_size = NODEKSZ(s2);
|
|
7230
7427
|
key.mv_data = NODEKEY(s2);
|
|
7231
7428
|
}
|
|
7232
|
-
csrc->mc_snum = snum--;
|
|
7233
|
-
csrc->mc_top = snum;
|
|
7234
7429
|
} else {
|
|
7235
7430
|
key.mv_size = srcnode->mn_ksize;
|
|
7236
7431
|
key.mv_data = NODEKEY(srcnode);
|
|
@@ -7245,8 +7440,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
7245
7440
|
}
|
|
7246
7441
|
|
|
7247
7442
|
DPRINTF(("dst page %"Z"u now has %u keys (%.1f%% filled)",
|
|
7248
|
-
|
|
7249
|
-
(float)PAGEFILL(cdst->mc_txn->mt_env,
|
|
7443
|
+
pdst->mp_pgno, NUMKEYS(pdst),
|
|
7444
|
+
(float)PAGEFILL(cdst->mc_txn->mt_env, pdst) / 10));
|
|
7250
7445
|
|
|
7251
7446
|
/* Unlink the src page from parent and add to free list.
|
|
7252
7447
|
*/
|
|
@@ -7262,11 +7457,14 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
7262
7457
|
}
|
|
7263
7458
|
csrc->mc_top++;
|
|
7264
7459
|
|
|
7265
|
-
|
|
7266
|
-
|
|
7460
|
+
psrc = csrc->mc_pg[csrc->mc_top];
|
|
7461
|
+
/* If not operating on FreeDB, allow this page to be reused
|
|
7462
|
+
* in this txn. Otherwise just add to free list.
|
|
7463
|
+
*/
|
|
7464
|
+
rc = mdb_page_loose(csrc, psrc);
|
|
7267
7465
|
if (rc)
|
|
7268
7466
|
return rc;
|
|
7269
|
-
if (IS_LEAF(
|
|
7467
|
+
if (IS_LEAF(psrc))
|
|
7270
7468
|
csrc->mc_db->md_leaf_pages--;
|
|
7271
7469
|
else
|
|
7272
7470
|
csrc->mc_db->md_branch_pages--;
|
|
@@ -7274,7 +7472,6 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
7274
7472
|
/* Adjust other cursors pointing to mp */
|
|
7275
7473
|
MDB_cursor *m2, *m3;
|
|
7276
7474
|
MDB_dbi dbi = csrc->mc_dbi;
|
|
7277
|
-
MDB_page *mp = cdst->mc_pg[cdst->mc_top];
|
|
7278
7475
|
|
|
7279
7476
|
for (m2 = csrc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
|
|
7280
7477
|
if (csrc->mc_flags & C_SUB)
|
|
@@ -7283,8 +7480,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
|
|
|
7283
7480
|
m3 = m2;
|
|
7284
7481
|
if (m3 == csrc) continue;
|
|
7285
7482
|
if (m3->mc_snum < csrc->mc_snum) continue;
|
|
7286
|
-
if (m3->mc_pg[csrc->mc_top] ==
|
|
7287
|
-
m3->mc_pg[csrc->mc_top] =
|
|
7483
|
+
if (m3->mc_pg[csrc->mc_top] == psrc) {
|
|
7484
|
+
m3->mc_pg[csrc->mc_top] = pdst;
|
|
7288
7485
|
m3->mc_ki[csrc->mc_top] += nkeys;
|
|
7289
7486
|
}
|
|
7290
7487
|
}
|
|
@@ -7525,8 +7722,10 @@ mdb_cursor_del0(MDB_cursor *mc)
|
|
|
7525
7722
|
/* if mc points past last node in page, find next sibling */
|
|
7526
7723
|
if (mc->mc_ki[mc->mc_top] >= nkeys) {
|
|
7527
7724
|
rc = mdb_cursor_sibling(mc, 1);
|
|
7528
|
-
if (rc == MDB_NOTFOUND)
|
|
7725
|
+
if (rc == MDB_NOTFOUND) {
|
|
7726
|
+
mc->mc_flags |= C_EOF;
|
|
7529
7727
|
rc = MDB_SUCCESS;
|
|
7728
|
+
}
|
|
7530
7729
|
}
|
|
7531
7730
|
|
|
7532
7731
|
/* Adjust other cursors pointing to mp */
|
|
@@ -7541,11 +7740,15 @@ mdb_cursor_del0(MDB_cursor *mc)
|
|
|
7541
7740
|
m3->mc_flags |= C_DEL;
|
|
7542
7741
|
if (m3->mc_ki[mc->mc_top] > ki)
|
|
7543
7742
|
m3->mc_ki[mc->mc_top]--;
|
|
7743
|
+
else if (mc->mc_db->md_flags & MDB_DUPSORT)
|
|
7744
|
+
m3->mc_xcursor->mx_cursor.mc_flags |= C_EOF;
|
|
7544
7745
|
}
|
|
7545
7746
|
if (m3->mc_ki[mc->mc_top] >= nkeys) {
|
|
7546
7747
|
rc = mdb_cursor_sibling(m3, 1);
|
|
7547
|
-
if (rc == MDB_NOTFOUND)
|
|
7748
|
+
if (rc == MDB_NOTFOUND) {
|
|
7749
|
+
m3->mc_flags |= C_EOF;
|
|
7548
7750
|
rc = MDB_SUCCESS;
|
|
7751
|
+
}
|
|
7549
7752
|
}
|
|
7550
7753
|
}
|
|
7551
7754
|
}
|
|
@@ -7760,8 +7963,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
7760
7963
|
}
|
|
7761
7964
|
copy->mp_pgno = mp->mp_pgno;
|
|
7762
7965
|
copy->mp_flags = mp->mp_flags;
|
|
7763
|
-
copy->mp_lower = PAGEHDRSZ;
|
|
7764
|
-
copy->mp_upper = env->me_psize;
|
|
7966
|
+
copy->mp_lower = (PAGEHDRSZ-PAGEBASE);
|
|
7967
|
+
copy->mp_upper = env->me_psize - PAGEBASE;
|
|
7765
7968
|
|
|
7766
7969
|
/* prepare to insert */
|
|
7767
7970
|
for (i=0, j=0; i<nkeys; i++) {
|
|
@@ -7801,7 +8004,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
7801
8004
|
psize += nsize;
|
|
7802
8005
|
node = NULL;
|
|
7803
8006
|
} else {
|
|
7804
|
-
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]);
|
|
8007
|
+
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE);
|
|
7805
8008
|
psize += NODESIZE + NODEKSZ(node) + sizeof(indx_t);
|
|
7806
8009
|
if (IS_LEAF(mp)) {
|
|
7807
8010
|
if (F_ISSET(node->mn_flags, F_BIGDATA))
|
|
@@ -7821,7 +8024,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
7821
8024
|
sepkey.mv_size = newkey->mv_size;
|
|
7822
8025
|
sepkey.mv_data = newkey->mv_data;
|
|
7823
8026
|
} else {
|
|
7824
|
-
node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx]);
|
|
8027
|
+
node = (MDB_node *)((char *)mp + copy->mp_ptrs[split_indx] + PAGEBASE);
|
|
7825
8028
|
sepkey.mv_size = node->mn_ksize;
|
|
7826
8029
|
sepkey.mv_data = NODEKEY(node);
|
|
7827
8030
|
}
|
|
@@ -7902,7 +8105,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
7902
8105
|
/* Update index for the new key. */
|
|
7903
8106
|
mc->mc_ki[mc->mc_top] = j;
|
|
7904
8107
|
} else {
|
|
7905
|
-
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i]);
|
|
8108
|
+
node = (MDB_node *)((char *)mp + copy->mp_ptrs[i] + PAGEBASE);
|
|
7906
8109
|
rkey.mv_data = NODEKEY(node);
|
|
7907
8110
|
rkey.mv_size = node->mn_ksize;
|
|
7908
8111
|
if (IS_LEAF(mp)) {
|
|
@@ -7938,7 +8141,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
|
|
|
7938
8141
|
mp->mp_lower = copy->mp_lower;
|
|
7939
8142
|
mp->mp_upper = copy->mp_upper;
|
|
7940
8143
|
memcpy(NODEPTR(mp, nkeys-1), NODEPTR(copy, nkeys-1),
|
|
7941
|
-
env->me_psize - copy->mp_upper);
|
|
8144
|
+
env->me_psize - copy->mp_upper - PAGEBASE);
|
|
7942
8145
|
|
|
7943
8146
|
/* reset back to original page */
|
|
7944
8147
|
if (newindx < split_indx) {
|
|
@@ -8037,7 +8240,568 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
|
|
|
8037
8240
|
return mdb_cursor_put(&mc, key, data, flags);
|
|
8038
8241
|
}
|
|
8039
8242
|
|
|
8040
|
-
|
|
8243
|
+
#ifndef MDB_WBUF
|
|
8244
|
+
#define MDB_WBUF (1024*1024)
|
|
8245
|
+
#endif
|
|
8246
|
+
|
|
8247
|
+
/** State needed for a compacting copy. */
|
|
8248
|
+
typedef struct mdb_copy {
|
|
8249
|
+
pthread_mutex_t mc_mutex;
|
|
8250
|
+
pthread_cond_t mc_cond;
|
|
8251
|
+
char *mc_wbuf[2];
|
|
8252
|
+
char *mc_over[2];
|
|
8253
|
+
MDB_env *mc_env;
|
|
8254
|
+
MDB_txn *mc_txn;
|
|
8255
|
+
int mc_wlen[2];
|
|
8256
|
+
int mc_olen[2];
|
|
8257
|
+
pgno_t mc_next_pgno;
|
|
8258
|
+
HANDLE mc_fd;
|
|
8259
|
+
int mc_status;
|
|
8260
|
+
volatile int mc_new;
|
|
8261
|
+
int mc_toggle;
|
|
8262
|
+
|
|
8263
|
+
} mdb_copy;
|
|
8264
|
+
|
|
8265
|
+
/** Dedicated writer thread for compacting copy. */
|
|
8266
|
+
static THREAD_RET ESECT
|
|
8267
|
+
mdb_env_copythr(void *arg)
|
|
8268
|
+
{
|
|
8269
|
+
mdb_copy *my = arg;
|
|
8270
|
+
char *ptr;
|
|
8271
|
+
int toggle = 0, wsize, rc;
|
|
8272
|
+
#ifdef _WIN32
|
|
8273
|
+
DWORD len;
|
|
8274
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
|
8275
|
+
#else
|
|
8276
|
+
int len;
|
|
8277
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
|
8278
|
+
#endif
|
|
8279
|
+
|
|
8280
|
+
pthread_mutex_lock(&my->mc_mutex);
|
|
8281
|
+
my->mc_new = 0;
|
|
8282
|
+
pthread_cond_signal(&my->mc_cond);
|
|
8283
|
+
for(;;) {
|
|
8284
|
+
while (!my->mc_new)
|
|
8285
|
+
pthread_cond_wait(&my->mc_cond, &my->mc_mutex);
|
|
8286
|
+
if (my->mc_new < 0) {
|
|
8287
|
+
my->mc_new = 0;
|
|
8288
|
+
break;
|
|
8289
|
+
}
|
|
8290
|
+
my->mc_new = 0;
|
|
8291
|
+
wsize = my->mc_wlen[toggle];
|
|
8292
|
+
ptr = my->mc_wbuf[toggle];
|
|
8293
|
+
again:
|
|
8294
|
+
while (wsize > 0) {
|
|
8295
|
+
DO_WRITE(rc, my->mc_fd, ptr, wsize, len);
|
|
8296
|
+
if (!rc) {
|
|
8297
|
+
rc = ErrCode();
|
|
8298
|
+
break;
|
|
8299
|
+
} else if (len > 0) {
|
|
8300
|
+
rc = MDB_SUCCESS;
|
|
8301
|
+
ptr += len;
|
|
8302
|
+
wsize -= len;
|
|
8303
|
+
continue;
|
|
8304
|
+
} else {
|
|
8305
|
+
rc = EIO;
|
|
8306
|
+
break;
|
|
8307
|
+
}
|
|
8308
|
+
}
|
|
8309
|
+
if (rc) {
|
|
8310
|
+
my->mc_status = rc;
|
|
8311
|
+
break;
|
|
8312
|
+
}
|
|
8313
|
+
/* If there's an overflow page tail, write it too */
|
|
8314
|
+
if (my->mc_olen[toggle]) {
|
|
8315
|
+
wsize = my->mc_olen[toggle];
|
|
8316
|
+
ptr = my->mc_over[toggle];
|
|
8317
|
+
my->mc_olen[toggle] = 0;
|
|
8318
|
+
goto again;
|
|
8319
|
+
}
|
|
8320
|
+
my->mc_wlen[toggle] = 0;
|
|
8321
|
+
toggle ^= 1;
|
|
8322
|
+
pthread_cond_signal(&my->mc_cond);
|
|
8323
|
+
}
|
|
8324
|
+
pthread_cond_signal(&my->mc_cond);
|
|
8325
|
+
pthread_mutex_unlock(&my->mc_mutex);
|
|
8326
|
+
return (THREAD_RET)0;
|
|
8327
|
+
#undef DO_WRITE
|
|
8328
|
+
}
|
|
8329
|
+
|
|
8330
|
+
/** Tell the writer thread there's a buffer ready to write */
|
|
8331
|
+
static int ESECT
|
|
8332
|
+
mdb_env_cthr_toggle(mdb_copy *my, int st)
|
|
8333
|
+
{
|
|
8334
|
+
int toggle = my->mc_toggle ^ 1;
|
|
8335
|
+
pthread_mutex_lock(&my->mc_mutex);
|
|
8336
|
+
if (my->mc_status) {
|
|
8337
|
+
pthread_mutex_unlock(&my->mc_mutex);
|
|
8338
|
+
return my->mc_status;
|
|
8339
|
+
}
|
|
8340
|
+
while (my->mc_new == 1)
|
|
8341
|
+
pthread_cond_wait(&my->mc_cond, &my->mc_mutex);
|
|
8342
|
+
my->mc_new = st;
|
|
8343
|
+
my->mc_toggle = toggle;
|
|
8344
|
+
pthread_cond_signal(&my->mc_cond);
|
|
8345
|
+
pthread_mutex_unlock(&my->mc_mutex);
|
|
8346
|
+
return 0;
|
|
8347
|
+
}
|
|
8348
|
+
|
|
8349
|
+
/** Depth-first tree traversal for compacting copy. */
|
|
8350
|
+
static int ESECT
|
|
8351
|
+
mdb_env_cwalk(mdb_copy *my, pgno_t *pg, int flags)
|
|
8352
|
+
{
|
|
8353
|
+
MDB_cursor mc;
|
|
8354
|
+
MDB_txn *txn = my->mc_txn;
|
|
8355
|
+
MDB_node *ni;
|
|
8356
|
+
MDB_page *mo, *mp, *leaf;
|
|
8357
|
+
char *buf, *ptr;
|
|
8358
|
+
int rc, toggle;
|
|
8359
|
+
unsigned int i;
|
|
8360
|
+
|
|
8361
|
+
/* Empty DB, nothing to do */
|
|
8362
|
+
if (*pg == P_INVALID)
|
|
8363
|
+
return MDB_SUCCESS;
|
|
8364
|
+
|
|
8365
|
+
mc.mc_snum = 1;
|
|
8366
|
+
mc.mc_top = 0;
|
|
8367
|
+
mc.mc_txn = txn;
|
|
8368
|
+
|
|
8369
|
+
rc = mdb_page_get(my->mc_txn, *pg, &mc.mc_pg[0], NULL);
|
|
8370
|
+
if (rc)
|
|
8371
|
+
return rc;
|
|
8372
|
+
rc = mdb_page_search_root(&mc, NULL, MDB_PS_FIRST);
|
|
8373
|
+
if (rc)
|
|
8374
|
+
return rc;
|
|
8375
|
+
|
|
8376
|
+
/* Make cursor pages writable */
|
|
8377
|
+
buf = ptr = malloc(my->mc_env->me_psize * mc.mc_snum);
|
|
8378
|
+
if (buf == NULL)
|
|
8379
|
+
return ENOMEM;
|
|
8380
|
+
|
|
8381
|
+
for (i=0; i<mc.mc_top; i++) {
|
|
8382
|
+
mdb_page_copy((MDB_page *)ptr, mc.mc_pg[i], my->mc_env->me_psize);
|
|
8383
|
+
mc.mc_pg[i] = (MDB_page *)ptr;
|
|
8384
|
+
ptr += my->mc_env->me_psize;
|
|
8385
|
+
}
|
|
8386
|
+
|
|
8387
|
+
/* This is writable space for a leaf page. Usually not needed. */
|
|
8388
|
+
leaf = (MDB_page *)ptr;
|
|
8389
|
+
|
|
8390
|
+
toggle = my->mc_toggle;
|
|
8391
|
+
while (mc.mc_snum > 0) {
|
|
8392
|
+
unsigned n;
|
|
8393
|
+
mp = mc.mc_pg[mc.mc_top];
|
|
8394
|
+
n = NUMKEYS(mp);
|
|
8395
|
+
|
|
8396
|
+
if (IS_LEAF(mp)) {
|
|
8397
|
+
if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) {
|
|
8398
|
+
for (i=0; i<n; i++) {
|
|
8399
|
+
ni = NODEPTR(mp, i);
|
|
8400
|
+
if (ni->mn_flags & F_BIGDATA) {
|
|
8401
|
+
MDB_page *omp;
|
|
8402
|
+
pgno_t pg;
|
|
8403
|
+
|
|
8404
|
+
/* Need writable leaf */
|
|
8405
|
+
if (mp != leaf) {
|
|
8406
|
+
mc.mc_pg[mc.mc_top] = leaf;
|
|
8407
|
+
mdb_page_copy(leaf, mp, my->mc_env->me_psize);
|
|
8408
|
+
mp = leaf;
|
|
8409
|
+
ni = NODEPTR(mp, i);
|
|
8410
|
+
}
|
|
8411
|
+
|
|
8412
|
+
memcpy(&pg, NODEDATA(ni), sizeof(pg));
|
|
8413
|
+
rc = mdb_page_get(txn, pg, &omp, NULL);
|
|
8414
|
+
if (rc)
|
|
8415
|
+
goto done;
|
|
8416
|
+
if (my->mc_wlen[toggle] >= MDB_WBUF) {
|
|
8417
|
+
rc = mdb_env_cthr_toggle(my, 1);
|
|
8418
|
+
if (rc)
|
|
8419
|
+
goto done;
|
|
8420
|
+
toggle = my->mc_toggle;
|
|
8421
|
+
}
|
|
8422
|
+
mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
|
8423
|
+
memcpy(mo, omp, my->mc_env->me_psize);
|
|
8424
|
+
mo->mp_pgno = my->mc_next_pgno;
|
|
8425
|
+
my->mc_next_pgno += omp->mp_pages;
|
|
8426
|
+
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
|
8427
|
+
if (omp->mp_pages > 1) {
|
|
8428
|
+
my->mc_olen[toggle] = my->mc_env->me_psize * (omp->mp_pages - 1);
|
|
8429
|
+
my->mc_over[toggle] = (char *)omp + my->mc_env->me_psize;
|
|
8430
|
+
rc = mdb_env_cthr_toggle(my, 1);
|
|
8431
|
+
if (rc)
|
|
8432
|
+
goto done;
|
|
8433
|
+
toggle = my->mc_toggle;
|
|
8434
|
+
}
|
|
8435
|
+
memcpy(NODEDATA(ni), &mo->mp_pgno, sizeof(pgno_t));
|
|
8436
|
+
} else if (ni->mn_flags & F_SUBDATA) {
|
|
8437
|
+
MDB_db db;
|
|
8438
|
+
|
|
8439
|
+
/* Need writable leaf */
|
|
8440
|
+
if (mp != leaf) {
|
|
8441
|
+
mc.mc_pg[mc.mc_top] = leaf;
|
|
8442
|
+
mdb_page_copy(leaf, mp, my->mc_env->me_psize);
|
|
8443
|
+
mp = leaf;
|
|
8444
|
+
ni = NODEPTR(mp, i);
|
|
8445
|
+
}
|
|
8446
|
+
|
|
8447
|
+
memcpy(&db, NODEDATA(ni), sizeof(db));
|
|
8448
|
+
my->mc_toggle = toggle;
|
|
8449
|
+
rc = mdb_env_cwalk(my, &db.md_root, ni->mn_flags & F_DUPDATA);
|
|
8450
|
+
if (rc)
|
|
8451
|
+
goto done;
|
|
8452
|
+
toggle = my->mc_toggle;
|
|
8453
|
+
memcpy(NODEDATA(ni), &db, sizeof(db));
|
|
8454
|
+
}
|
|
8455
|
+
}
|
|
8456
|
+
}
|
|
8457
|
+
} else {
|
|
8458
|
+
mc.mc_ki[mc.mc_top]++;
|
|
8459
|
+
if (mc.mc_ki[mc.mc_top] < n) {
|
|
8460
|
+
pgno_t pg;
|
|
8461
|
+
again:
|
|
8462
|
+
ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]);
|
|
8463
|
+
pg = NODEPGNO(ni);
|
|
8464
|
+
rc = mdb_page_get(txn, pg, &mp, NULL);
|
|
8465
|
+
if (rc)
|
|
8466
|
+
goto done;
|
|
8467
|
+
mc.mc_top++;
|
|
8468
|
+
mc.mc_snum++;
|
|
8469
|
+
mc.mc_ki[mc.mc_top] = 0;
|
|
8470
|
+
if (IS_BRANCH(mp)) {
|
|
8471
|
+
/* Whenever we advance to a sibling branch page,
|
|
8472
|
+
* we must proceed all the way down to its first leaf.
|
|
8473
|
+
*/
|
|
8474
|
+
mdb_page_copy(mc.mc_pg[mc.mc_top], mp, my->mc_env->me_psize);
|
|
8475
|
+
goto again;
|
|
8476
|
+
} else
|
|
8477
|
+
mc.mc_pg[mc.mc_top] = mp;
|
|
8478
|
+
continue;
|
|
8479
|
+
}
|
|
8480
|
+
}
|
|
8481
|
+
if (my->mc_wlen[toggle] >= MDB_WBUF) {
|
|
8482
|
+
rc = mdb_env_cthr_toggle(my, 1);
|
|
8483
|
+
if (rc)
|
|
8484
|
+
goto done;
|
|
8485
|
+
toggle = my->mc_toggle;
|
|
8486
|
+
}
|
|
8487
|
+
mo = (MDB_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
|
8488
|
+
mdb_page_copy(mo, mp, my->mc_env->me_psize);
|
|
8489
|
+
mo->mp_pgno = my->mc_next_pgno++;
|
|
8490
|
+
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
|
8491
|
+
if (mc.mc_top) {
|
|
8492
|
+
/* Update parent if there is one */
|
|
8493
|
+
ni = NODEPTR(mc.mc_pg[mc.mc_top-1], mc.mc_ki[mc.mc_top-1]);
|
|
8494
|
+
SETPGNO(ni, mo->mp_pgno);
|
|
8495
|
+
mdb_cursor_pop(&mc);
|
|
8496
|
+
} else {
|
|
8497
|
+
/* Otherwise we're done */
|
|
8498
|
+
*pg = mo->mp_pgno;
|
|
8499
|
+
break;
|
|
8500
|
+
}
|
|
8501
|
+
}
|
|
8502
|
+
done:
|
|
8503
|
+
free(buf);
|
|
8504
|
+
return rc;
|
|
8505
|
+
}
|
|
8506
|
+
|
|
8507
|
+
/** Copy environment with compaction. */
|
|
8508
|
+
static int ESECT
|
|
8509
|
+
mdb_env_copyfd1(MDB_env *env, HANDLE fd)
|
|
8510
|
+
{
|
|
8511
|
+
MDB_meta *mm;
|
|
8512
|
+
MDB_page *mp;
|
|
8513
|
+
mdb_copy my;
|
|
8514
|
+
MDB_txn *txn = NULL;
|
|
8515
|
+
pthread_t thr;
|
|
8516
|
+
int rc;
|
|
8517
|
+
|
|
8518
|
+
#ifdef _WIN32
|
|
8519
|
+
my.mc_mutex = CreateMutex(NULL, FALSE, NULL);
|
|
8520
|
+
my.mc_cond = CreateEvent(NULL, FALSE, FALSE, NULL);
|
|
8521
|
+
my.mc_wbuf[0] = _aligned_malloc(MDB_WBUF*2, env->me_os_psize);
|
|
8522
|
+
if (my.mc_wbuf[0] == NULL)
|
|
8523
|
+
return errno;
|
|
8524
|
+
#else
|
|
8525
|
+
pthread_mutex_init(&my.mc_mutex, NULL);
|
|
8526
|
+
pthread_cond_init(&my.mc_cond, NULL);
|
|
8527
|
+
#ifdef HAVE_MEMALIGN
|
|
8528
|
+
my.mc_wbuf[0] = memalign(env->me_os_psize, MDB_WBUF*2);
|
|
8529
|
+
if (my.mc_wbuf[0] == NULL)
|
|
8530
|
+
return errno;
|
|
8531
|
+
#else
|
|
8532
|
+
rc = posix_memalign((void **)&my.mc_wbuf[0], env->me_os_psize, MDB_WBUF*2);
|
|
8533
|
+
if (rc)
|
|
8534
|
+
return rc;
|
|
8535
|
+
#endif
|
|
8536
|
+
#endif
|
|
8537
|
+
memset(my.mc_wbuf[0], 0, MDB_WBUF*2);
|
|
8538
|
+
my.mc_wbuf[1] = my.mc_wbuf[0] + MDB_WBUF;
|
|
8539
|
+
my.mc_wlen[0] = 0;
|
|
8540
|
+
my.mc_wlen[1] = 0;
|
|
8541
|
+
my.mc_olen[0] = 0;
|
|
8542
|
+
my.mc_olen[1] = 0;
|
|
8543
|
+
my.mc_next_pgno = 2;
|
|
8544
|
+
my.mc_status = 0;
|
|
8545
|
+
my.mc_new = 1;
|
|
8546
|
+
my.mc_toggle = 0;
|
|
8547
|
+
my.mc_env = env;
|
|
8548
|
+
my.mc_fd = fd;
|
|
8549
|
+
THREAD_CREATE(thr, mdb_env_copythr, &my);
|
|
8550
|
+
|
|
8551
|
+
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
|
8552
|
+
if (rc)
|
|
8553
|
+
return rc;
|
|
8554
|
+
|
|
8555
|
+
mp = (MDB_page *)my.mc_wbuf[0];
|
|
8556
|
+
memset(mp, 0, 2*env->me_psize);
|
|
8557
|
+
mp->mp_pgno = 0;
|
|
8558
|
+
mp->mp_flags = P_META;
|
|
8559
|
+
mm = (MDB_meta *)METADATA(mp);
|
|
8560
|
+
mdb_env_init_meta0(env, mm);
|
|
8561
|
+
mm->mm_address = env->me_metas[0]->mm_address;
|
|
8562
|
+
|
|
8563
|
+
mp = (MDB_page *)(my.mc_wbuf[0] + env->me_psize);
|
|
8564
|
+
mp->mp_pgno = 1;
|
|
8565
|
+
mp->mp_flags = P_META;
|
|
8566
|
+
*(MDB_meta *)METADATA(mp) = *mm;
|
|
8567
|
+
mm = (MDB_meta *)METADATA(mp);
|
|
8568
|
+
|
|
8569
|
+
/* Count the number of free pages, subtract from lastpg to find
|
|
8570
|
+
* number of active pages
|
|
8571
|
+
*/
|
|
8572
|
+
{
|
|
8573
|
+
MDB_ID freecount = 0;
|
|
8574
|
+
MDB_cursor mc;
|
|
8575
|
+
MDB_val key, data;
|
|
8576
|
+
mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
|
|
8577
|
+
while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
|
|
8578
|
+
freecount += *(MDB_ID *)data.mv_data;
|
|
8579
|
+
freecount += txn->mt_dbs[0].md_branch_pages +
|
|
8580
|
+
txn->mt_dbs[0].md_leaf_pages +
|
|
8581
|
+
txn->mt_dbs[0].md_overflow_pages;
|
|
8582
|
+
|
|
8583
|
+
/* Set metapage 1 */
|
|
8584
|
+
mm->mm_last_pg = txn->mt_next_pgno - freecount - 1;
|
|
8585
|
+
mm->mm_dbs[1] = txn->mt_dbs[1];
|
|
8586
|
+
mm->mm_dbs[1].md_root = mm->mm_last_pg;
|
|
8587
|
+
mm->mm_txnid = 1;
|
|
8588
|
+
}
|
|
8589
|
+
my.mc_wlen[0] = env->me_psize * 2;
|
|
8590
|
+
my.mc_txn = txn;
|
|
8591
|
+
pthread_mutex_lock(&my.mc_mutex);
|
|
8592
|
+
while(my.mc_new)
|
|
8593
|
+
pthread_cond_wait(&my.mc_cond, &my.mc_mutex);
|
|
8594
|
+
pthread_mutex_unlock(&my.mc_mutex);
|
|
8595
|
+
rc = mdb_env_cwalk(&my, &txn->mt_dbs[1].md_root, 0);
|
|
8596
|
+
if (rc == MDB_SUCCESS && my.mc_wlen[my.mc_toggle])
|
|
8597
|
+
rc = mdb_env_cthr_toggle(&my, 1);
|
|
8598
|
+
mdb_env_cthr_toggle(&my, -1);
|
|
8599
|
+
pthread_mutex_lock(&my.mc_mutex);
|
|
8600
|
+
while(my.mc_new)
|
|
8601
|
+
pthread_cond_wait(&my.mc_cond, &my.mc_mutex);
|
|
8602
|
+
pthread_mutex_unlock(&my.mc_mutex);
|
|
8603
|
+
THREAD_FINISH(thr);
|
|
8604
|
+
|
|
8605
|
+
mdb_txn_abort(txn);
|
|
8606
|
+
#ifdef _WIN32
|
|
8607
|
+
CloseHandle(my.mc_cond);
|
|
8608
|
+
CloseHandle(my.mc_mutex);
|
|
8609
|
+
_aligned_free(my.mc_wbuf[0]);
|
|
8610
|
+
#else
|
|
8611
|
+
pthread_cond_destroy(&my.mc_cond);
|
|
8612
|
+
pthread_mutex_destroy(&my.mc_mutex);
|
|
8613
|
+
free(my.mc_wbuf[0]);
|
|
8614
|
+
#endif
|
|
8615
|
+
return rc;
|
|
8616
|
+
}
|
|
8617
|
+
|
|
8618
|
+
/** Copy environment as-is. */
|
|
8619
|
+
static int ESECT
|
|
8620
|
+
mdb_env_copyfd0(MDB_env *env, HANDLE fd)
|
|
8621
|
+
{
|
|
8622
|
+
MDB_txn *txn = NULL;
|
|
8623
|
+
int rc;
|
|
8624
|
+
size_t wsize;
|
|
8625
|
+
char *ptr;
|
|
8626
|
+
#ifdef _WIN32
|
|
8627
|
+
DWORD len, w2;
|
|
8628
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) rc = WriteFile(fd, ptr, w2, &len, NULL)
|
|
8629
|
+
#else
|
|
8630
|
+
ssize_t len;
|
|
8631
|
+
size_t w2;
|
|
8632
|
+
#define DO_WRITE(rc, fd, ptr, w2, len) len = write(fd, ptr, w2); rc = (len >= 0)
|
|
8633
|
+
#endif
|
|
8634
|
+
|
|
8635
|
+
/* Do the lock/unlock of the reader mutex before starting the
|
|
8636
|
+
* write txn. Otherwise other read txns could block writers.
|
|
8637
|
+
*/
|
|
8638
|
+
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
|
8639
|
+
if (rc)
|
|
8640
|
+
return rc;
|
|
8641
|
+
|
|
8642
|
+
if (env->me_txns) {
|
|
8643
|
+
/* We must start the actual read txn after blocking writers */
|
|
8644
|
+
mdb_txn_reset0(txn, "reset-stage1");
|
|
8645
|
+
|
|
8646
|
+
/* Temporarily block writers until we snapshot the meta pages */
|
|
8647
|
+
LOCK_MUTEX_W(env);
|
|
8648
|
+
|
|
8649
|
+
rc = mdb_txn_renew0(txn);
|
|
8650
|
+
if (rc) {
|
|
8651
|
+
UNLOCK_MUTEX_W(env);
|
|
8652
|
+
goto leave;
|
|
8653
|
+
}
|
|
8654
|
+
}
|
|
8655
|
+
|
|
8656
|
+
wsize = env->me_psize * 2;
|
|
8657
|
+
ptr = env->me_map;
|
|
8658
|
+
w2 = wsize;
|
|
8659
|
+
while (w2 > 0) {
|
|
8660
|
+
DO_WRITE(rc, fd, ptr, w2, len);
|
|
8661
|
+
if (!rc) {
|
|
8662
|
+
rc = ErrCode();
|
|
8663
|
+
break;
|
|
8664
|
+
} else if (len > 0) {
|
|
8665
|
+
rc = MDB_SUCCESS;
|
|
8666
|
+
ptr += len;
|
|
8667
|
+
w2 -= len;
|
|
8668
|
+
continue;
|
|
8669
|
+
} else {
|
|
8670
|
+
/* Non-blocking or async handles are not supported */
|
|
8671
|
+
rc = EIO;
|
|
8672
|
+
break;
|
|
8673
|
+
}
|
|
8674
|
+
}
|
|
8675
|
+
if (env->me_txns)
|
|
8676
|
+
UNLOCK_MUTEX_W(env);
|
|
8677
|
+
|
|
8678
|
+
if (rc)
|
|
8679
|
+
goto leave;
|
|
8680
|
+
|
|
8681
|
+
w2 = txn->mt_next_pgno * env->me_psize;
|
|
8682
|
+
#ifdef WIN32
|
|
8683
|
+
{
|
|
8684
|
+
LARGE_INTEGER fsize;
|
|
8685
|
+
GetFileSizeEx(env->me_fd, &fsize);
|
|
8686
|
+
if (w2 > fsize.QuadPart)
|
|
8687
|
+
w2 = fsize.QuadPart;
|
|
8688
|
+
}
|
|
8689
|
+
#else
|
|
8690
|
+
{
|
|
8691
|
+
struct stat st;
|
|
8692
|
+
fstat(env->me_fd, &st);
|
|
8693
|
+
if (w2 > (size_t)st.st_size)
|
|
8694
|
+
w2 = st.st_size;
|
|
8695
|
+
}
|
|
8696
|
+
#endif
|
|
8697
|
+
wsize = w2 - wsize;
|
|
8698
|
+
while (wsize > 0) {
|
|
8699
|
+
if (wsize > MAX_WRITE)
|
|
8700
|
+
w2 = MAX_WRITE;
|
|
8701
|
+
else
|
|
8702
|
+
w2 = wsize;
|
|
8703
|
+
DO_WRITE(rc, fd, ptr, w2, len);
|
|
8704
|
+
if (!rc) {
|
|
8705
|
+
rc = ErrCode();
|
|
8706
|
+
break;
|
|
8707
|
+
} else if (len > 0) {
|
|
8708
|
+
rc = MDB_SUCCESS;
|
|
8709
|
+
ptr += len;
|
|
8710
|
+
wsize -= len;
|
|
8711
|
+
continue;
|
|
8712
|
+
} else {
|
|
8713
|
+
rc = EIO;
|
|
8714
|
+
break;
|
|
8715
|
+
}
|
|
8716
|
+
}
|
|
8717
|
+
|
|
8718
|
+
leave:
|
|
8719
|
+
mdb_txn_abort(txn);
|
|
8720
|
+
return rc;
|
|
8721
|
+
}
|
|
8722
|
+
|
|
8723
|
+
int ESECT
|
|
8724
|
+
mdb_env_copyfd2(MDB_env *env, HANDLE fd, unsigned int flags)
|
|
8725
|
+
{
|
|
8726
|
+
if (flags & MDB_CP_COMPACT)
|
|
8727
|
+
return mdb_env_copyfd1(env, fd);
|
|
8728
|
+
else
|
|
8729
|
+
return mdb_env_copyfd0(env, fd);
|
|
8730
|
+
}
|
|
8731
|
+
|
|
8732
|
+
int ESECT
|
|
8733
|
+
mdb_env_copyfd(MDB_env *env, HANDLE fd)
|
|
8734
|
+
{
|
|
8735
|
+
return mdb_env_copyfd2(env, fd, 0);
|
|
8736
|
+
}
|
|
8737
|
+
|
|
8738
|
+
int ESECT
|
|
8739
|
+
mdb_env_copy2(MDB_env *env, const char *path, unsigned int flags)
|
|
8740
|
+
{
|
|
8741
|
+
int rc, len;
|
|
8742
|
+
char *lpath;
|
|
8743
|
+
HANDLE newfd = INVALID_HANDLE_VALUE;
|
|
8744
|
+
|
|
8745
|
+
if (env->me_flags & MDB_NOSUBDIR) {
|
|
8746
|
+
lpath = (char *)path;
|
|
8747
|
+
} else {
|
|
8748
|
+
len = strlen(path);
|
|
8749
|
+
len += sizeof(DATANAME);
|
|
8750
|
+
lpath = malloc(len);
|
|
8751
|
+
if (!lpath)
|
|
8752
|
+
return ENOMEM;
|
|
8753
|
+
sprintf(lpath, "%s" DATANAME, path);
|
|
8754
|
+
}
|
|
8755
|
+
|
|
8756
|
+
/* The destination path must exist, but the destination file must not.
|
|
8757
|
+
* We don't want the OS to cache the writes, since the source data is
|
|
8758
|
+
* already in the OS cache.
|
|
8759
|
+
*/
|
|
8760
|
+
#ifdef _WIN32
|
|
8761
|
+
newfd = CreateFile(lpath, GENERIC_WRITE, 0, NULL, CREATE_NEW,
|
|
8762
|
+
FILE_FLAG_NO_BUFFERING|FILE_FLAG_WRITE_THROUGH, NULL);
|
|
8763
|
+
#else
|
|
8764
|
+
newfd = open(lpath, O_WRONLY|O_CREAT|O_EXCL, 0666);
|
|
8765
|
+
#endif
|
|
8766
|
+
if (newfd == INVALID_HANDLE_VALUE) {
|
|
8767
|
+
rc = ErrCode();
|
|
8768
|
+
goto leave;
|
|
8769
|
+
}
|
|
8770
|
+
|
|
8771
|
+
if (env->me_psize >= env->me_os_psize) {
|
|
8772
|
+
#ifdef O_DIRECT
|
|
8773
|
+
/* Set O_DIRECT if the file system supports it */
|
|
8774
|
+
if ((rc = fcntl(newfd, F_GETFL)) != -1)
|
|
8775
|
+
(void) fcntl(newfd, F_SETFL, rc | O_DIRECT);
|
|
8776
|
+
#endif
|
|
8777
|
+
#ifdef F_NOCACHE /* __APPLE__ */
|
|
8778
|
+
rc = fcntl(newfd, F_NOCACHE, 1);
|
|
8779
|
+
if (rc) {
|
|
8780
|
+
rc = ErrCode();
|
|
8781
|
+
goto leave;
|
|
8782
|
+
}
|
|
8783
|
+
#endif
|
|
8784
|
+
}
|
|
8785
|
+
|
|
8786
|
+
rc = mdb_env_copyfd2(env, newfd, flags);
|
|
8787
|
+
|
|
8788
|
+
leave:
|
|
8789
|
+
if (!(env->me_flags & MDB_NOSUBDIR))
|
|
8790
|
+
free(lpath);
|
|
8791
|
+
if (newfd != INVALID_HANDLE_VALUE)
|
|
8792
|
+
if (close(newfd) < 0 && rc == MDB_SUCCESS)
|
|
8793
|
+
rc = ErrCode();
|
|
8794
|
+
|
|
8795
|
+
return rc;
|
|
8796
|
+
}
|
|
8797
|
+
|
|
8798
|
+
int ESECT
|
|
8799
|
+
mdb_env_copy(MDB_env *env, const char *path)
|
|
8800
|
+
{
|
|
8801
|
+
return mdb_env_copy2(env, path, 0);
|
|
8802
|
+
}
|
|
8803
|
+
|
|
8804
|
+
int ESECT
|
|
8041
8805
|
mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff)
|
|
8042
8806
|
{
|
|
8043
8807
|
if ((flag & CHANGEABLE) != flag)
|
|
@@ -8049,7 +8813,7 @@ mdb_env_set_flags(MDB_env *env, unsigned int flag, int onoff)
|
|
|
8049
8813
|
return MDB_SUCCESS;
|
|
8050
8814
|
}
|
|
8051
8815
|
|
|
8052
|
-
int
|
|
8816
|
+
int ESECT
|
|
8053
8817
|
mdb_env_get_flags(MDB_env *env, unsigned int *arg)
|
|
8054
8818
|
{
|
|
8055
8819
|
if (!env || !arg)
|
|
@@ -8059,7 +8823,7 @@ mdb_env_get_flags(MDB_env *env, unsigned int *arg)
|
|
|
8059
8823
|
return MDB_SUCCESS;
|
|
8060
8824
|
}
|
|
8061
8825
|
|
|
8062
|
-
int
|
|
8826
|
+
int ESECT
|
|
8063
8827
|
mdb_env_set_userctx(MDB_env *env, void *ctx)
|
|
8064
8828
|
{
|
|
8065
8829
|
if (!env)
|
|
@@ -8068,13 +8832,13 @@ mdb_env_set_userctx(MDB_env *env, void *ctx)
|
|
|
8068
8832
|
return MDB_SUCCESS;
|
|
8069
8833
|
}
|
|
8070
8834
|
|
|
8071
|
-
void *
|
|
8835
|
+
void * ESECT
|
|
8072
8836
|
mdb_env_get_userctx(MDB_env *env)
|
|
8073
8837
|
{
|
|
8074
8838
|
return env ? env->me_userctx : NULL;
|
|
8075
8839
|
}
|
|
8076
8840
|
|
|
8077
|
-
int
|
|
8841
|
+
int ESECT
|
|
8078
8842
|
mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
|
|
8079
8843
|
{
|
|
8080
8844
|
if (!env)
|
|
@@ -8085,7 +8849,7 @@ mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
|
|
|
8085
8849
|
return MDB_SUCCESS;
|
|
8086
8850
|
}
|
|
8087
8851
|
|
|
8088
|
-
int
|
|
8852
|
+
int ESECT
|
|
8089
8853
|
mdb_env_get_path(MDB_env *env, const char **arg)
|
|
8090
8854
|
{
|
|
8091
8855
|
if (!env || !arg)
|
|
@@ -8095,7 +8859,7 @@ mdb_env_get_path(MDB_env *env, const char **arg)
|
|
|
8095
8859
|
return MDB_SUCCESS;
|
|
8096
8860
|
}
|
|
8097
8861
|
|
|
8098
|
-
int
|
|
8862
|
+
int ESECT
|
|
8099
8863
|
mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg)
|
|
8100
8864
|
{
|
|
8101
8865
|
if (!env || !arg)
|
|
@@ -8111,7 +8875,7 @@ mdb_env_get_fd(MDB_env *env, mdb_filehandle_t *arg)
|
|
|
8111
8875
|
* @param[out] arg the address of an #MDB_stat structure to receive the stats.
|
|
8112
8876
|
* @return 0, this function always succeeds.
|
|
8113
8877
|
*/
|
|
8114
|
-
static int
|
|
8878
|
+
static int ESECT
|
|
8115
8879
|
mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg)
|
|
8116
8880
|
{
|
|
8117
8881
|
arg->ms_psize = env->me_psize;
|
|
@@ -8123,7 +8887,8 @@ mdb_stat0(MDB_env *env, MDB_db *db, MDB_stat *arg)
|
|
|
8123
8887
|
|
|
8124
8888
|
return MDB_SUCCESS;
|
|
8125
8889
|
}
|
|
8126
|
-
|
|
8890
|
+
|
|
8891
|
+
int ESECT
|
|
8127
8892
|
mdb_env_stat(MDB_env *env, MDB_stat *arg)
|
|
8128
8893
|
{
|
|
8129
8894
|
int toggle;
|
|
@@ -8136,7 +8901,7 @@ mdb_env_stat(MDB_env *env, MDB_stat *arg)
|
|
|
8136
8901
|
return mdb_stat0(env, &env->me_metas[toggle]->mm_dbs[MAIN_DBI], arg);
|
|
8137
8902
|
}
|
|
8138
8903
|
|
|
8139
|
-
int
|
|
8904
|
+
int ESECT
|
|
8140
8905
|
mdb_env_info(MDB_env *env, MDB_envinfo *arg)
|
|
8141
8906
|
{
|
|
8142
8907
|
int toggle;
|
|
@@ -8145,7 +8910,7 @@ mdb_env_info(MDB_env *env, MDB_envinfo *arg)
|
|
|
8145
8910
|
return EINVAL;
|
|
8146
8911
|
|
|
8147
8912
|
toggle = mdb_env_pick_meta(env);
|
|
8148
|
-
arg->me_mapaddr =
|
|
8913
|
+
arg->me_mapaddr = env->me_metas[toggle]->mm_address;
|
|
8149
8914
|
arg->me_mapsize = env->me_mapsize;
|
|
8150
8915
|
arg->me_maxreaders = env->me_maxreaders;
|
|
8151
8916
|
|
|
@@ -8187,8 +8952,9 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
|
8187
8952
|
MDB_val key, data;
|
|
8188
8953
|
MDB_dbi i;
|
|
8189
8954
|
MDB_cursor mc;
|
|
8955
|
+
MDB_db dummy;
|
|
8190
8956
|
int rc, dbflag, exact;
|
|
8191
|
-
unsigned int unused = 0;
|
|
8957
|
+
unsigned int unused = 0, seq;
|
|
8192
8958
|
size_t len;
|
|
8193
8959
|
|
|
8194
8960
|
if (txn->mt_dbxs[FREE_DBI].md_cmp == NULL) {
|
|
@@ -8256,7 +9022,6 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
|
8256
9022
|
return MDB_INCOMPATIBLE;
|
|
8257
9023
|
} else if (rc == MDB_NOTFOUND && (flags & MDB_CREATE)) {
|
|
8258
9024
|
/* Create if requested */
|
|
8259
|
-
MDB_db dummy;
|
|
8260
9025
|
data.mv_size = sizeof(MDB_db);
|
|
8261
9026
|
data.mv_data = &dummy;
|
|
8262
9027
|
memset(&dummy, 0, sizeof(dummy));
|
|
@@ -8273,6 +9038,12 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
|
|
|
8273
9038
|
txn->mt_dbxs[slot].md_name.mv_size = len;
|
|
8274
9039
|
txn->mt_dbxs[slot].md_rel = NULL;
|
|
8275
9040
|
txn->mt_dbflags[slot] = dbflag;
|
|
9041
|
+
/* txn-> and env-> are the same in read txns, use
|
|
9042
|
+
* tmp variable to avoid undefined assignment
|
|
9043
|
+
*/
|
|
9044
|
+
seq = ++txn->mt_env->me_dbiseqs[slot];
|
|
9045
|
+
txn->mt_dbiseqs[slot] = seq;
|
|
9046
|
+
|
|
8276
9047
|
memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
|
|
8277
9048
|
*dbi = slot;
|
|
8278
9049
|
mdb_default_cmp(txn, slot);
|
|
@@ -8307,10 +9078,14 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
|
|
|
8307
9078
|
if (dbi <= MAIN_DBI || dbi >= env->me_maxdbs)
|
|
8308
9079
|
return;
|
|
8309
9080
|
ptr = env->me_dbxs[dbi].md_name.mv_data;
|
|
8310
|
-
|
|
8311
|
-
|
|
8312
|
-
|
|
8313
|
-
|
|
9081
|
+
/* If there was no name, this was already closed */
|
|
9082
|
+
if (ptr) {
|
|
9083
|
+
env->me_dbxs[dbi].md_name.mv_data = NULL;
|
|
9084
|
+
env->me_dbxs[dbi].md_name.mv_size = 0;
|
|
9085
|
+
env->me_dbflags[dbi] = 0;
|
|
9086
|
+
env->me_dbiseqs[dbi]++;
|
|
9087
|
+
free(ptr);
|
|
9088
|
+
}
|
|
8314
9089
|
}
|
|
8315
9090
|
|
|
8316
9091
|
int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags)
|
|
@@ -8420,6 +9195,9 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
|
|
|
8420
9195
|
if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
|
|
8421
9196
|
return EACCES;
|
|
8422
9197
|
|
|
9198
|
+
if (dbi > MAIN_DBI && TXN_DBI_CHANGED(txn, dbi))
|
|
9199
|
+
return MDB_BAD_DBI;
|
|
9200
|
+
|
|
8423
9201
|
rc = mdb_cursor_open(txn, dbi, &mc);
|
|
8424
9202
|
if (rc)
|
|
8425
9203
|
return rc;
|
|
@@ -8493,12 +9271,14 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
|
|
|
8493
9271
|
return MDB_SUCCESS;
|
|
8494
9272
|
}
|
|
8495
9273
|
|
|
8496
|
-
int
|
|
9274
|
+
int ESECT
|
|
9275
|
+
mdb_env_get_maxkeysize(MDB_env *env)
|
|
8497
9276
|
{
|
|
8498
9277
|
return ENV_MAXKEY(env);
|
|
8499
9278
|
}
|
|
8500
9279
|
|
|
8501
|
-
int
|
|
9280
|
+
int ESECT
|
|
9281
|
+
mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
|
8502
9282
|
{
|
|
8503
9283
|
unsigned int i, rdrs;
|
|
8504
9284
|
MDB_reader *mr;
|
|
@@ -8538,7 +9318,8 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
|
|
|
8538
9318
|
/** Insert pid into list if not already present.
|
|
8539
9319
|
* return -1 if already present.
|
|
8540
9320
|
*/
|
|
8541
|
-
static int
|
|
9321
|
+
static int ESECT
|
|
9322
|
+
mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid)
|
|
8542
9323
|
{
|
|
8543
9324
|
/* binary search of pid in list */
|
|
8544
9325
|
unsigned base = 0;
|
|
@@ -8574,7 +9355,8 @@ static int mdb_pid_insert(MDB_PID_T *ids, MDB_PID_T pid)
|
|
|
8574
9355
|
return 0;
|
|
8575
9356
|
}
|
|
8576
9357
|
|
|
8577
|
-
int
|
|
9358
|
+
int ESECT
|
|
9359
|
+
mdb_reader_check(MDB_env *env, int *dead)
|
|
8578
9360
|
{
|
|
8579
9361
|
unsigned int i, j, rdrs;
|
|
8580
9362
|
MDB_reader *mr;
|