lmdb 0.4.1 → 0.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 92f31229585bce51aaa1f94fab800c5f98488869
4
- data.tar.gz: e57e72dac8a031aeb3163c917252160b2c3c21c2
3
+ metadata.gz: 19495dad2d084f39462d5938acbc27ad061f797e
4
+ data.tar.gz: 8dfca9b055ccf2fa62a8bddbd822b8a14b0cb6f5
5
5
  SHA512:
6
- metadata.gz: c7fe9fba9eae7efe4c0a15387906dd006e7f1f9659b8df9b8d7459386e9ea833df4b32e3df8d131d060299d6e948212d26dca9cb2b593d3f9106d55530b4c1b8
7
- data.tar.gz: fc1108b3f94451fd44b29b9f37a30d0cd6641c8c5193552751f970ed46b67f91da32a7da3b044d4443371e515d781b537f6cc2df975169ea1beb6e50bc338514
6
+ metadata.gz: 9d671fbfae42a4d3ed1022deb479cebc57e31b740ba86fb2f125960b8fee38f8020fa0f008b2a35b9a6b2e62b6497b31bcb656dba9f95caafeab1d1f90710d54
7
+ data.tar.gz: 9912e57edd9db4d966fc13c2603488cc1f50acd0560b678e98a5eb965d285b2974b1c06874d89ac69cead95b42b64634b3c2d8d240df81710693b309a792bfdc
@@ -5,11 +5,9 @@ rvm:
5
5
  - 2.0.0
6
6
  - 2.1.0
7
7
  - ruby-head
8
- - rbx-18mode
9
- - rbx-19mode
8
+ - rbx
10
9
  matrix:
11
10
  allow_failures:
12
11
  - rvm: ruby-head
13
12
  - rvm: 1.8.7
14
- - rvm: rbx-18mode
15
- - rvm: rbx-19mode
13
+ - rvm: rbx
data/CHANGES CHANGED
@@ -1,3 +1,8 @@
1
+ 0.4.2
2
+
3
+ * Fix #11, #12, #14.
4
+ * Import lmdb 0.9.12 source.
5
+
1
6
  0.4.1
2
7
 
3
8
  * Fix #10
@@ -4,3 +4,4 @@ Evgeniy Dolzhenko <evgeniy.dolzhenko@blacksquaremedia.com>
4
4
  Julien Ammous <schmurfy@gmail.com>
5
5
  Nathaniel Pierce <nwpierce@gmail.com>
6
6
  Richard Golding <golding@chrysaetos.org>
7
+ Joel VanderWerf <vjoel@users.sourceforge.net>
data/Rakefile CHANGED
@@ -1,5 +1,9 @@
1
1
  #!/usr/bin/env rake
2
2
 
3
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
4
+ GEMSPEC = Dir['*.gemspec'].first
5
+ PRJ = File.basename(GEMSPEC, ".gemspec")
6
+
3
7
  require 'bundler/setup'
4
8
  require 'rspec/core/rake_task'
5
9
  require 'rake/extensiontask'
@@ -8,3 +12,58 @@ RSpec::Core::RakeTask.new :spec
8
12
  Rake::ExtensionTask.new :lmdb_ext
9
13
 
10
14
  task :default => [:compile, :spec]
15
+
16
+ def version
17
+ @version ||= begin
18
+ require "#{PRJ}/version"
19
+ warn "LMDB::VERSION not a string" unless LMDB::VERSION.kind_of? String
20
+ LMDB::VERSION
21
+ end
22
+ end
23
+
24
+ def tag
25
+ @tag ||= "v#{version}"
26
+ end
27
+
28
+ def latest
29
+ @latest ||= `git describe --abbrev=0 --tags --match 'v*'`.chomp
30
+ end
31
+
32
+ desc "Commit, tag, and push repo; build and push gem"
33
+ task :release => "release:is_new_version" do
34
+ require 'tempfile'
35
+
36
+ sh "gem build #{GEMSPEC}"
37
+
38
+ file = Tempfile.new "template"
39
+ begin
40
+ file.puts "release #{version}"
41
+ file.close
42
+ sh "git commit --allow-empty -a -v -t #{file.path}"
43
+ ensure
44
+ file.close unless file.closed?
45
+ file.unlink
46
+ end
47
+
48
+ sh "git tag #{tag}"
49
+ sh "git push"
50
+ sh "git push --tags"
51
+
52
+ sh "gem push #{tag}.gem"
53
+ end
54
+
55
+ namespace :release do
56
+ desc "Diff to latest release"
57
+ task :diff do
58
+ sh "git diff #{latest}"
59
+ end
60
+
61
+ desc "Log to latest release"
62
+ task :log do
63
+ sh "git log #{latest}.."
64
+ end
65
+
66
+ task :is_new_version do
67
+ abort "#{tag} exists; update version!" unless `git tag -l #{tag}`.empty?
68
+ end
69
+ end
@@ -1,5 +1,29 @@
1
1
  LMDB 0.9 Change Log
2
2
 
3
+ LMDB 0.9.12 Release (2014/06/13)
4
+ Fix MDB_GET_BOTH regression (ITS#7875,#7681)
5
+ Fix MDB_MULTIPLE writing multiple keys (ITS#7834)
6
+ Fix mdb_rebalance (ITS#7829)
7
+ Fix mdb_page_split (ITS#7815)
8
+ Fix md_entries count (ITS#7861,#7828,#7793)
9
+ Fix MDB_CURRENT (ITS#7793)
10
+ Fix possible crash on Windows DLL detach
11
+ Misc code cleanup
12
+ Documentation
13
+ mdb_cursor_put: cursor moves on error (ITS#7771)
14
+
15
+
16
+ LMDB 0.9.11 Release (2014/01/15)
17
+ Add mdb_env_set_assert() (ITS#7775)
18
+ Fix: invalidate txn on page allocation errors (ITS#7377)
19
+ Fix xcursor tracking in mdb_cursor_del0() (ITS#7771)
20
+ Fix corruption from deletes (ITS#7756)
21
+ Fix Windows/MSVC build issues
22
+ Raise safe limit of max MDB_MAXKEYSIZE
23
+ Misc code cleanup
24
+ Documentation
25
+ Remove spurious note about non-overlapping flags (ITS#7665)
26
+
3
27
  LMDB 0.9.10 Release (2013/11/12)
4
28
  Add MDB_NOMEMINIT option
5
29
  Fix mdb_page_split() again (ITS#7589)
@@ -16,7 +40,7 @@ LMDB 0.9.9 Release (2013/10/24)
16
40
  Fix mdb_page_merge() cursor fixup (ITS#7722)
17
41
  Fix mdb_cursor_del() on last delete (ITS#7718)
18
42
  Fix adding WRITEMAP on existing env (ITS#7715)
19
- Fixes for nested txns (ITS#7515)
43
+ Fix nested txns (ITS#7515)
20
44
  Fix mdb_env_copy() O_DIRECT bug (ITS#7682)
21
45
  Fix mdb_cursor_set(SET_RANGE) return code (ITS#7681)
22
46
  Fix mdb_rebalance() cursor fixup (ITS#7701)
@@ -1,4 +1,4 @@
1
- Copyright 2011-2013 Howard Chu, Symas Corp.
1
+ Copyright 2011-2014 Howard Chu, Symas Corp.
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
@@ -119,7 +119,7 @@
119
119
  *
120
120
  * @author Howard Chu, Symas Corporation.
121
121
  *
122
- * @copyright Copyright 2011-2013 Howard Chu, Symas Corp. All rights reserved.
122
+ * @copyright Copyright 2011-2014 Howard Chu, Symas Corp. All rights reserved.
123
123
  *
124
124
  * Redistribution and use in source and binary forms, with or without
125
125
  * modification, are permitted only as authorized by the OpenLDAP
@@ -184,7 +184,7 @@ typedef int mdb_filehandle_t;
184
184
  /** Library minor version */
185
185
  #define MDB_VERSION_MINOR 9
186
186
  /** Library patch version */
187
- #define MDB_VERSION_PATCH 10
187
+ #define MDB_VERSION_PATCH 12
188
188
 
189
189
  /** Combine args a,b,c into a single integer for easy version comparisons */
190
190
  #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
@@ -194,7 +194,7 @@ typedef int mdb_filehandle_t;
194
194
  MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
195
195
 
196
196
  /** The release date of this library version */
197
- #define MDB_VERSION_DATE "November 11, 2013"
197
+ #define MDB_VERSION_DATE "June 13, 2014"
198
198
 
199
199
  /** A stringifier for the version info */
200
200
  #define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")"
@@ -263,8 +263,6 @@ typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
263
263
  typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx);
264
264
 
265
265
  /** @defgroup mdb_env Environment Flags
266
- *
267
- * Values do not overlap Database Flags.
268
266
  * @{
269
267
  */
270
268
  /** mmap at a fixed address (experimental) */
@@ -292,8 +290,6 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
292
290
  /** @} */
293
291
 
294
292
  /** @defgroup mdb_dbi_open Database Flags
295
- *
296
- * Values do not overlap Environment Flags.
297
293
  * @{
298
294
  */
299
295
  /** use reverse string keys */
@@ -412,7 +408,7 @@ typedef enum MDB_cursor_op {
412
408
  #define MDB_BAD_RSLOT (-30783)
413
409
  /** Transaction cannot recover - it must be aborted */
414
410
  #define MDB_BAD_TXN (-30782)
415
- /** Too big key/data, key is empty, or wrong DUPFIXED size */
411
+ /** Unsupported size of key/DB name/data, or wrong DUPFIXED size */
416
412
  #define MDB_BAD_VALSIZE (-30781)
417
413
  #define MDB_LAST_ERRCODE MDB_BAD_VALSIZE
418
414
  /** @} */
@@ -672,7 +668,8 @@ void mdb_env_close(MDB_env *env);
672
668
  /** @brief Set environment flags.
673
669
  *
674
670
  * This may be used to set some flags in addition to those from
675
- * #mdb_env_open(), or to unset these flags.
671
+ * #mdb_env_open(), or to unset these flags. If several threads
672
+ * change the flags at the same time, the result is undefined.
676
673
  * @param[in] env An environment handle returned by #mdb_env_create()
677
674
  * @param[in] flags The flags to change, bitwise OR'ed together
678
675
  * @param[in] onoff A non-zero value sets the flags, zero clears them.
@@ -787,6 +784,10 @@ int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
787
784
  * environment. Simpler applications that use the environment as a single
788
785
  * unnamed database can ignore this option.
789
786
  * This function may only be called after #mdb_env_create() and before #mdb_env_open().
787
+ *
788
+ * Currently a moderate number of slots are cheap but a huge number gets
789
+ * expensive: 7-120 words per transaction, and every #mdb_dbi_open()
790
+ * does a linear search of the opened slots.
790
791
  * @param[in] env An environment handle returned by #mdb_env_create()
791
792
  * @param[in] dbs The maximum number of databases
792
793
  * @return A non-zero error value on failure and 0 on success. Some possible
@@ -797,15 +798,47 @@ int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
797
798
  */
798
799
  int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
799
800
 
800
- /** @brief Get the maximum size of a key for the environment.
801
+ /** @brief Get the maximum size of keys and #MDB_DUPSORT data we can write.
801
802
  *
802
- * This is the compile-time constant #MDB_MAXKEYSIZE, default 511.
803
+ * Depends on the compile-time constant #MDB_MAXKEYSIZE. Default 511.
803
804
  * See @ref MDB_val.
804
805
  * @param[in] env An environment handle returned by #mdb_env_create()
805
- * @return The maximum size of a key
806
+ * @return The maximum size of a key we can write
806
807
  */
807
808
  int mdb_env_get_maxkeysize(MDB_env *env);
808
809
 
810
+ /** @brief Set application information associated with the #MDB_env.
811
+ *
812
+ * @param[in] env An environment handle returned by #mdb_env_create()
813
+ * @param[in] ctx An arbitrary pointer for whatever the application needs.
814
+ * @return A non-zero error value on failure and 0 on success.
815
+ */
816
+ int mdb_env_set_userctx(MDB_env *env, void *ctx);
817
+
818
+ /** @brief Get the application information associated with the #MDB_env.
819
+ *
820
+ * @param[in] env An environment handle returned by #mdb_env_create()
821
+ * @return The pointer set by #mdb_env_set_userctx().
822
+ */
823
+ void *mdb_env_get_userctx(MDB_env *env);
824
+
825
+ /** @brief A callback function for most MDB assert() failures,
826
+ * called before printing the message and aborting.
827
+ *
828
+ * @param[in] env An environment handle returned by #mdb_env_create().
829
+ * @param[in] msg The assertion message, not including newline.
830
+ */
831
+ typedef void MDB_assert_func(MDB_env *env, const char *msg);
832
+
833
+ /** Set or reset the assert() callback of the environment.
834
+ * Disabled if liblmdb is built with NDEBUG.
835
+ * @note This hack should become obsolete as lmdb's error handling matures.
836
+ * @param[in] env An environment handle returned by #mdb_env_create().
837
+ * @param[in] func An #MDB_assert_func function, or 0.
838
+ * @return A non-zero error value on failure and 0 on success.
839
+ */
840
+ int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func);
841
+
809
842
  /** @brief Create a transaction for use with the environment.
810
843
  *
811
844
  * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
@@ -922,7 +955,7 @@ int mdb_txn_renew(MDB_txn *txn);
922
955
  * independently of whether such a database exists.
923
956
  * The database handle may be discarded by calling #mdb_dbi_close().
924
957
  * The old database handle is returned if the database was already open.
925
- * The handle must only be closed once.
958
+ * The handle may only be closed once.
926
959
  * The database handle will be private to the current transaction until
927
960
  * the transaction is successfully committed. If the transaction is
928
961
  * aborted the handle will be closed automatically.
@@ -934,7 +967,8 @@ int mdb_txn_renew(MDB_txn *txn);
934
967
  * use this function.
935
968
  *
936
969
  * To use named databases (with name != NULL), #mdb_env_set_maxdbs()
937
- * must be called before opening the environment.
970
+ * must be called before opening the environment. Database names
971
+ * are kept as keys in the unnamed database.
938
972
  * @param[in] txn A transaction handle returned by #mdb_txn_begin()
939
973
  * @param[in] name The name of the database to open. If only a single
940
974
  * database is needed in the environment, this value may be NULL.
@@ -1004,12 +1038,19 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
1004
1038
  */
1005
1039
  int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags);
1006
1040
 
1007
- /** @brief Close a database handle.
1041
+ /** @brief Close a database handle. Normally unnecessary. Use with care:
1008
1042
  *
1009
1043
  * This call is not mutex protected. Handles should only be closed by
1010
1044
  * a single thread, and only if no other threads are going to reference
1011
1045
  * the database handle or one of its cursors any further. Do not close
1012
1046
  * a handle if an existing transaction has modified its database.
1047
+ * Doing so can cause misbehavior from database corruption to errors
1048
+ * like MDB_BAD_VALSIZE (since the DB name is gone).
1049
+ *
1050
+ * Closing a database handle is not necessary, but lets #mdb_dbi_open()
1051
+ * reuse the handle value. Usually it's better to set a bigger
1052
+ * #mdb_env_set_maxdbs(), unless that value would be large.
1053
+ *
1013
1054
  * @param[in] env An environment handle returned by #mdb_env_create()
1014
1055
  * @param[in] dbi A database handle returned by #mdb_dbi_open()
1015
1056
  */
@@ -1017,6 +1058,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi);
1017
1058
 
1018
1059
  /** @brief Empty or delete+close a database.
1019
1060
  *
1061
+ * See #mdb_dbi_close() for restrictions about closing the DB handle.
1020
1062
  * @param[in] txn A transaction handle returned by #mdb_txn_begin()
1021
1063
  * @param[in] dbi A database handle returned by #mdb_dbi_open()
1022
1064
  * @param[in] del 0 to empty the DB, 1 to delete it from the
@@ -1294,9 +1336,9 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
1294
1336
  /** @brief Store by cursor.
1295
1337
  *
1296
1338
  * This function stores key/data pairs into the database.
1297
- * If the function fails for any reason, the state of the cursor will be
1298
- * unchanged. If the function succeeds and an item is inserted into the
1299
- * database, the cursor is always positioned to refer to the newly inserted item.
1339
+ * The cursor is positioned at the new item, or on failure usually near it.
1340
+ * @note Earlier documentation incorrectly said errors would leave the
1341
+ * state of the cursor unchanged.
1300
1342
  * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1301
1343
  * @param[in] key The key operated on.
1302
1344
  * @param[in] data The data operated on.
@@ -1305,7 +1347,9 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
1305
1347
  * <ul>
1306
1348
  * <li>#MDB_CURRENT - overwrite the data of the key/data pair to which
1307
1349
  * the cursor refers with the specified data item. The \b key
1308
- * parameter is ignored.
1350
+ * parameter is not used for positioning the cursor, but should
1351
+ * still be provided. If using sorted duplicates (#MDB_DUPSORT)
1352
+ * the data item must still sort into the same place.
1309
1353
  * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
1310
1354
  * already appear in the database. This flag may only be specified
1311
1355
  * if the database was opened with #MDB_DUPSORT. The function will
@@ -1409,7 +1453,7 @@ int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
1409
1453
  *
1410
1454
  * @param[in] msg The string to be printed.
1411
1455
  * @param[in] ctx An arbitrary context pointer for the callback.
1412
- * @return < 0 on failure, 0 on success.
1456
+ * @return < 0 on failure, >= 0 on success.
1413
1457
  */
1414
1458
  typedef int (MDB_msg_func)(const char *msg, void *ctx);
1415
1459
 
@@ -1418,7 +1462,7 @@ typedef int (MDB_msg_func)(const char *msg, void *ctx);
1418
1462
  * @param[in] env An environment handle returned by #mdb_env_create()
1419
1463
  * @param[in] func A #MDB_msg_func function
1420
1464
  * @param[in] ctx Anything the message function needs
1421
- * @return < 0 on failure, 0 on success.
1465
+ * @return < 0 on failure, >= 0 on success.
1422
1466
  */
1423
1467
  int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx);
1424
1468
 
@@ -65,7 +65,6 @@
65
65
  #include <fcntl.h>
66
66
  #endif
67
67
 
68
- #include <assert.h>
69
68
  #include <errno.h>
70
69
  #include <limits.h>
71
70
  #include <stddef.h>
@@ -149,13 +148,24 @@
149
148
  /** @defgroup internal MDB Internals
150
149
  * @{
151
150
  */
152
- /** @defgroup compat Windows Compatibility Macros
151
+ /** @defgroup compat Compatibility Macros
153
152
  * A bunch of macros to minimize the amount of platform-specific ifdefs
154
153
  * needed throughout the rest of the code. When the features this library
155
154
  * needs are similar enough to POSIX to be hidden in a one-or-two line
156
155
  * replacement, this macro approach is used.
157
156
  * @{
158
157
  */
158
+
159
+ /** Wrapper around __func__, which is a C99 feature */
160
+ #if __STDC_VERSION__ >= 199901L
161
+ # define mdb_func_ __func__
162
+ #elif __GNUC__ >= 2 || _MSC_VER >= 1300
163
+ # define mdb_func_ __FUNCTION__
164
+ #else
165
+ /* If a debug message says <mdb_unknown>(), update the #if statements above */
166
+ # define mdb_func_ "<mdb_unknown>"
167
+ #endif
168
+
159
169
  #ifdef _WIN32
160
170
  #define MDB_USE_HASH 1
161
171
  #define MDB_PIDLOCK 0
@@ -327,7 +337,7 @@ static txnid_t mdb_debug_start;
327
337
  */
328
338
  # define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args))
329
339
  # define DPRINTF0(fmt, ...) \
330
- fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__)
340
+ fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__)
331
341
  #else
332
342
  # define DPRINTF(args) ((void) 0)
333
343
  #endif
@@ -342,7 +352,7 @@ static txnid_t mdb_debug_start;
342
352
 
343
353
  /** @brief The maximum size of a database page.
344
354
  *
345
- * This is 32k, since it must fit in #MDB_page.#mp_upper.
355
+ * This is 32k, since it must fit in #MDB_page.%mp_upper.
346
356
  *
347
357
  * LMDB will use database pages < OS pages if needed.
348
358
  * That causes more I/O in write transactions: The OS must
@@ -382,20 +392,25 @@ static txnid_t mdb_debug_start;
382
392
  /** The version number for a database's lockfile format. */
383
393
  #define MDB_LOCK_VERSION 1
384
394
 
385
- /** @brief The maximum size of a key in the database.
386
- *
387
- * The library rejects bigger keys, and cannot deal with records
388
- * with bigger keys stored by a library with bigger max keysize.
395
+ /** @brief The max size of a key we can write, or 0 for dynamic max.
389
396
  *
390
- * We require that keys all fit onto a regular page. This limit
391
- * could be raised a bit further if needed; to something just
392
- * under (page size / #MDB_MINKEYS / 3).
397
+ * Define this as 0 to compute the max from the page size. 511
398
+ * is default for backwards compat: liblmdb <= 0.9.10 can break
399
+ * when modifying a DB with keys/dupsort data bigger than its max.
393
400
  *
394
- * Note that data items in an #MDB_DUPSORT database are actually keys
395
- * of a subDB, so they're also limited to this size.
401
+ * Data items in an #MDB_DUPSORT database are also limited to
402
+ * this size, since they're actually keys of a sub-DB. Keys and
403
+ * #MDB_DUPSORT data items must fit on a node in a regular page.
396
404
  */
397
405
  #ifndef MDB_MAXKEYSIZE
398
406
  #define MDB_MAXKEYSIZE 511
407
+ #endif
408
+
409
+ /** The maximum size of a key we can write to the environment. */
410
+ #if MDB_MAXKEYSIZE
411
+ #define ENV_MAXKEY(env) (MDB_MAXKEYSIZE)
412
+ #else
413
+ #define ENV_MAXKEY(env) ((env)->me_maxkey)
399
414
  #endif
400
415
 
401
416
  /** @brief The maximum size of a data item.
@@ -405,11 +420,15 @@ static txnid_t mdb_debug_start;
405
420
  #define MAXDATASIZE 0xffffffffUL
406
421
 
407
422
  #if MDB_DEBUG
423
+ /** Key size which fits in a #DKBUF.
424
+ * @ingroup debug
425
+ */
426
+ #define DKBUF_MAXKEYSIZE ((MDB_MAXKEYSIZE) > 0 ? (MDB_MAXKEYSIZE) : 511)
408
427
  /** A key buffer.
409
428
  * @ingroup debug
410
429
  * This is used for printing a hex dump of a key's contents.
411
430
  */
412
- #define DKBUF char kbuf[(MDB_MAXKEYSIZE*2+1)]
431
+ #define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1]
413
432
  /** Display a key in hex.
414
433
  * @ingroup debug
415
434
  * Invoke a function to display a key in hex.
@@ -428,6 +447,9 @@ static txnid_t mdb_debug_start;
428
447
  /** Test if the flags \b f are set in a flag word \b w. */
429
448
  #define F_ISSET(w, f) (((w) & (f)) == (f))
430
449
 
450
+ /** Round \b n up to an even number. */
451
+ #define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */
452
+
431
453
  /** Used for offsets within a single page.
432
454
  * Since memory pages are typically 4 or 8KB in size, 12-13 bits,
433
455
  * this is plenty.
@@ -679,7 +701,8 @@ typedef struct MDB_page {
679
701
  #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
680
702
 
681
703
  /** Header for a single key/data pair within a page.
682
- * We guarantee 2-byte alignment for nodes.
704
+ * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2.
705
+ * We guarantee 2-byte alignment for 'MDB_node's.
683
706
  */
684
707
  typedef struct MDB_node {
685
708
  /** lo and hi are used for data size on leaf nodes and for
@@ -688,9 +711,11 @@ typedef struct MDB_node {
688
711
  * They are in host byte order in case that lets some
689
712
  * accesses be optimized into a 32-bit word access.
690
713
  */
691
- #define mn_lo mn_offset[BYTE_ORDER!=LITTLE_ENDIAN]
692
- #define mn_hi mn_offset[BYTE_ORDER==LITTLE_ENDIAN] /**< part of dsize or pgno */
693
- unsigned short mn_offset[2]; /**< storage for #mn_lo and #mn_hi */
714
+ #if BYTE_ORDER == LITTLE_ENDIAN
715
+ unsigned short mn_lo, mn_hi; /**< part of data size or pgno */
716
+ #else
717
+ unsigned short mn_hi, mn_lo;
718
+ #endif
694
719
  /** @defgroup mdb_node Node Flags
695
720
  * @ingroup internal
696
721
  * Flags for node headers.
@@ -911,12 +936,12 @@ struct MDB_txn {
911
936
  * @{
912
937
  */
913
938
  #define MDB_TXN_RDONLY 0x01 /**< read-only transaction */
914
- #define MDB_TXN_ERROR 0x02 /**< an error has occurred */
939
+ #define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */
915
940
  #define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */
916
941
  #define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */
917
942
  /** @} */
918
943
  unsigned int mt_flags; /**< @ref mdb_txn */
919
- /** dirty_list room: Array size - #dirty pages visible to this txn.
944
+ /** #dirty_list room: Array size - \#dirty pages visible to this txn.
920
945
  * Includes ancestor txns' dirty pages not hidden by other txns'
921
946
  * dirty/spilled pages. Thus commit(nested txn) has room to merge
922
947
  * dirty_list into mt_parent after freeing hidden mt_parent pages.
@@ -1009,8 +1034,6 @@ struct MDB_env {
1009
1034
  #define MDB_ENV_ACTIVE 0x20000000U
1010
1035
  /** me_txkey is set */
1011
1036
  #define MDB_ENV_TXKEY 0x10000000U
1012
- /** Have liveness lock in reader table */
1013
- #define MDB_LIVE_READER 0x08000000U
1014
1037
  uint32_t me_flags; /**< @ref mdb_env */
1015
1038
  unsigned int me_psize; /**< DB page size, inited from me_os_psize */
1016
1039
  unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */
@@ -1043,6 +1066,10 @@ struct MDB_env {
1043
1066
  int me_maxfree_1pg;
1044
1067
  /** Max size of a node on a page */
1045
1068
  unsigned int me_nodemax;
1069
+ #if !(MDB_MAXKEYSIZE)
1070
+ unsigned int me_maxkey; /**< max size of a key */
1071
+ #endif
1072
+ int me_live_reader; /**< have liveness lock in reader table */
1046
1073
  #ifdef _WIN32
1047
1074
  int me_pidquery; /**< Used in OpenProcess */
1048
1075
  HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
@@ -1051,6 +1078,8 @@ struct MDB_env {
1051
1078
  sem_t *me_rmutex; /* Shared mutexes are not supported */
1052
1079
  sem_t *me_wmutex;
1053
1080
  #endif
1081
+ void *me_userctx; /**< User-settable context */
1082
+ MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
1054
1083
  };
1055
1084
 
1056
1085
  /** Nested transaction */
@@ -1066,9 +1095,13 @@ typedef struct MDB_ntxn {
1066
1095
  #define MDB_COMMIT_PAGES IOV_MAX
1067
1096
  #endif
1068
1097
 
1069
- /* max bytes to write in one call */
1098
+ /** max bytes to write in one call */
1070
1099
  #define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4))
1071
1100
 
1101
+ /** Check \b txn and \b dbi arguments to a function */
1102
+ #define TXN_DBI_EXIST(txn, dbi) \
1103
+ ((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID))
1104
+
1072
1105
  static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp);
1073
1106
  static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp);
1074
1107
  static int mdb_page_touch(MDB_cursor *mc);
@@ -1099,7 +1132,7 @@ static void mdb_env_close0(MDB_env *env, int excl);
1099
1132
  static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp);
1100
1133
  static int mdb_node_add(MDB_cursor *mc, indx_t indx,
1101
1134
  MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags);
1102
- static void mdb_node_del(MDB_page *mp, indx_t indx, int ksize);
1135
+ static void mdb_node_del(MDB_cursor *mc, int ksize);
1103
1136
  static void mdb_node_shrink(MDB_page *mp, indx_t indx);
1104
1137
  static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst);
1105
1138
  static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
@@ -1112,7 +1145,8 @@ static int mdb_update_key(MDB_cursor *mc, MDB_val *key);
1112
1145
  static void mdb_cursor_pop(MDB_cursor *mc);
1113
1146
  static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp);
1114
1147
 
1115
- static int mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf);
1148
+ static int mdb_cursor_del0(MDB_cursor *mc);
1149
+ static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags);
1116
1150
  static int mdb_cursor_sibling(MDB_cursor *mc, int move_right);
1117
1151
  static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
1118
1152
  static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
@@ -1168,7 +1202,7 @@ static char *const mdb_errstr[] = {
1168
1202
  "MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed",
1169
1203
  "MDB_BAD_RSLOT: Invalid reuse of reader locktable slot",
1170
1204
  "MDB_BAD_TXN: Transaction cannot recover - it must be aborted",
1171
- "MDB_BAD_VALSIZE: Too big key/data, key is empty, or wrong DUPFIXED size",
1205
+ "MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
1172
1206
  };
1173
1207
 
1174
1208
  char *
@@ -1186,7 +1220,43 @@ mdb_strerror(int err)
1186
1220
  return strerror(err);
1187
1221
  }
1188
1222
 
1223
+ /** assert(3) variant in cursor context */
1224
+ #define mdb_cassert(mc, expr) mdb_assert0((mc)->mc_txn->mt_env, expr, #expr)
1225
+ /** assert(3) variant in transaction context */
1226
+ #define mdb_tassert(mc, expr) mdb_assert0((txn)->mt_env, expr, #expr)
1227
+ /** assert(3) variant in environment context */
1228
+ #define mdb_eassert(env, expr) mdb_assert0(env, expr, #expr)
1229
+
1230
+ #ifndef NDEBUG
1231
+ # define mdb_assert0(env, expr, expr_txt) ((expr) ? (void)0 : \
1232
+ mdb_assert_fail(env, expr_txt, mdb_func_, __FILE__, __LINE__))
1233
+
1234
+ static void
1235
+ mdb_assert_fail(MDB_env *env, const char *expr_txt,
1236
+ const char *func, const char *file, int line)
1237
+ {
1238
+ char buf[400];
1239
+ sprintf(buf, "%.100s:%d: Assertion '%.200s' failed in %.40s()",
1240
+ file, line, expr_txt, func);
1241
+ if (env->me_assert_func)
1242
+ env->me_assert_func(env, buf);
1243
+ fprintf(stderr, "%s\n", buf);
1244
+ abort();
1245
+ }
1246
+ #else
1247
+ # define mdb_assert0(env, expr, expr_txt) ((void) 0)
1248
+ #endif /* NDEBUG */
1249
+
1189
1250
  #if MDB_DEBUG
1251
+ /** Return the page number of \b mp, which may be a sub-page, for debug output */
1252
+ static pgno_t
1253
+ mdb_dbg_pgno(MDB_page *mp)
1254
+ {
1255
+ pgno_t ret;
1256
+ COPY_PGNO(ret, mp->mp_pgno);
1257
+ return ret;
1258
+ }
1259
+
1190
1260
  /** Display a key in hexadecimal and return the address of the result.
1191
1261
  * @param[in] key the key to display
1192
1262
  * @param[in] buf the buffer to write into. Should always be #DKBUF.
@@ -1202,7 +1272,7 @@ mdb_dkey(MDB_val *key, char *buf)
1202
1272
  if (!key)
1203
1273
  return "";
1204
1274
 
1205
- if (key->mv_size > MDB_MAXKEYSIZE)
1275
+ if (key->mv_size > DKBUF_MAXKEYSIZE)
1206
1276
  return "MDB_MAXKEYSIZE";
1207
1277
  /* may want to make this a dynamic check: if the key is mostly
1208
1278
  * printable characters, print it as-is instead of converting to hex.
@@ -1217,33 +1287,77 @@ mdb_dkey(MDB_val *key, char *buf)
1217
1287
  return buf;
1218
1288
  }
1219
1289
 
1290
+ static const char *
1291
+ mdb_leafnode_type(MDB_node *n)
1292
+ {
1293
+ static char *const tp[2][2] = {{"", ": DB"}, {": sub-page", ": sub-DB"}};
1294
+ return F_ISSET(n->mn_flags, F_BIGDATA) ? ": overflow page" :
1295
+ tp[F_ISSET(n->mn_flags, F_DUPDATA)][F_ISSET(n->mn_flags, F_SUBDATA)];
1296
+ }
1297
+
1220
1298
  /** Display all the keys in the page. */
1221
1299
  void
1222
1300
  mdb_page_list(MDB_page *mp)
1223
1301
  {
1302
+ pgno_t pgno = mdb_dbg_pgno(mp);
1303
+ const char *type, *state = (mp->mp_flags & P_DIRTY) ? ", dirty" : "";
1224
1304
  MDB_node *node;
1225
- unsigned int i, nkeys, nsize;
1305
+ unsigned int i, nkeys, nsize, total = 0;
1226
1306
  MDB_val key;
1227
1307
  DKBUF;
1228
1308
 
1309
+ switch (mp->mp_flags & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) {
1310
+ case P_BRANCH: type = "Branch page"; break;
1311
+ case P_LEAF: type = "Leaf page"; break;
1312
+ case P_LEAF|P_SUBP: type = "Sub-page"; break;
1313
+ case P_LEAF|P_LEAF2: type = "LEAF2 page"; break;
1314
+ case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break;
1315
+ case P_OVERFLOW:
1316
+ fprintf(stderr, "Overflow page %"Z"u pages %u%s\n",
1317
+ pgno, mp->mp_pages, state);
1318
+ return;
1319
+ case P_META:
1320
+ fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n",
1321
+ pgno, ((MDB_meta *)METADATA(mp))->mm_txnid);
1322
+ return;
1323
+ default:
1324
+ fprintf(stderr, "Bad page %"Z"u flags 0x%u\n", pgno, mp->mp_flags);
1325
+ return;
1326
+ }
1327
+
1229
1328
  nkeys = NUMKEYS(mp);
1230
- fprintf(stderr, "Page %"Z"u numkeys %d\n", mp->mp_pgno, nkeys);
1329
+ fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state);
1330
+
1231
1331
  for (i=0; i<nkeys; i++) {
1332
+ if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */
1333
+ key.mv_size = nsize = mp->mp_pad;
1334
+ key.mv_data = LEAF2KEY(mp, i, nsize);
1335
+ total += nsize;
1336
+ fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
1337
+ continue;
1338
+ }
1232
1339
  node = NODEPTR(mp, i);
1233
1340
  key.mv_size = node->mn_ksize;
1234
1341
  key.mv_data = node->mn_data;
1235
- nsize = NODESIZE + NODEKSZ(node) + sizeof(indx_t);
1342
+ nsize = NODESIZE + key.mv_size;
1236
1343
  if (IS_BRANCH(mp)) {
1237
1344
  fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node),
1238
1345
  DKEY(&key));
1346
+ total += nsize;
1239
1347
  } else {
1240
1348
  if (F_ISSET(node->mn_flags, F_BIGDATA))
1241
1349
  nsize += sizeof(pgno_t);
1242
1350
  else
1243
1351
  nsize += NODEDSZ(node);
1244
- fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
1352
+ total += nsize;
1353
+ nsize += sizeof(indx_t);
1354
+ fprintf(stderr, "key %d: nsize %d, %s%s\n",
1355
+ i, nsize, DKEY(&key), mdb_leafnode_type(node));
1245
1356
  }
1357
+ total = EVEN(total);
1246
1358
  }
1359
+ fprintf(stderr, "Total: header %d + contents %d + unused %d\n",
1360
+ IS_LEAF2(mp) ? PAGEHDRSZ : mp->mp_lower, total, SIZELEFT(mp));
1247
1361
  }
1248
1362
 
1249
1363
  void
@@ -1269,6 +1383,7 @@ mdb_cursor_chk(MDB_cursor *mc)
1269
1383
  /** Count all the pages in each DB and in the freelist
1270
1384
  * and make sure it matches the actual number of pages
1271
1385
  * being used.
1386
+ * All named DBs must be open for a correct count.
1272
1387
  */
1273
1388
  static void mdb_audit(MDB_txn *txn)
1274
1389
  {
@@ -1282,10 +1397,13 @@ static void mdb_audit(MDB_txn *txn)
1282
1397
  mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
1283
1398
  while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
1284
1399
  freecount += *(MDB_ID *)data.mv_data;
1400
+ mdb_tassert(txn, rc == MDB_NOTFOUND);
1285
1401
 
1286
1402
  count = 0;
1287
1403
  for (i = 0; i<txn->mt_numdbs; i++) {
1288
1404
  MDB_xcursor mx;
1405
+ if (!(txn->mt_dbflags[i] & DB_VALID))
1406
+ continue;
1289
1407
  mdb_cursor_init(&mc, txn, i, &mx);
1290
1408
  if (txn->mt_dbs[i].md_root == P_INVALID)
1291
1409
  continue;
@@ -1293,8 +1411,8 @@ static void mdb_audit(MDB_txn *txn)
1293
1411
  txn->mt_dbs[i].md_leaf_pages +
1294
1412
  txn->mt_dbs[i].md_overflow_pages;
1295
1413
  if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) {
1296
- mdb_page_search(&mc, NULL, MDB_PS_FIRST);
1297
- do {
1414
+ rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST);
1415
+ for (; rc == MDB_SUCCESS; rc = mdb_cursor_sibling(&mc, 1)) {
1298
1416
  unsigned j;
1299
1417
  MDB_page *mp;
1300
1418
  mp = mc.mc_pg[mc.mc_top];
@@ -1308,7 +1426,7 @@ static void mdb_audit(MDB_txn *txn)
1308
1426
  }
1309
1427
  }
1310
1428
  }
1311
- while (mdb_cursor_sibling(&mc, 1) == 0);
1429
+ mdb_tassert(txn, rc == MDB_NOTFOUND);
1312
1430
  }
1313
1431
  }
1314
1432
  if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) {
@@ -1357,11 +1475,13 @@ mdb_page_malloc(MDB_txn *txn, unsigned num)
1357
1475
  off = sz - psize;
1358
1476
  }
1359
1477
  if ((ret = malloc(sz)) != NULL) {
1478
+ VGMEMP_ALLOC(env, ret, sz);
1360
1479
  if (!(env->me_flags & MDB_NOMEMINIT)) {
1361
1480
  memset((char *)ret + off, 0, psize);
1362
1481
  ret->mp_pad = 0;
1363
1482
  }
1364
- VGMEMP_ALLOC(env, ret, sz);
1483
+ } else {
1484
+ txn->mt_flags |= MDB_TXN_ERROR;
1365
1485
  }
1366
1486
  return ret;
1367
1487
  }
@@ -1627,7 +1747,7 @@ static void
1627
1747
  mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
1628
1748
  {
1629
1749
  MDB_ID2 mid;
1630
- int (*insert)(MDB_ID2L, MDB_ID2 *);
1750
+ int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
1631
1751
 
1632
1752
  if (txn->mt_env->me_flags & MDB_WRITEMAP) {
1633
1753
  insert = mdb_mid2l_append;
@@ -1636,7 +1756,8 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
1636
1756
  }
1637
1757
  mid.mid = mp->mp_pgno;
1638
1758
  mid.mptr = mp;
1639
- insert(txn->mt_u.dirty_list, &mid);
1759
+ rc = insert(txn->mt_u.dirty_list, &mid);
1760
+ mdb_tassert(txn, rc == 0);
1640
1761
  txn->mt_dirty_room--;
1641
1762
  }
1642
1763
 
@@ -1669,11 +1790,11 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1669
1790
  #else
1670
1791
  enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
1671
1792
  #endif
1672
- int rc, n2 = num-1, retry = Max_retries;
1793
+ int rc, retry = Max_retries;
1673
1794
  MDB_txn *txn = mc->mc_txn;
1674
1795
  MDB_env *env = txn->mt_env;
1675
1796
  pgno_t pgno, *mop = env->me_pghead;
1676
- unsigned i, j, k, mop_len = mop ? mop[0] : 0;
1797
+ unsigned i, j, k, mop_len = mop ? mop[0] : 0, n2 = num-1;
1677
1798
  MDB_page *np;
1678
1799
  txnid_t oldest = 0, last;
1679
1800
  MDB_cursor_op op;
@@ -1682,8 +1803,10 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1682
1803
  *mp = NULL;
1683
1804
 
1684
1805
  /* If our dirty list is already full, we can't do anything */
1685
- if (txn->mt_dirty_room == 0)
1686
- return MDB_TXN_FULL;
1806
+ if (txn->mt_dirty_room == 0) {
1807
+ rc = MDB_TXN_FULL;
1808
+ goto fail;
1809
+ }
1687
1810
 
1688
1811
  for (op = MDB_FIRST;; op = MDB_NEXT) {
1689
1812
  MDB_val key, data;
@@ -1693,13 +1816,13 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1693
1816
  /* Seek a big enough contiguous page range. Prefer
1694
1817
  * pages at the tail, just truncating the list.
1695
1818
  */
1696
- if (mop_len >= (unsigned)num) {
1819
+ if (mop_len > n2) {
1697
1820
  i = mop_len;
1698
1821
  do {
1699
1822
  pgno = mop[i];
1700
1823
  if (mop[i-n2] == pgno+n2)
1701
1824
  goto search_done;
1702
- } while (--i >= (unsigned)num);
1825
+ } while (--i > n2);
1703
1826
  if (Max_retries < INT_MAX && --retry < 0)
1704
1827
  break;
1705
1828
  }
@@ -1728,7 +1851,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1728
1851
  if (rc) {
1729
1852
  if (rc == MDB_NOTFOUND)
1730
1853
  break;
1731
- return rc;
1854
+ goto fail;
1732
1855
  }
1733
1856
  last = *(txnid_t*)key.mv_data;
1734
1857
  if (oldest <= last)
@@ -1741,11 +1864,13 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1741
1864
  idl = (MDB_ID *) data.mv_data;
1742
1865
  i = idl[0];
1743
1866
  if (!mop) {
1744
- if (!(env->me_pghead = mop = mdb_midl_alloc(i)))
1745
- return ENOMEM;
1867
+ if (!(env->me_pghead = mop = mdb_midl_alloc(i))) {
1868
+ rc = ENOMEM;
1869
+ goto fail;
1870
+ }
1746
1871
  } else {
1747
1872
  if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0)
1748
- return rc;
1873
+ goto fail;
1749
1874
  mop = env->me_pghead;
1750
1875
  }
1751
1876
  env->me_pglast = last;
@@ -1774,15 +1899,18 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1774
1899
  pgno = txn->mt_next_pgno;
1775
1900
  if (pgno + num >= env->me_maxpg) {
1776
1901
  DPUTS("DB size maxed out");
1777
- return MDB_MAP_FULL;
1902
+ rc = MDB_MAP_FULL;
1903
+ goto fail;
1778
1904
  }
1779
1905
 
1780
1906
  search_done:
1781
1907
  if (env->me_flags & MDB_WRITEMAP) {
1782
1908
  np = (MDB_page *)(env->me_map + env->me_psize * pgno);
1783
1909
  } else {
1784
- if (!(np = mdb_page_malloc(txn, num)))
1785
- return ENOMEM;
1910
+ if (!(np = mdb_page_malloc(txn, num))) {
1911
+ rc = ENOMEM;
1912
+ goto fail;
1913
+ }
1786
1914
  }
1787
1915
  if (i) {
1788
1916
  mop[0] = mop_len -= num;
@@ -1797,6 +1925,10 @@ search_done:
1797
1925
  *mp = np;
1798
1926
 
1799
1927
  return MDB_SUCCESS;
1928
+
1929
+ fail:
1930
+ txn->mt_flags |= MDB_TXN_ERROR;
1931
+ return rc;
1800
1932
  }
1801
1933
 
1802
1934
  /** Copy the used portions of a non-overflow page.
@@ -1827,7 +1959,7 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize)
1827
1959
  * If a page being referenced was spilled to disk in this txn, bring
1828
1960
  * it back and make it dirty/writable again.
1829
1961
  * @param[in] txn the transaction handle.
1830
- * @param[in] mp the page being referenced.
1962
+ * @param[in] mp the page being referenced. It must not be dirty.
1831
1963
  * @param[out] ret the writable page, if any. ret is unchanged if
1832
1964
  * mp wasn't spilled.
1833
1965
  */
@@ -1903,17 +2035,17 @@ mdb_page_touch(MDB_cursor *mc)
1903
2035
  np = NULL;
1904
2036
  rc = mdb_page_unspill(txn, mp, &np);
1905
2037
  if (rc)
1906
- return rc;
2038
+ goto fail;
1907
2039
  if (np)
1908
2040
  goto done;
1909
2041
  }
1910
2042
  if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) ||
1911
2043
  (rc = mdb_page_alloc(mc, 1, &np)))
1912
- return rc;
2044
+ goto fail;
1913
2045
  pgno = np->mp_pgno;
1914
2046
  DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc),
1915
2047
  mp->mp_pgno, pgno));
1916
- assert(mp->mp_pgno != pgno);
2048
+ mdb_cassert(mc, mp->mp_pgno != pgno);
1917
2049
  mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
1918
2050
  /* Update the parent page, if any, to point to the new page */
1919
2051
  if (mc->mc_top) {
@@ -1934,19 +2066,21 @@ mdb_page_touch(MDB_cursor *mc)
1934
2066
  if (x <= dl[0].mid && dl[x].mid == pgno) {
1935
2067
  if (mp != dl[x].mptr) { /* bad cursor? */
1936
2068
  mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
2069
+ txn->mt_flags |= MDB_TXN_ERROR;
1937
2070
  return MDB_CORRUPTED;
1938
2071
  }
1939
2072
  return 0;
1940
2073
  }
1941
2074
  }
1942
- assert(dl[0].mid < MDB_IDL_UM_MAX);
2075
+ mdb_cassert(mc, dl[0].mid < MDB_IDL_UM_MAX);
1943
2076
  /* No - copy it */
1944
2077
  np = mdb_page_malloc(txn, 1);
1945
2078
  if (!np)
1946
2079
  return ENOMEM;
1947
2080
  mid.mid = pgno;
1948
2081
  mid.mptr = np;
1949
- mdb_mid2l_insert(dl, &mid);
2082
+ rc = mdb_mid2l_insert(dl, &mid);
2083
+ mdb_cassert(mc, rc == 0);
1950
2084
  } else {
1951
2085
  return 0;
1952
2086
  }
@@ -1972,6 +2106,7 @@ done:
1972
2106
  if (m2->mc_pg[mc->mc_top] == mp) {
1973
2107
  m2->mc_pg[mc->mc_top] = np;
1974
2108
  if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
2109
+ IS_LEAF(np) &&
1975
2110
  m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top])
1976
2111
  {
1977
2112
  MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]);
@@ -1982,6 +2117,10 @@ done:
1982
2117
  }
1983
2118
  }
1984
2119
  return 0;
2120
+
2121
+ fail:
2122
+ txn->mt_flags |= MDB_TXN_ERROR;
2123
+ return rc;
1985
2124
  }
1986
2125
 
1987
2126
  int
@@ -2177,13 +2316,11 @@ mdb_txn_renew0(MDB_txn *txn)
2177
2316
  MDB_PID_T pid = env->me_pid;
2178
2317
  pthread_t tid = pthread_self();
2179
2318
 
2180
- if (!(env->me_flags & MDB_LIVE_READER)) {
2319
+ if (!env->me_live_reader) {
2181
2320
  rc = mdb_reader_pid(env, Pidset, pid);
2182
- if (rc) {
2183
- UNLOCK_MUTEX_R(env);
2321
+ if (rc)
2184
2322
  return rc;
2185
- }
2186
- env->me_flags |= MDB_LIVE_READER;
2323
+ env->me_live_reader = 1;
2187
2324
  }
2188
2325
 
2189
2326
  LOCK_MUTEX_R(env);
@@ -2543,7 +2680,7 @@ mdb_freelist_save(MDB_txn *txn)
2543
2680
  return rc;
2544
2681
  pglast = head_id = *(txnid_t *)key.mv_data;
2545
2682
  total_room = head_room = 0;
2546
- assert(pglast <= env->me_pglast);
2683
+ mdb_tassert(txn, pglast <= env->me_pglast);
2547
2684
  rc = mdb_cursor_del(&mc, 0);
2548
2685
  if (rc)
2549
2686
  return rc;
@@ -2633,22 +2770,20 @@ mdb_freelist_save(MDB_txn *txn)
2633
2770
  mop += mop_len;
2634
2771
  rc = mdb_cursor_first(&mc, &key, &data);
2635
2772
  for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
2636
- unsigned flags = MDB_CURRENT;
2637
2773
  txnid_t id = *(txnid_t *)key.mv_data;
2638
2774
  ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
2639
2775
  MDB_ID save;
2640
2776
 
2641
- assert(len >= 0 && id <= env->me_pglast);
2777
+ mdb_tassert(txn, len >= 0 && id <= env->me_pglast);
2642
2778
  key.mv_data = &id;
2643
2779
  if (len > mop_len) {
2644
2780
  len = mop_len;
2645
2781
  data.mv_size = (len + 1) * sizeof(MDB_ID);
2646
- flags = 0;
2647
2782
  }
2648
2783
  data.mv_data = mop -= len;
2649
2784
  save = mop[0];
2650
2785
  mop[0] = len;
2651
- rc = mdb_cursor_put(&mc, &key, &data, flags);
2786
+ rc = mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
2652
2787
  mop[0] = save;
2653
2788
  if (rc || !(mop_len -= len))
2654
2789
  break;
@@ -2804,8 +2939,8 @@ mdb_txn_commit(MDB_txn *txn)
2804
2939
  unsigned int i;
2805
2940
  MDB_env *env;
2806
2941
 
2807
- assert(txn != NULL);
2808
- assert(txn->mt_env != NULL);
2942
+ if (txn == NULL || txn->mt_env == NULL)
2943
+ return EINVAL;
2809
2944
 
2810
2945
  if (txn->mt_child) {
2811
2946
  rc = mdb_txn_commit(txn->mt_child);
@@ -2913,7 +3048,7 @@ mdb_txn_commit(MDB_txn *txn)
2913
3048
  if (yp == dst[x].mid)
2914
3049
  free(dst[x--].mptr);
2915
3050
  }
2916
- assert(i == x);
3051
+ mdb_tassert(txn, i == x);
2917
3052
  dst[0].mid = len;
2918
3053
  free(txn->mt_u.dirty_list);
2919
3054
  parent->mt_dirty_room = txn->mt_dirty_room;
@@ -3146,9 +3281,6 @@ mdb_env_write_meta(MDB_txn *txn)
3146
3281
  int r2;
3147
3282
  #endif
3148
3283
 
3149
- assert(txn != NULL);
3150
- assert(txn->mt_env != NULL);
3151
-
3152
3284
  toggle = txn->mt_txnid & 1;
3153
3285
  DPRINTF(("writing meta page %d for root page %"Z"u",
3154
3286
  toggle, txn->mt_dbs[MAIN_DBI].md_root));
@@ -3470,7 +3602,7 @@ mdb_env_open2(MDB_env *env)
3470
3602
  env->me_mapsize = minsize;
3471
3603
  }
3472
3604
 
3473
- rc = mdb_env_map(env, meta.mm_address, newenv);
3605
+ rc = mdb_env_map(env, meta.mm_address, newenv || env->me_mapsize != meta.mm_mapsize);
3474
3606
  if (rc)
3475
3607
  return rc;
3476
3608
 
@@ -3482,10 +3614,15 @@ mdb_env_open2(MDB_env *env)
3482
3614
  return i;
3483
3615
  }
3484
3616
  }
3485
- env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
3486
- env->me_nodemax = (env->me_psize - PAGEHDRSZ) / MDB_MINKEYS;
3487
3617
 
3618
+ env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
3619
+ env->me_nodemax = (((env->me_psize - PAGEHDRSZ) / MDB_MINKEYS) & -2)
3620
+ - sizeof(indx_t);
3621
+ #if !(MDB_MAXKEYSIZE)
3622
+ env->me_maxkey = env->me_nodemax - (NODESIZE + sizeof(MDB_db));
3623
+ #endif
3488
3624
  env->me_maxpg = env->me_mapsize / env->me_psize;
3625
+
3489
3626
  #if MDB_DEBUG
3490
3627
  {
3491
3628
  int toggle = mdb_env_pick_meta(env);
@@ -3540,7 +3677,9 @@ static void NTAPI mdb_tls_callback(PVOID module, DWORD reason, PVOID ptr)
3540
3677
  case DLL_THREAD_DETACH:
3541
3678
  for (i=0; i<mdb_tls_nkeys; i++) {
3542
3679
  MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]);
3543
- mdb_env_reader_dest(r);
3680
+ if (r) {
3681
+ mdb_env_reader_dest(r);
3682
+ }
3544
3683
  }
3545
3684
  break;
3546
3685
  case DLL_PROCESS_DETACH: break;
@@ -3985,6 +4124,10 @@ fail:
3985
4124
  #define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP| \
3986
4125
  MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
3987
4126
 
4127
+ #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
4128
+ # error "Persistent DB flags & env flags overlap, but both go in mm_flags"
4129
+ #endif
4130
+
3988
4131
  int
3989
4132
  mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
3990
4133
  {
@@ -4403,6 +4546,13 @@ mdb_cmp_cint(const MDB_val *a, const MDB_val *b)
4403
4546
  #endif
4404
4547
  }
4405
4548
 
4549
+ /** Compare two items pointing at size_t's of unknown alignment. */
4550
+ #ifdef MISALIGNED_OK
4551
+ # define mdb_cmp_clong mdb_cmp_long
4552
+ #else
4553
+ # define mdb_cmp_clong mdb_cmp_cint
4554
+ #endif
4555
+
4406
4556
  /** Compare two items lexically */
4407
4557
  static int
4408
4558
  mdb_cmp_memn(const MDB_val *a, const MDB_val *b)
@@ -4469,17 +4619,9 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
4469
4619
 
4470
4620
  nkeys = NUMKEYS(mp);
4471
4621
 
4472
- #if MDB_DEBUG
4473
- {
4474
- pgno_t pgno;
4475
- COPY_PGNO(pgno, mp->mp_pgno);
4476
4622
  DPRINTF(("searching %u keys in %s %spage %"Z"u",
4477
4623
  nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "",
4478
- pgno));
4479
- }
4480
- #endif
4481
-
4482
- assert(nkeys > 0);
4624
+ mdb_dbg_pgno(mp)));
4483
4625
 
4484
4626
  low = IS_LEAF(mp) ? 0 : 1;
4485
4627
  high = nkeys - 1;
@@ -4543,7 +4685,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
4543
4685
  node = NODEPTR(mp, i);
4544
4686
  }
4545
4687
  if (exactp)
4546
- *exactp = (rc == 0);
4688
+ *exactp = (rc == 0 && nkeys > 0);
4547
4689
  /* store the key index */
4548
4690
  mc->mc_ki[mc->mc_top] = i;
4549
4691
  if (i >= nkeys)
@@ -4593,7 +4735,7 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
4593
4735
  DDBI(mc), (void *) mc));
4594
4736
 
4595
4737
  if (mc->mc_snum >= CURSOR_STACK) {
4596
- assert(mc->mc_snum < CURSOR_STACK);
4738
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
4597
4739
  return MDB_CURSOR_FULL;
4598
4740
  }
4599
4741
 
@@ -4653,7 +4795,7 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl)
4653
4795
  p = (MDB_page *)(env->me_map + env->me_psize * pgno);
4654
4796
  } else {
4655
4797
  DPRINTF(("page %"Z"u not found", pgno));
4656
- assert(p != NULL);
4798
+ txn->mt_flags |= MDB_TXN_ERROR;
4657
4799
  return MDB_PAGE_NOTFOUND;
4658
4800
  }
4659
4801
 
@@ -4679,7 +4821,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
4679
4821
  indx_t i;
4680
4822
 
4681
4823
  DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp)));
4682
- assert(NUMKEYS(mp) > 1);
4824
+ mdb_cassert(mc, NUMKEYS(mp) > 1);
4683
4825
  DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0))));
4684
4826
 
4685
4827
  if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) {
@@ -4694,14 +4836,14 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
4694
4836
  else {
4695
4837
  i = mc->mc_ki[mc->mc_top];
4696
4838
  if (!exact) {
4697
- assert(i > 0);
4839
+ mdb_cassert(mc, i > 0);
4698
4840
  i--;
4699
4841
  }
4700
4842
  }
4701
4843
  DPRINTF(("following index %u for key [%s]", i, DKEY(key)));
4702
4844
  }
4703
4845
 
4704
- assert(i < NUMKEYS(mp));
4846
+ mdb_cassert(mc, i < NUMKEYS(mp));
4705
4847
  node = NODEPTR(mp, i);
4706
4848
 
4707
4849
  if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0)
@@ -4721,6 +4863,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
4721
4863
  if (!IS_LEAF(mp)) {
4722
4864
  DPRINTF(("internal error, index points to a %02X page!?",
4723
4865
  mp->mp_flags));
4866
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
4724
4867
  return MDB_CORRUPTED;
4725
4868
  }
4726
4869
 
@@ -4815,7 +4958,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
4815
4958
  }
4816
4959
  }
4817
4960
 
4818
- assert(root > 1);
4961
+ mdb_cassert(mc, root > 1);
4819
4962
  if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
4820
4963
  if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0)
4821
4964
  return rc;
@@ -4885,7 +5028,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
4885
5028
  iy = dl[x];
4886
5029
  dl[x] = ix;
4887
5030
  } else {
4888
- assert(x > 1);
5031
+ mdb_cassert(mc, x > 1);
4889
5032
  j = ++(dl[0].mid);
4890
5033
  dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */
4891
5034
  txn->mt_flags |= MDB_TXN_ERROR;
@@ -4953,20 +5096,14 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi,
4953
5096
  int exact = 0;
4954
5097
  DKBUF;
4955
5098
 
4956
- assert(key);
4957
- assert(data);
4958
5099
  DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
4959
5100
 
4960
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
5101
+ if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
4961
5102
  return EINVAL;
4962
5103
 
4963
5104
  if (txn->mt_flags & MDB_TXN_ERROR)
4964
5105
  return MDB_BAD_TXN;
4965
5106
 
4966
- if (key->mv_size > MDB_MAXKEYSIZE) {
4967
- return MDB_BAD_VALSIZE;
4968
- }
4969
-
4970
5107
  mdb_cursor_init(&mc, txn, dbi, &mx);
4971
5108
  return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
4972
5109
  }
@@ -5012,7 +5149,7 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right)
5012
5149
  DPRINTF(("just moving to %s index key %u",
5013
5150
  move_right ? "right" : "left", mc->mc_ki[mc->mc_top]));
5014
5151
  }
5015
- assert(IS_BRANCH(mc->mc_pg[mc->mc_top]));
5152
+ mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
5016
5153
 
5017
5154
  indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5018
5155
  if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) {
@@ -5040,7 +5177,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5040
5177
  return MDB_NOTFOUND;
5041
5178
  }
5042
5179
 
5043
- assert(mc->mc_flags & C_INITIALIZED);
5180
+ mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
5044
5181
 
5045
5182
  mp = mc->mc_pg[mc->mc_top];
5046
5183
 
@@ -5062,7 +5199,8 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5062
5199
  }
5063
5200
  }
5064
5201
 
5065
- DPRINTF(("cursor_next: top page is %"Z"u in cursor %p", mp->mp_pgno, (void *) mc));
5202
+ DPRINTF(("cursor_next: top page is %"Z"u in cursor %p",
5203
+ mdb_dbg_pgno(mp), (void *) mc));
5066
5204
  if (mc->mc_flags & C_DEL)
5067
5205
  goto skip;
5068
5206
 
@@ -5079,7 +5217,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5079
5217
 
5080
5218
  skip:
5081
5219
  DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
5082
- mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5220
+ mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5083
5221
 
5084
5222
  if (IS_LEAF2(mp)) {
5085
5223
  key->mv_size = mc->mc_db->md_pad;
@@ -5087,7 +5225,7 @@ skip:
5087
5225
  return MDB_SUCCESS;
5088
5226
  }
5089
5227
 
5090
- assert(IS_LEAF(mp));
5228
+ mdb_cassert(mc, IS_LEAF(mp));
5091
5229
  leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
5092
5230
 
5093
5231
  if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
@@ -5116,7 +5254,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5116
5254
  MDB_node *leaf;
5117
5255
  int rc;
5118
5256
 
5119
- assert(mc->mc_flags & C_INITIALIZED);
5257
+ mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
5120
5258
 
5121
5259
  mp = mc->mc_pg[mc->mc_top];
5122
5260
 
@@ -5138,7 +5276,8 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5138
5276
  }
5139
5277
  }
5140
5278
 
5141
- DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p", mp->mp_pgno, (void *) mc));
5279
+ DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p",
5280
+ mdb_dbg_pgno(mp), (void *) mc));
5142
5281
 
5143
5282
  if (mc->mc_ki[mc->mc_top] == 0) {
5144
5283
  DPUTS("=====> move to prev sibling page");
@@ -5154,7 +5293,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5154
5293
  mc->mc_flags &= ~C_EOF;
5155
5294
 
5156
5295
  DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
5157
- mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5296
+ mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5158
5297
 
5159
5298
  if (IS_LEAF2(mp)) {
5160
5299
  key->mv_size = mc->mc_db->md_pad;
@@ -5162,7 +5301,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5162
5301
  return MDB_SUCCESS;
5163
5302
  }
5164
5303
 
5165
- assert(IS_LEAF(mp));
5304
+ mdb_cassert(mc, IS_LEAF(mp));
5166
5305
  leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
5167
5306
 
5168
5307
  if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
@@ -5193,8 +5332,6 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5193
5332
  MDB_node *leaf = NULL;
5194
5333
  DKBUF;
5195
5334
 
5196
- assert(mc);
5197
- assert(key);
5198
5335
  if (key->mv_size == 0)
5199
5336
  return MDB_BAD_VALSIZE;
5200
5337
 
@@ -5284,7 +5421,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5284
5421
  if (!mc->mc_top) {
5285
5422
  /* There are no other pages */
5286
5423
  mc->mc_ki[mc->mc_top] = 0;
5287
- if (op == MDB_SET_RANGE) {
5424
+ if (op == MDB_SET_RANGE && !exactp) {
5288
5425
  rc = 0;
5289
5426
  goto set1;
5290
5427
  } else
@@ -5297,7 +5434,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5297
5434
  return rc;
5298
5435
 
5299
5436
  mp = mc->mc_pg[mc->mc_top];
5300
- assert(IS_LEAF(mp));
5437
+ mdb_cassert(mc, IS_LEAF(mp));
5301
5438
 
5302
5439
  set2:
5303
5440
  leaf = mdb_node_search(mc, key, exactp);
@@ -5311,7 +5448,7 @@ set2:
5311
5448
  if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS)
5312
5449
  return rc; /* no entries matched */
5313
5450
  mp = mc->mc_pg[mc->mc_top];
5314
- assert(IS_LEAF(mp));
5451
+ mdb_cassert(mc, IS_LEAF(mp));
5315
5452
  leaf = NODEPTR(mp, 0);
5316
5453
  }
5317
5454
 
@@ -5353,6 +5490,7 @@ set1:
5353
5490
  if (op == MDB_GET_BOTH || rc > 0)
5354
5491
  return MDB_NOTFOUND;
5355
5492
  rc = 0;
5493
+ *data = d2;
5356
5494
  }
5357
5495
 
5358
5496
  } else {
@@ -5386,7 +5524,7 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data)
5386
5524
  if (rc != MDB_SUCCESS)
5387
5525
  return rc;
5388
5526
  }
5389
- assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
5527
+ mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
5390
5528
 
5391
5529
  leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0);
5392
5530
  mc->mc_flags |= C_INITIALIZED;
@@ -5432,7 +5570,7 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data)
5432
5570
  if (rc != MDB_SUCCESS)
5433
5571
  return rc;
5434
5572
  }
5435
- assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
5573
+ mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
5436
5574
 
5437
5575
  }
5438
5576
  mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1;
@@ -5469,7 +5607,8 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5469
5607
  int exact = 0;
5470
5608
  int (*mfunc)(MDB_cursor *mc, MDB_val *key, MDB_val *data);
5471
5609
 
5472
- assert(mc);
5610
+ if (mc == NULL)
5611
+ return EINVAL;
5473
5612
 
5474
5613
  if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
5475
5614
  return MDB_BAD_TXN;
@@ -5521,12 +5660,10 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5521
5660
  case MDB_SET_RANGE:
5522
5661
  if (key == NULL) {
5523
5662
  rc = EINVAL;
5524
- } else if (key->mv_size > MDB_MAXKEYSIZE) {
5525
- rc = MDB_BAD_VALSIZE;
5526
- } else if (op == MDB_SET_RANGE)
5527
- rc = mdb_cursor_set(mc, key, data, op, NULL);
5528
- else
5529
- rc = mdb_cursor_set(mc, key, data, op, &exact);
5663
+ } else {
5664
+ rc = mdb_cursor_set(mc, key, data, op,
5665
+ op == MDB_SET_RANGE ? NULL : &exact);
5666
+ }
5530
5667
  break;
5531
5668
  case MDB_GET_MULTIPLE:
5532
5669
  if (data == NULL || !(mc->mc_flags & C_INITIALIZED)) {
@@ -5663,18 +5800,24 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5663
5800
  unsigned int flags)
5664
5801
  {
5665
5802
  enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */
5666
- MDB_env *env = mc->mc_txn->mt_env;
5803
+ MDB_env *env;
5667
5804
  MDB_node *leaf = NULL;
5668
- MDB_val xdata, *rdata, dkey;
5805
+ MDB_page *fp, *mp;
5806
+ uint16_t fp_flags;
5807
+ MDB_val xdata, *rdata, dkey, olddata;
5669
5808
  MDB_db dummy;
5670
- int do_sub = 0, insert = 0;
5809
+ int do_sub = 0, insert_key, insert_data;
5671
5810
  unsigned int mcount = 0, dcount = 0, nospill;
5672
5811
  size_t nsize;
5673
5812
  int rc, rc2;
5674
- char dbuf[MDB_MAXKEYSIZE+1];
5675
5813
  unsigned int nflags;
5676
5814
  DKBUF;
5677
5815
 
5816
+ if (mc == NULL || key == NULL)
5817
+ return EINVAL;
5818
+
5819
+ env = mc->mc_txn->mt_env;
5820
+
5678
5821
  /* Check this first so counter will always be zero on any
5679
5822
  * early failures.
5680
5823
  */
@@ -5691,14 +5834,14 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5691
5834
  if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
5692
5835
  return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
5693
5836
 
5694
- if (flags != MDB_CURRENT && (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE))
5695
- return MDB_BAD_VALSIZE;
5696
-
5697
- if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT) && data->mv_size > MDB_MAXKEYSIZE)
5837
+ if (key->mv_size-1 >= ENV_MAXKEY(env))
5698
5838
  return MDB_BAD_VALSIZE;
5699
5839
 
5700
5840
  #if SIZE_MAX > MAXDATASIZE
5701
- if (data->mv_size > MAXDATASIZE)
5841
+ if (data->mv_size > ((mc->mc_db->md_flags & MDB_DUPSORT) ? ENV_MAXKEY(env) : MAXDATASIZE))
5842
+ return MDB_BAD_VALSIZE;
5843
+ #else
5844
+ if ((mc->mc_db->md_flags & MDB_DUPSORT) && data->mv_size > ENV_MAXKEY(env))
5702
5845
  return MDB_BAD_VALSIZE;
5703
5846
  #endif
5704
5847
 
@@ -5782,11 +5925,21 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5782
5925
  return rc2;
5783
5926
  }
5784
5927
 
5785
- /* The key already exists */
5786
- if (rc == MDB_SUCCESS) {
5787
- MDB_page *fp, *mp;
5788
- MDB_val olddata;
5789
-
5928
+ insert_key = insert_data = rc;
5929
+ if (insert_key) {
5930
+ /* The key does not exist */
5931
+ DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top]));
5932
+ if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
5933
+ LEAFSIZE(key, data) > env->me_nodemax)
5934
+ {
5935
+ /* Too big for a node, insert in sub-DB */
5936
+ fp_flags = P_LEAF|P_DIRTY;
5937
+ fp = env->me_pbuf;
5938
+ fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
5939
+ fp->mp_lower = fp->mp_upper = olddata.mv_size = PAGEHDRSZ;
5940
+ goto prep_subDB;
5941
+ }
5942
+ } else {
5790
5943
  /* there's only a key anyway, so this is a no-op */
5791
5944
  if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
5792
5945
  unsigned int ksize = mc->mc_db->md_pad;
@@ -5806,6 +5959,12 @@ more:
5806
5959
 
5807
5960
  /* DB has dups? */
5808
5961
  if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) {
5962
+ /* Prepare (sub-)page/sub-DB to accept the new item,
5963
+ * if needed. fp: old sub-page or a header faking
5964
+ * it. mp: new (sub-)page. offset: growth in page
5965
+ * size. xdata: node data with new page or DB.
5966
+ */
5967
+ unsigned i, offset = 0;
5809
5968
  mp = fp = xdata.mv_data = env->me_pbuf;
5810
5969
  mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
5811
5970
 
@@ -5815,29 +5974,23 @@ more:
5815
5974
  if (flags == MDB_CURRENT)
5816
5975
  goto current;
5817
5976
 
5818
- dkey = olddata;
5819
5977
  #if UINT_MAX < SIZE_MAX
5820
- if (mc->mc_dbx->md_dcmp == mdb_cmp_int && dkey.mv_size == sizeof(size_t))
5821
- #ifdef MISALIGNED_OK
5822
- mc->mc_dbx->md_dcmp = mdb_cmp_long;
5823
- #else
5824
- mc->mc_dbx->md_dcmp = mdb_cmp_cint;
5825
- #endif
5978
+ if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
5979
+ mc->mc_dbx->md_dcmp = mdb_cmp_clong;
5826
5980
  #endif
5827
5981
  /* if data matches, skip it */
5828
- if (!mc->mc_dbx->md_dcmp(data, &dkey)) {
5982
+ if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
5829
5983
  if (flags & MDB_NODUPDATA)
5830
- rc = MDB_KEYEXIST;
5831
- else if (flags & MDB_MULTIPLE)
5832
- goto next_mult;
5833
- else
5834
- rc = MDB_SUCCESS;
5835
- return rc;
5984
+ return MDB_KEYEXIST;
5985
+ rc = MDB_SUCCESS;
5986
+ goto next_sub;
5836
5987
  }
5837
5988
 
5838
- /* create a fake page for the dup items */
5839
- memcpy(dbuf, dkey.mv_data, dkey.mv_size);
5840
- dkey.mv_data = dbuf;
5989
+ /* Back up original data item */
5990
+ dkey.mv_size = olddata.mv_size;
5991
+ dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size);
5992
+
5993
+ /* Make sub-page header for the dup items, with dummy body */
5841
5994
  fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
5842
5995
  fp->mp_lower = PAGEHDRSZ;
5843
5996
  xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
@@ -5850,22 +6003,19 @@ more:
5850
6003
  (dkey.mv_size & 1) + (data->mv_size & 1);
5851
6004
  }
5852
6005
  fp->mp_upper = xdata.mv_size;
6006
+ olddata.mv_size = fp->mp_upper; /* pretend olddata is fp */
5853
6007
  } else if (leaf->mn_flags & F_SUBDATA) {
5854
6008
  /* Data is on sub-DB, just store it */
5855
6009
  flags |= F_DUPDATA|F_SUBDATA;
5856
6010
  goto put_sub;
5857
6011
  } else {
5858
- /* See if we need to convert from fake page to subDB */
5859
- unsigned int offset;
5860
- unsigned int i;
5861
- uint16_t fp_flags;
5862
-
6012
+ /* Data is on sub-page */
5863
6013
  fp = olddata.mv_data;
5864
6014
  switch (flags) {
5865
6015
  default:
5866
6016
  if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) {
5867
- offset = NODESIZE + sizeof(indx_t) + data->mv_size;
5868
- offset += offset & 1;
6017
+ offset = EVEN(NODESIZE + sizeof(indx_t) +
6018
+ data->mv_size);
5869
6019
  break;
5870
6020
  }
5871
6021
  offset = fp->mp_pad;
@@ -5881,12 +6031,16 @@ more:
5881
6031
  flags |= F_DUPDATA;
5882
6032
  goto put_sub;
5883
6033
  }
5884
- fp_flags = fp->mp_flags;
5885
6034
  xdata.mv_size = olddata.mv_size + offset;
5886
- if (NODESIZE + sizeof(indx_t) + NODEKSZ(leaf) + xdata.mv_size
5887
- >= env->me_nodemax) {
5888
- /* yes, convert it */
6035
+ }
6036
+
6037
+ fp_flags = fp->mp_flags;
6038
+ if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) {
6039
+ /* Too big for a sub-page, convert to sub-DB */
6040
+ fp_flags &= ~P_SUBP;
6041
+ prep_subDB:
5889
6042
  if (mc->mc_db->md_flags & MDB_DUPFIXED) {
6043
+ fp_flags |= P_LEAF2;
5890
6044
  dummy.md_pad = fp->mp_pad;
5891
6045
  dummy.md_flags = MDB_DUPFIXED;
5892
6046
  if (mc->mc_db->md_flags & MDB_INTEGERDUP)
@@ -5907,13 +6061,13 @@ more:
5907
6061
  offset = env->me_psize - olddata.mv_size;
5908
6062
  flags |= F_DUPDATA|F_SUBDATA;
5909
6063
  dummy.md_root = mp->mp_pgno;
5910
- fp_flags &= ~P_SUBP;
5911
- }
6064
+ }
6065
+ if (mp != fp) {
5912
6066
  mp->mp_flags = fp_flags | P_DIRTY;
5913
6067
  mp->mp_pad = fp->mp_pad;
5914
6068
  mp->mp_lower = fp->mp_lower;
5915
6069
  mp->mp_upper = fp->mp_upper + offset;
5916
- if (IS_LEAF2(fp)) {
6070
+ if (fp_flags & P_LEAF2) {
5917
6071
  memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
5918
6072
  } else {
5919
6073
  memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper,
@@ -5926,7 +6080,8 @@ more:
5926
6080
  rdata = &xdata;
5927
6081
  flags |= F_DUPDATA;
5928
6082
  do_sub = 1;
5929
- mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
6083
+ if (!insert_key)
6084
+ mdb_node_del(mc, 0);
5930
6085
  goto new_sub;
5931
6086
  }
5932
6087
  current:
@@ -5966,7 +6121,8 @@ current:
5966
6121
  return ENOMEM;
5967
6122
  id2.mid = pg;
5968
6123
  id2.mptr = np;
5969
- mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
6124
+ rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
6125
+ mdb_cassert(mc, rc2 == 0);
5970
6126
  if (!(flags & MDB_RESERVE)) {
5971
6127
  /* Copy end of page, adjusting alignment so
5972
6128
  * compiler may copy words instead of bytes.
@@ -5984,7 +6140,7 @@ current:
5984
6140
  data->mv_data = METADATA(omp);
5985
6141
  else
5986
6142
  memcpy(METADATA(omp), data->mv_data, data->mv_size);
5987
- goto done;
6143
+ return MDB_SUCCESS;
5988
6144
  }
5989
6145
  }
5990
6146
  if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS)
@@ -5996,17 +6152,13 @@ current:
5996
6152
  */
5997
6153
  if (F_ISSET(flags, MDB_RESERVE))
5998
6154
  data->mv_data = olddata.mv_data;
5999
- else if (data->mv_size)
6155
+ else if (!(mc->mc_flags & C_SUB))
6000
6156
  memcpy(olddata.mv_data, data->mv_data, data->mv_size);
6001
6157
  else
6002
6158
  memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
6003
- goto done;
6159
+ return MDB_SUCCESS;
6004
6160
  }
6005
- mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
6006
- mc->mc_db->md_entries--;
6007
- } else {
6008
- DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top]));
6009
- insert = 1;
6161
+ mdb_node_del(mc, 0);
6010
6162
  }
6011
6163
 
6012
6164
  rdata = data;
@@ -6016,14 +6168,14 @@ new_sub:
6016
6168
  nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(env, key, rdata);
6017
6169
  if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) {
6018
6170
  if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA )
6019
- nflags &= ~MDB_APPEND;
6020
- if (!insert)
6171
+ nflags &= ~MDB_APPEND; /* sub-page may need room to grow */
6172
+ if (!insert_key)
6021
6173
  nflags |= MDB_SPLIT_REPLACE;
6022
6174
  rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags);
6023
6175
  } else {
6024
6176
  /* There is room already in this leaf page. */
6025
6177
  rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags);
6026
- if (rc == 0 && !do_sub && insert) {
6178
+ if (rc == 0 && insert_key) {
6027
6179
  /* Adjust other cursors pointing to mp */
6028
6180
  MDB_cursor *m2, *m3;
6029
6181
  MDB_dbi dbi = mc->mc_dbi;
@@ -6043,9 +6195,7 @@ new_sub:
6043
6195
  }
6044
6196
  }
6045
6197
 
6046
- if (rc != MDB_SUCCESS)
6047
- mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6048
- else {
6198
+ if (rc == MDB_SUCCESS) {
6049
6199
  /* Now store the actual data in the child DB. Note that we're
6050
6200
  * storing the user data in the keys field, so there are strict
6051
6201
  * size limits on dupdata. The actual data fields of the child
@@ -6053,6 +6203,7 @@ new_sub:
6053
6203
  */
6054
6204
  if (do_sub) {
6055
6205
  int xflags;
6206
+ size_t ecount;
6056
6207
  put_sub:
6057
6208
  xdata.mv_size = 0;
6058
6209
  xdata.mv_data = "";
@@ -6068,7 +6219,7 @@ put_sub:
6068
6219
  if (dkey.mv_size) {
6069
6220
  rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
6070
6221
  if (rc)
6071
- return rc;
6222
+ goto bad_sub;
6072
6223
  {
6073
6224
  /* Adjust other cursors pointing to mp */
6074
6225
  MDB_cursor *m2;
@@ -6086,6 +6237,7 @@ put_sub:
6086
6237
  /* we've done our job */
6087
6238
  dkey.mv_size = 0;
6088
6239
  }
6240
+ ecount = mc->mc_xcursor->mx_db.md_entries;
6089
6241
  if (flags & MDB_APPENDDUP)
6090
6242
  xflags |= MDB_APPEND;
6091
6243
  rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
@@ -6093,31 +6245,39 @@ put_sub:
6093
6245
  void *db = NODEDATA(leaf);
6094
6246
  memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
6095
6247
  }
6248
+ insert_data = mc->mc_xcursor->mx_db.md_entries - ecount;
6096
6249
  }
6097
- /* sub-writes might have failed so check rc again.
6098
- * Don't increment count if we just replaced an existing item.
6099
- */
6100
- if (!rc && !(flags & MDB_CURRENT))
6250
+ /* Increment count unless we just replaced an existing item. */
6251
+ if (insert_data)
6101
6252
  mc->mc_db->md_entries++;
6253
+ if (insert_key) {
6254
+ /* Invalidate txn if we created an empty sub-DB */
6255
+ if (rc)
6256
+ goto bad_sub;
6257
+ /* If we succeeded and the key didn't exist before,
6258
+ * make sure the cursor is marked valid.
6259
+ */
6260
+ mc->mc_flags |= C_INITIALIZED;
6261
+ }
6262
+ next_sub:
6102
6263
  if (flags & MDB_MULTIPLE) {
6103
6264
  if (!rc) {
6104
- next_mult:
6105
6265
  mcount++;
6106
6266
  /* let caller know how many succeeded, if any */
6107
6267
  data[1].mv_size = mcount;
6108
6268
  if (mcount < dcount) {
6109
6269
  data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size;
6270
+ insert_key = insert_data = 0;
6110
6271
  goto more;
6111
6272
  }
6112
6273
  }
6113
6274
  }
6275
+ return rc;
6276
+ bad_sub:
6277
+ if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */
6278
+ rc = MDB_CORRUPTED;
6114
6279
  }
6115
- done:
6116
- /* If we succeeded and the key didn't exist before, make sure
6117
- * the cursor is marked valid.
6118
- */
6119
- if (!rc && insert)
6120
- mc->mc_flags |= C_INITIALIZED;
6280
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6121
6281
  return rc;
6122
6282
  }
6123
6283
 
@@ -6145,14 +6305,21 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
6145
6305
  return rc;
6146
6306
 
6147
6307
  mp = mc->mc_pg[mc->mc_top];
6308
+ if (IS_LEAF2(mp))
6309
+ goto del_key;
6148
6310
  leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
6149
6311
 
6150
- if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_DUPDATA)) {
6151
- if (!(flags & MDB_NODUPDATA)) {
6312
+ if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
6313
+ if (flags & MDB_NODUPDATA) {
6314
+ /* mdb_cursor_del0() will subtract the final entry */
6315
+ mc->mc_db->md_entries -= mc->mc_xcursor->mx_db.md_entries - 1;
6316
+ } else {
6152
6317
  if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
6153
6318
  mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
6154
6319
  }
6155
6320
  rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
6321
+ if (rc)
6322
+ return rc;
6156
6323
  /* If sub-DB still has entries, we're done */
6157
6324
  if (mc->mc_xcursor->mx_db.md_entries) {
6158
6325
  if (leaf->mn_flags & F_SUBDATA) {
@@ -6183,14 +6350,28 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
6183
6350
  if (leaf->mn_flags & F_SUBDATA) {
6184
6351
  /* add all the child DB's pages to the free list */
6185
6352
  rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
6186
- if (rc == MDB_SUCCESS) {
6187
- mc->mc_db->md_entries -=
6188
- mc->mc_xcursor->mx_db.md_entries;
6189
- }
6353
+ if (rc)
6354
+ goto fail;
6190
6355
  }
6191
6356
  }
6192
6357
 
6193
- return mdb_cursor_del0(mc, leaf);
6358
+ /* add overflow pages to free list */
6359
+ if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
6360
+ MDB_page *omp;
6361
+ pgno_t pg;
6362
+
6363
+ memcpy(&pg, NODEDATA(leaf), sizeof(pg));
6364
+ if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) ||
6365
+ (rc = mdb_ovpage_free(mc, omp)))
6366
+ goto fail;
6367
+ }
6368
+
6369
+ del_key:
6370
+ return mdb_cursor_del0(mc);
6371
+
6372
+ fail:
6373
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6374
+ return rc;
6194
6375
  }
6195
6376
 
6196
6377
  /** Allocate and initialize new pages for a database.
@@ -6245,13 +6426,12 @@ mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data)
6245
6426
  size_t sz;
6246
6427
 
6247
6428
  sz = LEAFSIZE(key, data);
6248
- if (sz >= env->me_nodemax) {
6429
+ if (sz > env->me_nodemax) {
6249
6430
  /* put on overflow page */
6250
6431
  sz -= data->mv_size - sizeof(pgno_t);
6251
6432
  }
6252
- sz += sz & 1;
6253
6433
 
6254
- return sz + sizeof(indx_t);
6434
+ return EVEN(sz + sizeof(indx_t));
6255
6435
  }
6256
6436
 
6257
6437
  /** Calculate the size of a branch node.
@@ -6270,7 +6450,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key)
6270
6450
  size_t sz;
6271
6451
 
6272
6452
  sz = INDXSIZE(key);
6273
- if (sz >= env->me_nodemax) {
6453
+ if (sz > env->me_nodemax) {
6274
6454
  /* put on overflow page */
6275
6455
  /* not implemented */
6276
6456
  /* sz -= key->size - sizeof(pgno_t); */
@@ -6307,12 +6487,12 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
6307
6487
  MDB_page *ofp = NULL; /* overflow page */
6308
6488
  DKBUF;
6309
6489
 
6310
- assert(mp->mp_upper >= mp->mp_lower);
6490
+ mdb_cassert(mc, mp->mp_upper >= mp->mp_lower);
6311
6491
 
6312
6492
  DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]",
6313
6493
  IS_LEAF(mp) ? "leaf" : "branch",
6314
6494
  IS_SUBP(mp) ? "sub-" : "",
6315
- mp->mp_pgno, indx, data ? data->mv_size : 0,
6495
+ mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0,
6316
6496
  key ? key->mv_size : 0, key ? DKEY(key) : "null"));
6317
6497
 
6318
6498
  if (IS_LEAF2(mp)) {
@@ -6335,17 +6515,17 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
6335
6515
  if (key != NULL)
6336
6516
  node_size += key->mv_size;
6337
6517
  if (IS_LEAF(mp)) {
6338
- assert(data);
6518
+ mdb_cassert(mc, data);
6339
6519
  if (F_ISSET(flags, F_BIGDATA)) {
6340
6520
  /* Data already on overflow page. */
6341
6521
  node_size += sizeof(pgno_t);
6342
- } else if (node_size + data->mv_size >= mc->mc_txn->mt_env->me_nodemax) {
6522
+ } else if (node_size + data->mv_size > mc->mc_txn->mt_env->me_nodemax) {
6343
6523
  int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize);
6344
6524
  int rc;
6345
6525
  /* Put data on overflow page. */
6346
6526
  DPRINTF(("data size is %"Z"u, node would be %"Z"u, put data on overflow page",
6347
6527
  data->mv_size, node_size+data->mv_size));
6348
- node_size += sizeof(pgno_t) + (node_size & 1);
6528
+ node_size = EVEN(node_size + sizeof(pgno_t));
6349
6529
  if ((ssize_t)node_size > room)
6350
6530
  goto full;
6351
6531
  if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
@@ -6357,7 +6537,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
6357
6537
  node_size += data->mv_size;
6358
6538
  }
6359
6539
  }
6360
- node_size += node_size & 1;
6540
+ node_size = EVEN(node_size);
6361
6541
  if ((ssize_t)node_size > room)
6362
6542
  goto full;
6363
6543
 
@@ -6368,7 +6548,7 @@ update:
6368
6548
 
6369
6549
  /* Adjust free space offsets. */
6370
6550
  ofs = mp->mp_upper - node_size;
6371
- assert(ofs >= mp->mp_lower + sizeof(indx_t));
6551
+ mdb_cassert(mc, ofs >= mp->mp_lower + sizeof(indx_t));
6372
6552
  mp->mp_ptrs[indx] = ofs;
6373
6553
  mp->mp_upper = ofs;
6374
6554
  mp->mp_lower += sizeof(indx_t);
@@ -6386,7 +6566,7 @@ update:
6386
6566
  memcpy(NODEKEY(node), key->mv_data, key->mv_size);
6387
6567
 
6388
6568
  if (IS_LEAF(mp)) {
6389
- assert(key);
6569
+ mdb_cassert(mc, key);
6390
6570
  if (ofp == NULL) {
6391
6571
  if (F_ISSET(flags, F_BIGDATA))
6392
6572
  memcpy(node->mn_data + key->mv_size, data->mv_data,
@@ -6410,38 +6590,35 @@ update:
6410
6590
 
6411
6591
  full:
6412
6592
  DPRINTF(("not enough room in page %"Z"u, got %u ptrs",
6413
- mp->mp_pgno, NUMKEYS(mp)));
6593
+ mdb_dbg_pgno(mp), NUMKEYS(mp)));
6414
6594
  DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room));
6415
6595
  DPRINTF(("node size = %"Z"u", node_size));
6596
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6416
6597
  return MDB_PAGE_FULL;
6417
6598
  }
6418
6599
 
6419
6600
  /** Delete the specified node from a page.
6420
- * @param[in] mp The page to operate on.
6421
- * @param[in] indx The index of the node to delete.
6601
+ * @param[in] mc Cursor pointing to the node to delete.
6422
6602
  * @param[in] ksize The size of a node. Only used if the page is
6423
6603
  * part of a #MDB_DUPFIXED database.
6424
6604
  */
6425
6605
  static void
6426
- mdb_node_del(MDB_page *mp, indx_t indx, int ksize)
6606
+ mdb_node_del(MDB_cursor *mc, int ksize)
6427
6607
  {
6608
+ MDB_page *mp = mc->mc_pg[mc->mc_top];
6609
+ indx_t indx = mc->mc_ki[mc->mc_top];
6428
6610
  unsigned int sz;
6429
6611
  indx_t i, j, numkeys, ptr;
6430
6612
  MDB_node *node;
6431
6613
  char *base;
6432
6614
 
6433
- #if MDB_DEBUG
6434
- {
6435
- pgno_t pgno;
6436
- COPY_PGNO(pgno, mp->mp_pgno);
6437
6615
  DPRINTF(("delete node %u on %s page %"Z"u", indx,
6438
- IS_LEAF(mp) ? "leaf" : "branch", pgno));
6439
- }
6440
- #endif
6441
- assert(indx < NUMKEYS(mp));
6616
+ IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp)));
6617
+ numkeys = NUMKEYS(mp);
6618
+ mdb_cassert(mc, indx < numkeys);
6442
6619
 
6443
6620
  if (IS_LEAF2(mp)) {
6444
- int x = NUMKEYS(mp) - 1 - indx;
6621
+ int x = numkeys - 1 - indx;
6445
6622
  base = LEAF2KEY(mp, indx, ksize);
6446
6623
  if (x)
6447
6624
  memmove(base, base + ksize, x * ksize);
@@ -6458,10 +6635,9 @@ mdb_node_del(MDB_page *mp, indx_t indx, int ksize)
6458
6635
  else
6459
6636
  sz += NODEDSZ(node);
6460
6637
  }
6461
- sz += sz & 1;
6638
+ sz = EVEN(sz);
6462
6639
 
6463
6640
  ptr = mp->mp_ptrs[indx];
6464
- numkeys = NUMKEYS(mp);
6465
6641
  for (i = j = 0; i < numkeys; i++) {
6466
6642
  if (i != indx) {
6467
6643
  mp->mp_ptrs[j] = mp->mp_ptrs[i];
@@ -6488,25 +6664,22 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
6488
6664
  MDB_node *node;
6489
6665
  MDB_page *sp, *xp;
6490
6666
  char *base;
6491
- int osize, nsize;
6492
- int delta;
6667
+ int nsize, delta;
6493
6668
  indx_t i, numkeys, ptr;
6494
6669
 
6495
6670
  node = NODEPTR(mp, indx);
6496
6671
  sp = (MDB_page *)NODEDATA(node);
6497
- osize = NODEDSZ(node);
6498
-
6499
- delta = sp->mp_upper - sp->mp_lower;
6500
- SETDSZ(node, osize - delta);
6672
+ delta = SIZELEFT(sp);
6501
6673
  xp = (MDB_page *)((char *)sp + delta);
6502
6674
 
6503
6675
  /* shift subpage upward */
6504
6676
  if (IS_LEAF2(sp)) {
6505
6677
  nsize = NUMKEYS(sp) * sp->mp_pad;
6678
+ if (nsize & 1)
6679
+ return; /* do not make the node uneven-sized */
6506
6680
  memmove(METADATA(xp), METADATA(sp), nsize);
6507
6681
  } else {
6508
6682
  int i;
6509
- nsize = osize - sp->mp_upper;
6510
6683
  numkeys = NUMKEYS(sp);
6511
6684
  for (i=numkeys-1; i>=0; i--)
6512
6685
  xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta;
@@ -6517,6 +6690,9 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
6517
6690
  xp->mp_pad = sp->mp_pad;
6518
6691
  COPY_PGNO(xp->mp_pgno, mp->mp_pgno);
6519
6692
 
6693
+ nsize = NODEDSZ(node) - delta;
6694
+ SETDSZ(node, nsize);
6695
+
6520
6696
  /* shift lower nodes upward */
6521
6697
  ptr = mp->mp_ptrs[indx];
6522
6698
  numkeys = NUMKEYS(mp);
@@ -6604,11 +6780,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
6604
6780
  mx->mx_dbflag = DB_VALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */
6605
6781
  #if UINT_MAX < SIZE_MAX
6606
6782
  if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t))
6607
- #ifdef MISALIGNED_OK
6608
- mx->mx_dbx.md_cmp = mdb_cmp_long;
6609
- #else
6610
- mx->mx_dbx.md_cmp = mdb_cmp_cint;
6611
- #endif
6783
+ mx->mx_dbx.md_cmp = mdb_cmp_clong;
6612
6784
  #endif
6613
6785
  }
6614
6786
 
@@ -6628,7 +6800,7 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
6628
6800
  mc->mc_pg[0] = 0;
6629
6801
  mc->mc_flags = 0;
6630
6802
  if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
6631
- assert(mx != NULL);
6803
+ mdb_tassert(txn, mx != NULL);
6632
6804
  mc->mc_xcursor = mx;
6633
6805
  mdb_xcursor_init0(mc);
6634
6806
  } else {
@@ -6645,7 +6817,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
6645
6817
  MDB_cursor *mc;
6646
6818
  size_t size = sizeof(MDB_cursor);
6647
6819
 
6648
- if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
6820
+ if (!ret || !TXN_DBI_EXIST(txn, dbi))
6649
6821
  return EINVAL;
6650
6822
 
6651
6823
  if (txn->mt_flags & MDB_TXN_ERROR)
@@ -6677,12 +6849,15 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
6677
6849
  int
6678
6850
  mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc)
6679
6851
  {
6680
- if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
6852
+ if (!mc || !TXN_DBI_EXIST(txn, mc->mc_dbi))
6681
6853
  return EINVAL;
6682
6854
 
6683
6855
  if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors)
6684
6856
  return EINVAL;
6685
6857
 
6858
+ if (txn->mt_flags & MDB_TXN_ERROR)
6859
+ return MDB_BAD_TXN;
6860
+
6686
6861
  mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
6687
6862
  return MDB_SUCCESS;
6688
6863
  }
@@ -6699,6 +6874,9 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp)
6699
6874
  if (mc->mc_xcursor == NULL)
6700
6875
  return MDB_INCOMPATIBLE;
6701
6876
 
6877
+ if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
6878
+ return MDB_BAD_TXN;
6879
+
6702
6880
  leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
6703
6881
  if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
6704
6882
  *countp = 1;
@@ -6736,11 +6914,10 @@ mdb_cursor_txn(MDB_cursor *mc)
6736
6914
  MDB_dbi
6737
6915
  mdb_cursor_dbi(MDB_cursor *mc)
6738
6916
  {
6739
- assert(mc != NULL);
6740
6917
  return mc->mc_dbi;
6741
6918
  }
6742
6919
 
6743
- /** Replace the key for a node with a new key.
6920
+ /** Replace the key for a branch node with a new key.
6744
6921
  * @param[in] mc Cursor pointing to the node to operate on.
6745
6922
  * @param[in] key The new key to use.
6746
6923
  * @return 0 on success, non-zero on failure.
@@ -6752,7 +6929,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6752
6929
  MDB_node *node;
6753
6930
  char *base;
6754
6931
  size_t len;
6755
- int delta, delta0;
6932
+ int delta, ksize, oksize;
6756
6933
  indx_t ptr, i, numkeys, indx;
6757
6934
  DKBUF;
6758
6935
 
@@ -6763,7 +6940,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6763
6940
  #if MDB_DEBUG
6764
6941
  {
6765
6942
  MDB_val k2;
6766
- char kbuf2[(MDB_MAXKEYSIZE*2+1)];
6943
+ char kbuf2[DKBUF_MAXKEYSIZE*2+1];
6767
6944
  k2.mv_data = NODEKEY(node);
6768
6945
  k2.mv_size = node->mn_ksize;
6769
6946
  DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u",
@@ -6774,19 +6951,19 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6774
6951
  }
6775
6952
  #endif
6776
6953
 
6777
- delta0 = delta = key->mv_size - node->mn_ksize;
6954
+ /* Sizes must be 2-byte aligned. */
6955
+ ksize = EVEN(key->mv_size);
6956
+ oksize = EVEN(node->mn_ksize);
6957
+ delta = ksize - oksize;
6778
6958
 
6779
- /* Must be 2-byte aligned. If new key is
6780
- * shorter by 1, the shift will be skipped.
6781
- */
6782
- delta += (delta & 1);
6959
+ /* Shift node contents if EVEN(key length) changed. */
6783
6960
  if (delta) {
6784
6961
  if (delta > 0 && SIZELEFT(mp) < delta) {
6785
6962
  pgno_t pgno;
6786
6963
  /* not enough space left, do a delete and split */
6787
6964
  DPRINTF(("Not enough room, delta = %d, splitting...", delta));
6788
6965
  pgno = NODEPGNO(node);
6789
- mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
6966
+ mdb_node_del(mc, 0);
6790
6967
  return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE);
6791
6968
  }
6792
6969
 
@@ -6805,7 +6982,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6805
6982
  }
6806
6983
 
6807
6984
  /* But even if no shift was needed, update ksize */
6808
- if (delta0)
6985
+ if (node->mn_ksize != key->mv_size)
6809
6986
  node->mn_ksize = key->mv_size;
6810
6987
 
6811
6988
  if (key->mv_size)
@@ -6837,7 +7014,6 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6837
7014
  return rc;
6838
7015
 
6839
7016
  if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6840
- srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */
6841
7017
  key.mv_size = csrc->mc_db->md_pad;
6842
7018
  key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
6843
7019
  data.mv_size = 0;
@@ -6846,14 +7022,16 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6846
7022
  flags = 0;
6847
7023
  } else {
6848
7024
  srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]);
6849
- assert(!((size_t)srcnode&1));
7025
+ mdb_cassert(csrc, !((size_t)srcnode & 1));
6850
7026
  srcpg = NODEPGNO(srcnode);
6851
7027
  flags = srcnode->mn_flags;
6852
7028
  if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
6853
7029
  unsigned int snum = csrc->mc_snum;
6854
7030
  MDB_node *s2;
6855
7031
  /* must find the lowest key below src */
6856
- mdb_page_search_lowest(csrc);
7032
+ rc = mdb_page_search_lowest(csrc);
7033
+ if (rc)
7034
+ return rc;
6857
7035
  if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6858
7036
  key.mv_size = csrc->mc_db->md_pad;
6859
7037
  key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
@@ -6876,7 +7054,9 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6876
7054
  MDB_node *s2;
6877
7055
  MDB_val bkey;
6878
7056
  /* must find the lowest key below dst */
6879
- mdb_page_search_lowest(cdst);
7057
+ rc = mdb_page_search_lowest(cdst);
7058
+ if (rc)
7059
+ return rc;
6880
7060
  if (IS_LEAF2(cdst->mc_pg[cdst->mc_top])) {
6881
7061
  bkey.mv_size = cdst->mc_db->md_pad;
6882
7062
  bkey.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, bkey.mv_size);
@@ -6909,7 +7089,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6909
7089
 
6910
7090
  /* Delete the node from the source page.
6911
7091
  */
6912
- mdb_node_del(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
7092
+ mdb_node_del(csrc, key.mv_size);
6913
7093
 
6914
7094
  {
6915
7095
  /* Adjust other cursors pointing to mp */
@@ -6957,7 +7137,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6957
7137
  csrc->mc_ki[csrc->mc_top] = 0;
6958
7138
  rc = mdb_update_key(csrc, &nullkey);
6959
7139
  csrc->mc_ki[csrc->mc_top] = ix;
6960
- assert(rc == MDB_SUCCESS);
7140
+ mdb_cassert(csrc, rc == MDB_SUCCESS);
6961
7141
  }
6962
7142
  }
6963
7143
 
@@ -6985,7 +7165,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6985
7165
  cdst->mc_ki[cdst->mc_top] = 0;
6986
7166
  rc = mdb_update_key(cdst, &nullkey);
6987
7167
  cdst->mc_ki[cdst->mc_top] = ix;
6988
- assert(rc == MDB_SUCCESS);
7168
+ mdb_cassert(csrc, rc == MDB_SUCCESS);
6989
7169
  }
6990
7170
  }
6991
7171
 
@@ -6998,6 +7178,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6998
7178
  * the \b csrc page will be freed.
6999
7179
  * @param[in] csrc Cursor pointing to the source page.
7000
7180
  * @param[in] cdst Cursor pointing to the destination page.
7181
+ * @return 0 on success, non-zero on failure.
7001
7182
  */
7002
7183
  static int
7003
7184
  mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
@@ -7011,8 +7192,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7011
7192
  DPRINTF(("merging page %"Z"u into %"Z"u", csrc->mc_pg[csrc->mc_top]->mp_pgno,
7012
7193
  cdst->mc_pg[cdst->mc_top]->mp_pgno));
7013
7194
 
7014
- assert(csrc->mc_snum > 1); /* can't merge root page */
7015
- assert(cdst->mc_snum > 1);
7195
+ mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
7196
+ mdb_cassert(csrc, cdst->mc_snum > 1);
7016
7197
 
7017
7198
  /* Mark dst as dirty. */
7018
7199
  if ((rc = mdb_page_touch(cdst)))
@@ -7037,7 +7218,9 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7037
7218
  unsigned int snum = csrc->mc_snum;
7038
7219
  MDB_node *s2;
7039
7220
  /* must find the lowest key below src */
7040
- mdb_page_search_lowest(csrc);
7221
+ rc = mdb_page_search_lowest(csrc);
7222
+ if (rc)
7223
+ return rc;
7041
7224
  if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
7042
7225
  key.mv_size = csrc->mc_db->md_pad;
7043
7226
  key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
@@ -7067,15 +7250,17 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7067
7250
 
7068
7251
  /* Unlink the src page from parent and add to free list.
7069
7252
  */
7070
- mdb_node_del(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], 0);
7071
- if (csrc->mc_ki[csrc->mc_top-1] == 0) {
7253
+ csrc->mc_top--;
7254
+ mdb_node_del(csrc, 0);
7255
+ if (csrc->mc_ki[csrc->mc_top] == 0) {
7072
7256
  key.mv_size = 0;
7073
- csrc->mc_top--;
7074
7257
  rc = mdb_update_key(csrc, &key);
7075
- csrc->mc_top++;
7076
- if (rc)
7258
+ if (rc) {
7259
+ csrc->mc_top++;
7077
7260
  return rc;
7261
+ }
7078
7262
  }
7263
+ csrc->mc_top++;
7079
7264
 
7080
7265
  rc = mdb_midl_append(&csrc->mc_txn->mt_free_pgs,
7081
7266
  csrc->mc_pg[csrc->mc_top]->mp_pgno);
@@ -7104,9 +7289,18 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7104
7289
  }
7105
7290
  }
7106
7291
  }
7107
- mdb_cursor_pop(csrc);
7108
-
7109
- return mdb_rebalance(csrc);
7292
+ {
7293
+ unsigned int snum = cdst->mc_snum;
7294
+ uint16_t depth = cdst->mc_db->md_depth;
7295
+ mdb_cursor_pop(cdst);
7296
+ rc = mdb_rebalance(cdst);
7297
+ /* Did the tree shrink? */
7298
+ if (depth > cdst->mc_db->md_depth)
7299
+ snum--;
7300
+ cdst->mc_snum = snum;
7301
+ cdst->mc_top = snum-1;
7302
+ }
7303
+ return rc;
7110
7304
  }
7111
7305
 
7112
7306
  /** Copy the contents of a cursor.
@@ -7144,27 +7338,18 @@ mdb_rebalance(MDB_cursor *mc)
7144
7338
  int rc;
7145
7339
  unsigned int ptop, minkeys;
7146
7340
  MDB_cursor mn;
7341
+ indx_t oldki;
7147
7342
 
7148
7343
  minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top]));
7149
- #if MDB_DEBUG
7150
- {
7151
- pgno_t pgno;
7152
- COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
7153
7344
  DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)",
7154
7345
  IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
7155
- pgno, NUMKEYS(mc->mc_pg[mc->mc_top]),
7346
+ mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]),
7156
7347
  (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10));
7157
- }
7158
- #endif
7159
7348
 
7160
7349
  if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD &&
7161
7350
  NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
7162
- #if MDB_DEBUG
7163
- pgno_t pgno;
7164
- COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
7165
7351
  DPRINTF(("no need to rebalance page %"Z"u, above fill threshold",
7166
- pgno));
7167
- #endif
7352
+ mdb_dbg_pgno(mc->mc_pg[mc->mc_top])));
7168
7353
  return MDB_SUCCESS;
7169
7354
  }
7170
7355
 
@@ -7204,6 +7389,7 @@ mdb_rebalance(MDB_cursor *mc)
7204
7389
  }
7205
7390
  }
7206
7391
  } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) {
7392
+ int i;
7207
7393
  DPUTS("collapsing root page!");
7208
7394
  rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno);
7209
7395
  if (rc)
@@ -7215,6 +7401,10 @@ mdb_rebalance(MDB_cursor *mc)
7215
7401
  mc->mc_db->md_depth--;
7216
7402
  mc->mc_db->md_branch_pages--;
7217
7403
  mc->mc_ki[0] = mc->mc_ki[1];
7404
+ for (i = 1; i<mc->mc_db->md_depth; i++) {
7405
+ mc->mc_pg[i] = mc->mc_pg[i+1];
7406
+ mc->mc_ki[i] = mc->mc_ki[i+1];
7407
+ }
7218
7408
  {
7219
7409
  /* Adjust other cursors pointing to mp */
7220
7410
  MDB_cursor *m2, *m3;
@@ -7227,7 +7417,6 @@ mdb_rebalance(MDB_cursor *mc)
7227
7417
  m3 = m2;
7228
7418
  if (m3 == mc || m3->mc_snum < mc->mc_snum) continue;
7229
7419
  if (m3->mc_pg[0] == mp) {
7230
- int i;
7231
7420
  m3->mc_snum--;
7232
7421
  m3->mc_top--;
7233
7422
  for (i=0; i<m3->mc_snum; i++) {
@@ -7246,7 +7435,7 @@ mdb_rebalance(MDB_cursor *mc)
7246
7435
  * otherwise the tree is invalid.
7247
7436
  */
7248
7437
  ptop = mc->mc_top-1;
7249
- assert(NUMKEYS(mc->mc_pg[ptop]) > 1);
7438
+ mdb_cassert(mc, NUMKEYS(mc->mc_pg[ptop]) > 1);
7250
7439
 
7251
7440
  /* Leaf page fill factor is below the threshold.
7252
7441
  * Try to move keys from left or right neighbor, or
@@ -7258,6 +7447,7 @@ mdb_rebalance(MDB_cursor *mc)
7258
7447
  mdb_cursor_copy(mc, &mn);
7259
7448
  mn.mc_xcursor = NULL;
7260
7449
 
7450
+ oldki = mc->mc_ki[mc->mc_top];
7261
7451
  if (mc->mc_ki[ptop] == 0) {
7262
7452
  /* We're the leftmost leaf in our parent.
7263
7453
  */
@@ -7291,113 +7481,115 @@ mdb_rebalance(MDB_cursor *mc)
7291
7481
  * (A branch page must never have less than 2 keys.)
7292
7482
  */
7293
7483
  minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top]));
7294
- if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys)
7295
- return mdb_node_move(&mn, mc);
7296
- else {
7297
- if (mc->mc_ki[ptop] == 0)
7484
+ if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
7485
+ rc = mdb_node_move(&mn, mc);
7486
+ if (mc->mc_ki[ptop]) {
7487
+ oldki++;
7488
+ }
7489
+ } else {
7490
+ if (mc->mc_ki[ptop] == 0) {
7298
7491
  rc = mdb_page_merge(&mn, mc);
7299
- else {
7492
+ } else {
7493
+ oldki += NUMKEYS(mn.mc_pg[mn.mc_top]);
7300
7494
  mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
7301
7495
  rc = mdb_page_merge(mc, &mn);
7302
7496
  mdb_cursor_copy(&mn, mc);
7303
7497
  }
7304
- mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
7498
+ mc->mc_flags &= ~C_EOF;
7305
7499
  }
7500
+ mc->mc_ki[mc->mc_top] = oldki;
7306
7501
  return rc;
7307
7502
  }
7308
7503
 
7309
7504
  /** Complete a delete operation started by #mdb_cursor_del(). */
7310
7505
  static int
7311
- mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf)
7506
+ mdb_cursor_del0(MDB_cursor *mc)
7312
7507
  {
7313
7508
  int rc;
7314
7509
  MDB_page *mp;
7315
7510
  indx_t ki;
7316
7511
  unsigned int nkeys;
7317
7512
 
7318
- mp = mc->mc_pg[mc->mc_top];
7319
7513
  ki = mc->mc_ki[mc->mc_top];
7320
-
7321
- /* add overflow pages to free list */
7322
- if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_BIGDATA)) {
7323
- MDB_page *omp;
7324
- pgno_t pg;
7325
-
7326
- memcpy(&pg, NODEDATA(leaf), sizeof(pg));
7327
- if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) ||
7328
- (rc = mdb_ovpage_free(mc, omp)))
7329
- return rc;
7330
- }
7331
- mdb_node_del(mp, ki, mc->mc_db->md_pad);
7514
+ mdb_node_del(mc, mc->mc_db->md_pad);
7332
7515
  mc->mc_db->md_entries--;
7333
7516
  rc = mdb_rebalance(mc);
7334
- if (rc != MDB_SUCCESS)
7335
- mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
7336
- else {
7337
- MDB_cursor *m2;
7517
+
7518
+ if (rc == MDB_SUCCESS) {
7519
+ MDB_cursor *m2, *m3;
7338
7520
  MDB_dbi dbi = mc->mc_dbi;
7339
7521
 
7340
7522
  mp = mc->mc_pg[mc->mc_top];
7341
7523
  nkeys = NUMKEYS(mp);
7342
7524
 
7343
7525
  /* if mc points past last node in page, find next sibling */
7344
- if (mc->mc_ki[mc->mc_top] >= nkeys)
7345
- mdb_cursor_sibling(mc, 1);
7526
+ if (mc->mc_ki[mc->mc_top] >= nkeys) {
7527
+ rc = mdb_cursor_sibling(mc, 1);
7528
+ if (rc == MDB_NOTFOUND)
7529
+ rc = MDB_SUCCESS;
7530
+ }
7346
7531
 
7347
7532
  /* Adjust other cursors pointing to mp */
7348
- for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
7349
- if (m2 == mc || m2->mc_snum < mc->mc_snum)
7533
+ for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) {
7534
+ m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
7535
+ if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED))
7350
7536
  continue;
7351
- if (!(m2->mc_flags & C_INITIALIZED))
7537
+ if (m3 == mc || m3->mc_snum < mc->mc_snum)
7352
7538
  continue;
7353
- if (m2->mc_pg[mc->mc_top] == mp) {
7354
- if (m2->mc_ki[mc->mc_top] >= ki) {
7355
- m2->mc_flags |= C_DEL;
7356
- if (m2->mc_ki[mc->mc_top] > ki)
7357
- m2->mc_ki[mc->mc_top]--;
7539
+ if (m3->mc_pg[mc->mc_top] == mp) {
7540
+ if (m3->mc_ki[mc->mc_top] >= ki) {
7541
+ m3->mc_flags |= C_DEL;
7542
+ if (m3->mc_ki[mc->mc_top] > ki)
7543
+ m3->mc_ki[mc->mc_top]--;
7544
+ }
7545
+ if (m3->mc_ki[mc->mc_top] >= nkeys) {
7546
+ rc = mdb_cursor_sibling(m3, 1);
7547
+ if (rc == MDB_NOTFOUND)
7548
+ rc = MDB_SUCCESS;
7358
7549
  }
7359
- if (m2->mc_ki[mc->mc_top] >= nkeys)
7360
- mdb_cursor_sibling(m2, 1);
7361
7550
  }
7362
7551
  }
7363
7552
  mc->mc_flags |= C_DEL;
7364
7553
  }
7365
7554
 
7555
+ if (rc)
7556
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
7366
7557
  return rc;
7367
7558
  }
7368
7559
 
7369
7560
  int
7370
7561
  mdb_del(MDB_txn *txn, MDB_dbi dbi,
7371
7562
  MDB_val *key, MDB_val *data)
7563
+ {
7564
+ if (!key || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
7565
+ return EINVAL;
7566
+
7567
+ if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
7568
+ return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
7569
+
7570
+ if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
7571
+ /* must ignore any data */
7572
+ data = NULL;
7573
+ }
7574
+
7575
+ return mdb_del0(txn, dbi, key, data, 0);
7576
+ }
7577
+
7578
+ static int
7579
+ mdb_del0(MDB_txn *txn, MDB_dbi dbi,
7580
+ MDB_val *key, MDB_val *data, unsigned flags)
7372
7581
  {
7373
7582
  MDB_cursor mc;
7374
7583
  MDB_xcursor mx;
7375
7584
  MDB_cursor_op op;
7376
7585
  MDB_val rdata, *xdata;
7377
- int rc, exact;
7586
+ int rc, exact = 0;
7378
7587
  DKBUF;
7379
7588
 
7380
- assert(key != NULL);
7381
-
7382
7589
  DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key)));
7383
7590
 
7384
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
7385
- return EINVAL;
7386
-
7387
- if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
7388
- return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
7389
-
7390
- if (key->mv_size > MDB_MAXKEYSIZE) {
7391
- return MDB_BAD_VALSIZE;
7392
- }
7393
-
7394
7591
  mdb_cursor_init(&mc, txn, dbi, &mx);
7395
7592
 
7396
- exact = 0;
7397
- if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
7398
- /* must ignore any data */
7399
- data = NULL;
7400
- }
7401
7593
  if (data) {
7402
7594
  op = MDB_GET_BOTH;
7403
7595
  rdata = *data;
@@ -7405,6 +7597,7 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi,
7405
7597
  } else {
7406
7598
  op = MDB_SET;
7407
7599
  xdata = NULL;
7600
+ flags |= MDB_NODUPDATA;
7408
7601
  }
7409
7602
  rc = mdb_cursor_set(&mc, key, xdata, op, &exact);
7410
7603
  if (rc == 0) {
@@ -7419,7 +7612,7 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi,
7419
7612
  mc.mc_flags |= C_UNTRACK;
7420
7613
  mc.mc_next = txn->mt_cursors[dbi];
7421
7614
  txn->mt_cursors[dbi] = &mc;
7422
- rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA);
7615
+ rc = mdb_cursor_del(&mc, flags);
7423
7616
  txn->mt_cursors[dbi] = mc.mc_next;
7424
7617
  }
7425
7618
  return rc;
@@ -7468,7 +7661,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7468
7661
 
7469
7662
  if (mc->mc_snum < 2) {
7470
7663
  if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp)))
7471
- return rc;
7664
+ goto done;
7472
7665
  /* shift current top to make room for new parent */
7473
7666
  mc->mc_pg[1] = mc->mc_pg[0];
7474
7667
  mc->mc_ki[1] = mc->mc_ki[0];
@@ -7486,7 +7679,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7486
7679
  mc->mc_ki[0] = mc->mc_ki[1];
7487
7680
  mc->mc_db->md_root = mp->mp_pgno;
7488
7681
  mc->mc_db->md_depth--;
7489
- return rc;
7682
+ goto done;
7490
7683
  }
7491
7684
  mc->mc_snum = 2;
7492
7685
  mc->mc_top = 1;
@@ -7515,7 +7708,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7515
7708
  int x;
7516
7709
  unsigned int lsize, rsize, ksize;
7517
7710
  /* Move half of the keys to the right sibling */
7518
- copy = NULL;
7519
7711
  x = mc->mc_ki[mc->mc_top] - split_indx;
7520
7712
  ksize = mc->mc_db->md_pad;
7521
7713
  split = LEAF2KEY(mp, split_indx, ksize);
@@ -7558,12 +7750,14 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7558
7750
  nsize = mdb_leaf_size(env, newkey, newdata);
7559
7751
  else
7560
7752
  nsize = mdb_branch_size(env, newkey);
7561
- nsize += nsize & 1;
7753
+ nsize = EVEN(nsize);
7562
7754
 
7563
7755
  /* grab a page to hold a temporary copy */
7564
7756
  copy = mdb_page_malloc(mc->mc_txn, 1);
7565
- if (copy == NULL)
7566
- return ENOMEM;
7757
+ if (copy == NULL) {
7758
+ rc = ENOMEM;
7759
+ goto done;
7760
+ }
7567
7761
  copy->mp_pgno = mp->mp_pgno;
7568
7762
  copy->mp_flags = mp->mp_flags;
7569
7763
  copy->mp_lower = PAGEHDRSZ;
@@ -7615,7 +7809,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7615
7809
  else
7616
7810
  psize += NODEDSZ(node);
7617
7811
  }
7618
- psize += psize & 1;
7812
+ psize = EVEN(psize);
7619
7813
  }
7620
7814
  if (psize > pmax || i == k-j) {
7621
7815
  split_indx = i + (j<0);
@@ -7643,6 +7837,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7643
7837
  mn.mc_top--;
7644
7838
  did_split = 1;
7645
7839
  rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0);
7840
+ if (rc)
7841
+ goto done;
7646
7842
 
7647
7843
  /* root split? */
7648
7844
  if (mn.mc_snum == mc->mc_snum) {
@@ -7664,7 +7860,13 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7664
7860
  mc->mc_ki[i] = mn.mc_ki[i];
7665
7861
  }
7666
7862
  mc->mc_pg[ptop] = mn.mc_pg[ptop];
7667
- mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
7863
+ if (mn.mc_ki[ptop]) {
7864
+ mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
7865
+ } else {
7866
+ /* find right page's left sibling */
7867
+ mc->mc_ki[ptop] = mn.mc_ki[ptop];
7868
+ mdb_cursor_sibling(mc, 0);
7869
+ }
7668
7870
  }
7669
7871
  } else {
7670
7872
  mn.mc_top--;
@@ -7673,14 +7875,14 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7673
7875
  }
7674
7876
  mc->mc_flags ^= C_SPLITTING;
7675
7877
  if (rc != MDB_SUCCESS) {
7676
- return rc;
7878
+ goto done;
7677
7879
  }
7678
7880
  if (nflags & MDB_APPEND) {
7679
7881
  mc->mc_pg[mc->mc_top] = rp;
7680
7882
  mc->mc_ki[mc->mc_top] = 0;
7681
7883
  rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags);
7682
7884
  if (rc)
7683
- return rc;
7885
+ goto done;
7684
7886
  for (i=0; i<mc->mc_top; i++)
7685
7887
  mc->mc_ki[i] = mn.mc_ki[i];
7686
7888
  } else if (!IS_LEAF2(mp)) {
@@ -7718,11 +7920,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7718
7920
  }
7719
7921
 
7720
7922
  rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags);
7721
- if (rc) {
7722
- /* return tmp page to freelist */
7723
- mdb_page_free(env, copy);
7724
- return rc;
7725
- }
7923
+ if (rc)
7924
+ goto done;
7726
7925
  if (i == nkeys) {
7727
7926
  i = 0;
7728
7927
  j = 0;
@@ -7756,16 +7955,12 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7756
7955
  */
7757
7956
  if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
7758
7957
  mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
7759
- for (i=0; i<ptop; i++) {
7958
+ for (i=0; i<=ptop; i++) {
7760
7959
  mc->mc_pg[i] = mn.mc_pg[i];
7761
7960
  mc->mc_ki[i] = mn.mc_ki[i];
7762
7961
  }
7763
- mc->mc_pg[ptop] = mn.mc_pg[ptop];
7764
- mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
7765
7962
  }
7766
7963
  }
7767
- /* return tmp page to freelist */
7768
- mdb_page_free(env, copy);
7769
7964
  }
7770
7965
 
7771
7966
  {
@@ -7816,6 +8011,12 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7816
8011
  }
7817
8012
  }
7818
8013
  DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp)));
8014
+
8015
+ done:
8016
+ if (copy) /* tmp page */
8017
+ mdb_page_free(env, copy);
8018
+ if (rc)
8019
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
7819
8020
  return rc;
7820
8021
  }
7821
8022
 
@@ -7826,10 +8027,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
7826
8027
  MDB_cursor mc;
7827
8028
  MDB_xcursor mx;
7828
8029
 
7829
- assert(key != NULL);
7830
- assert(data != NULL);
7831
-
7832
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8030
+ if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
7833
8031
  return EINVAL;
7834
8032
 
7835
8033
  if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
@@ -7861,6 +8059,32 @@ mdb_env_get_flags(MDB_env *env, unsigned int *arg)
7861
8059
  return MDB_SUCCESS;
7862
8060
  }
7863
8061
 
8062
+ int
8063
+ mdb_env_set_userctx(MDB_env *env, void *ctx)
8064
+ {
8065
+ if (!env)
8066
+ return EINVAL;
8067
+ env->me_userctx = ctx;
8068
+ return MDB_SUCCESS;
8069
+ }
8070
+
8071
+ void *
8072
+ mdb_env_get_userctx(MDB_env *env)
8073
+ {
8074
+ return env ? env->me_userctx : NULL;
8075
+ }
8076
+
8077
+ int
8078
+ mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
8079
+ {
8080
+ if (!env)
8081
+ return EINVAL;
8082
+ #ifndef NDEBUG
8083
+ env->me_assert_func = func;
8084
+ #endif
8085
+ return MDB_SUCCESS;
8086
+ }
8087
+
7864
8088
  int
7865
8089
  mdb_env_get_path(MDB_env *env, const char **arg)
7866
8090
  {
@@ -8062,9 +8286,12 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
8062
8286
 
8063
8287
  int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg)
8064
8288
  {
8065
- if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
8289
+ if (!arg || !TXN_DBI_EXIST(txn, dbi))
8066
8290
  return EINVAL;
8067
8291
 
8292
+ if (txn->mt_flags & MDB_TXN_ERROR)
8293
+ return MDB_BAD_TXN;
8294
+
8068
8295
  if (txn->mt_dbflags[dbi] & DB_STALE) {
8069
8296
  MDB_cursor mc;
8070
8297
  MDB_xcursor mx;
@@ -8089,7 +8316,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
8089
8316
  int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags)
8090
8317
  {
8091
8318
  /* We could return the flags for the FREE_DBI too but what's the point? */
8092
- if (txn == NULL || dbi < MAIN_DBI || dbi >= txn->mt_numdbs)
8319
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8093
8320
  return EINVAL;
8094
8321
  *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS;
8095
8322
  return MDB_SUCCESS;
@@ -8129,22 +8356,22 @@ mdb_drop0(MDB_cursor *mc, int subs)
8129
8356
  memcpy(&pg, NODEDATA(ni), sizeof(pg));
8130
8357
  rc = mdb_page_get(txn, pg, &omp, NULL);
8131
8358
  if (rc != 0)
8132
- return rc;
8133
- assert(IS_OVERFLOW(omp));
8359
+ goto done;
8360
+ mdb_cassert(mc, IS_OVERFLOW(omp));
8134
8361
  rc = mdb_midl_append_range(&txn->mt_free_pgs,
8135
8362
  pg, omp->mp_pages);
8136
8363
  if (rc)
8137
- return rc;
8364
+ goto done;
8138
8365
  } else if (subs && (ni->mn_flags & F_SUBDATA)) {
8139
8366
  mdb_xcursor_init1(mc, ni);
8140
8367
  rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
8141
8368
  if (rc)
8142
- return rc;
8369
+ goto done;
8143
8370
  }
8144
8371
  }
8145
8372
  } else {
8146
8373
  if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0)
8147
- return rc;
8374
+ goto done;
8148
8375
  for (i=0; i<n; i++) {
8149
8376
  pgno_t pg;
8150
8377
  ni = NODEPTR(mp, i);
@@ -8158,6 +8385,8 @@ mdb_drop0(MDB_cursor *mc, int subs)
8158
8385
  mc->mc_ki[mc->mc_top] = i;
8159
8386
  rc = mdb_cursor_sibling(mc, 1);
8160
8387
  if (rc) {
8388
+ if (rc != MDB_NOTFOUND)
8389
+ goto done;
8161
8390
  /* no more siblings, go back to beginning
8162
8391
  * of previous level.
8163
8392
  */
@@ -8171,6 +8400,9 @@ mdb_drop0(MDB_cursor *mc, int subs)
8171
8400
  }
8172
8401
  /* free it */
8173
8402
  rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root);
8403
+ done:
8404
+ if (rc)
8405
+ txn->mt_flags |= MDB_TXN_ERROR;
8174
8406
  } else if (rc == MDB_NOTFOUND) {
8175
8407
  rc = MDB_SUCCESS;
8176
8408
  }
@@ -8182,7 +8414,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
8182
8414
  MDB_cursor *mc, *m2;
8183
8415
  int rc;
8184
8416
 
8185
- if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
8417
+ if ((unsigned)del > 1 || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8186
8418
  return EINVAL;
8187
8419
 
8188
8420
  if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
@@ -8201,10 +8433,12 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
8201
8433
 
8202
8434
  /* Can't delete the main DB */
8203
8435
  if (del && dbi > MAIN_DBI) {
8204
- rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
8436
+ rc = mdb_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, 0);
8205
8437
  if (!rc) {
8206
8438
  txn->mt_dbflags[dbi] = DB_STALE;
8207
8439
  mdb_dbi_close(txn->mt_env, dbi);
8440
+ } else {
8441
+ txn->mt_flags |= MDB_TXN_ERROR;
8208
8442
  }
8209
8443
  } else {
8210
8444
  /* reset the DB record, mark it dirty */
@@ -8225,7 +8459,7 @@ leave:
8225
8459
 
8226
8460
  int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8227
8461
  {
8228
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8462
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8229
8463
  return EINVAL;
8230
8464
 
8231
8465
  txn->mt_dbxs[dbi].md_cmp = cmp;
@@ -8234,7 +8468,7 @@ int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8234
8468
 
8235
8469
  int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8236
8470
  {
8237
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8471
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8238
8472
  return EINVAL;
8239
8473
 
8240
8474
  txn->mt_dbxs[dbi].md_dcmp = cmp;
@@ -8243,7 +8477,7 @@ int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8243
8477
 
8244
8478
  int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
8245
8479
  {
8246
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8480
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8247
8481
  return EINVAL;
8248
8482
 
8249
8483
  txn->mt_dbxs[dbi].md_rel = rel;
@@ -8252,7 +8486,7 @@ int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
8252
8486
 
8253
8487
  int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
8254
8488
  {
8255
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8489
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8256
8490
  return EINVAL;
8257
8491
 
8258
8492
  txn->mt_dbxs[dbi].md_relctx = ctx;
@@ -8261,7 +8495,7 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
8261
8495
 
8262
8496
  int mdb_env_get_maxkeysize(MDB_env *env)
8263
8497
  {
8264
- return MDB_MAXKEYSIZE;
8498
+ return ENV_MAXKEY(env);
8265
8499
  }
8266
8500
 
8267
8501
  int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
@@ -8269,7 +8503,7 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
8269
8503
  unsigned int i, rdrs;
8270
8504
  MDB_reader *mr;
8271
8505
  char buf[64];
8272
- int first = 1;
8506
+ int rc = 0, first = 1;
8273
8507
 
8274
8508
  if (!env || !func)
8275
8509
  return -1;
@@ -8280,27 +8514,25 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
8280
8514
  mr = env->me_txns->mti_readers;
8281
8515
  for (i=0; i<rdrs; i++) {
8282
8516
  if (mr[i].mr_pid) {
8283
- size_t tid;
8284
- int rc;
8285
- tid = mr[i].mr_tid;
8286
- if (mr[i].mr_txnid == (txnid_t)-1) {
8287
- sprintf(buf, "%10d %"Z"x -\n", mr[i].mr_pid, tid);
8288
- } else {
8289
- sprintf(buf, "%10d %"Z"x %"Z"u\n", mr[i].mr_pid, tid, mr[i].mr_txnid);
8290
- }
8517
+ txnid_t txnid = mr[i].mr_txnid;
8518
+ sprintf(buf, txnid == (txnid_t)-1 ?
8519
+ "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n",
8520
+ (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid);
8291
8521
  if (first) {
8292
8522
  first = 0;
8293
- func(" pid thread txnid\n", ctx);
8523
+ rc = func(" pid thread txnid\n", ctx);
8524
+ if (rc < 0)
8525
+ break;
8294
8526
  }
8295
8527
  rc = func(buf, ctx);
8296
8528
  if (rc < 0)
8297
- return rc;
8529
+ break;
8298
8530
  }
8299
8531
  }
8300
8532
  if (first) {
8301
- func("(no active readers)\n", ctx);
8533
+ rc = func("(no active readers)\n", ctx);
8302
8534
  }
8303
- return 0;
8535
+ return rc;
8304
8536
  }
8305
8537
 
8306
8538
  /** Insert pid into list if not already present.
@@ -8361,7 +8593,6 @@ int mdb_reader_check(MDB_env *env, int *dead)
8361
8593
  return ENOMEM;
8362
8594
  pids[0] = 0;
8363
8595
  mr = env->me_txns->mti_readers;
8364
- j = 0;
8365
8596
  for (i=0; i<rdrs; i++) {
8366
8597
  if (mr[i].mr_pid && mr[i].mr_pid != env->me_pid) {
8367
8598
  pid = mr[i].mr_pid;