lmdb 0.4.1 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 92f31229585bce51aaa1f94fab800c5f98488869
4
- data.tar.gz: e57e72dac8a031aeb3163c917252160b2c3c21c2
3
+ metadata.gz: 19495dad2d084f39462d5938acbc27ad061f797e
4
+ data.tar.gz: 8dfca9b055ccf2fa62a8bddbd822b8a14b0cb6f5
5
5
  SHA512:
6
- metadata.gz: c7fe9fba9eae7efe4c0a15387906dd006e7f1f9659b8df9b8d7459386e9ea833df4b32e3df8d131d060299d6e948212d26dca9cb2b593d3f9106d55530b4c1b8
7
- data.tar.gz: fc1108b3f94451fd44b29b9f37a30d0cd6641c8c5193552751f970ed46b67f91da32a7da3b044d4443371e515d781b537f6cc2df975169ea1beb6e50bc338514
6
+ metadata.gz: 9d671fbfae42a4d3ed1022deb479cebc57e31b740ba86fb2f125960b8fee38f8020fa0f008b2a35b9a6b2e62b6497b31bcb656dba9f95caafeab1d1f90710d54
7
+ data.tar.gz: 9912e57edd9db4d966fc13c2603488cc1f50acd0560b678e98a5eb965d285b2974b1c06874d89ac69cead95b42b64634b3c2d8d240df81710693b309a792bfdc
@@ -5,11 +5,9 @@ rvm:
5
5
  - 2.0.0
6
6
  - 2.1.0
7
7
  - ruby-head
8
- - rbx-18mode
9
- - rbx-19mode
8
+ - rbx
10
9
  matrix:
11
10
  allow_failures:
12
11
  - rvm: ruby-head
13
12
  - rvm: 1.8.7
14
- - rvm: rbx-18mode
15
- - rvm: rbx-19mode
13
+ - rvm: rbx
data/CHANGES CHANGED
@@ -1,3 +1,8 @@
1
+ 0.4.2
2
+
3
+ * Fix #11, #12, #14.
4
+ * Import lmdb 0.9.12 source.
5
+
1
6
  0.4.1
2
7
 
3
8
  * Fix #10
@@ -4,3 +4,4 @@ Evgeniy Dolzhenko <evgeniy.dolzhenko@blacksquaremedia.com>
4
4
  Julien Ammous <schmurfy@gmail.com>
5
5
  Nathaniel Pierce <nwpierce@gmail.com>
6
6
  Richard Golding <golding@chrysaetos.org>
7
+ Joel VanderWerf <vjoel@users.sourceforge.net>
data/Rakefile CHANGED
@@ -1,5 +1,9 @@
1
1
  #!/usr/bin/env rake
2
2
 
3
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "lib"))
4
+ GEMSPEC = Dir['*.gemspec'].first
5
+ PRJ = File.basename(GEMSPEC, ".gemspec")
6
+
3
7
  require 'bundler/setup'
4
8
  require 'rspec/core/rake_task'
5
9
  require 'rake/extensiontask'
@@ -8,3 +12,58 @@ RSpec::Core::RakeTask.new :spec
8
12
  Rake::ExtensionTask.new :lmdb_ext
9
13
 
10
14
  task :default => [:compile, :spec]
15
+
16
+ def version
17
+ @version ||= begin
18
+ require "#{PRJ}/version"
19
+ warn "LMDB::VERSION not a string" unless LMDB::VERSION.kind_of? String
20
+ LMDB::VERSION
21
+ end
22
+ end
23
+
24
+ def tag
25
+ @tag ||= "v#{version}"
26
+ end
27
+
28
+ def latest
29
+ @latest ||= `git describe --abbrev=0 --tags --match 'v*'`.chomp
30
+ end
31
+
32
+ desc "Commit, tag, and push repo; build and push gem"
33
+ task :release => "release:is_new_version" do
34
+ require 'tempfile'
35
+
36
+ sh "gem build #{GEMSPEC}"
37
+
38
+ file = Tempfile.new "template"
39
+ begin
40
+ file.puts "release #{version}"
41
+ file.close
42
+ sh "git commit --allow-empty -a -v -t #{file.path}"
43
+ ensure
44
+ file.close unless file.closed?
45
+ file.unlink
46
+ end
47
+
48
+ sh "git tag #{tag}"
49
+ sh "git push"
50
+ sh "git push --tags"
51
+
52
+ sh "gem push #{tag}.gem"
53
+ end
54
+
55
+ namespace :release do
56
+ desc "Diff to latest release"
57
+ task :diff do
58
+ sh "git diff #{latest}"
59
+ end
60
+
61
+ desc "Log to latest release"
62
+ task :log do
63
+ sh "git log #{latest}.."
64
+ end
65
+
66
+ task :is_new_version do
67
+ abort "#{tag} exists; update version!" unless `git tag -l #{tag}`.empty?
68
+ end
69
+ end
@@ -1,5 +1,29 @@
1
1
  LMDB 0.9 Change Log
2
2
 
3
+ LMDB 0.9.12 Release (2014/06/13)
4
+ Fix MDB_GET_BOTH regression (ITS#7875,#7681)
5
+ Fix MDB_MULTIPLE writing multiple keys (ITS#7834)
6
+ Fix mdb_rebalance (ITS#7829)
7
+ Fix mdb_page_split (ITS#7815)
8
+ Fix md_entries count (ITS#7861,#7828,#7793)
9
+ Fix MDB_CURRENT (ITS#7793)
10
+ Fix possible crash on Windows DLL detach
11
+ Misc code cleanup
12
+ Documentation
13
+ mdb_cursor_put: cursor moves on error (ITS#7771)
14
+
15
+
16
+ LMDB 0.9.11 Release (2014/01/15)
17
+ Add mdb_env_set_assert() (ITS#7775)
18
+ Fix: invalidate txn on page allocation errors (ITS#7377)
19
+ Fix xcursor tracking in mdb_cursor_del0() (ITS#7771)
20
+ Fix corruption from deletes (ITS#7756)
21
+ Fix Windows/MSVC build issues
22
+ Raise safe limit of max MDB_MAXKEYSIZE
23
+ Misc code cleanup
24
+ Documentation
25
+ Remove spurious note about non-overlapping flags (ITS#7665)
26
+
3
27
  LMDB 0.9.10 Release (2013/11/12)
4
28
  Add MDB_NOMEMINIT option
5
29
  Fix mdb_page_split() again (ITS#7589)
@@ -16,7 +40,7 @@ LMDB 0.9.9 Release (2013/10/24)
16
40
  Fix mdb_page_merge() cursor fixup (ITS#7722)
17
41
  Fix mdb_cursor_del() on last delete (ITS#7718)
18
42
  Fix adding WRITEMAP on existing env (ITS#7715)
19
- Fixes for nested txns (ITS#7515)
43
+ Fix nested txns (ITS#7515)
20
44
  Fix mdb_env_copy() O_DIRECT bug (ITS#7682)
21
45
  Fix mdb_cursor_set(SET_RANGE) return code (ITS#7681)
22
46
  Fix mdb_rebalance() cursor fixup (ITS#7701)
@@ -1,4 +1,4 @@
1
- Copyright 2011-2013 Howard Chu, Symas Corp.
1
+ Copyright 2011-2014 Howard Chu, Symas Corp.
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
@@ -119,7 +119,7 @@
119
119
  *
120
120
  * @author Howard Chu, Symas Corporation.
121
121
  *
122
- * @copyright Copyright 2011-2013 Howard Chu, Symas Corp. All rights reserved.
122
+ * @copyright Copyright 2011-2014 Howard Chu, Symas Corp. All rights reserved.
123
123
  *
124
124
  * Redistribution and use in source and binary forms, with or without
125
125
  * modification, are permitted only as authorized by the OpenLDAP
@@ -184,7 +184,7 @@ typedef int mdb_filehandle_t;
184
184
  /** Library minor version */
185
185
  #define MDB_VERSION_MINOR 9
186
186
  /** Library patch version */
187
- #define MDB_VERSION_PATCH 10
187
+ #define MDB_VERSION_PATCH 12
188
188
 
189
189
  /** Combine args a,b,c into a single integer for easy version comparisons */
190
190
  #define MDB_VERINT(a,b,c) (((a) << 24) | ((b) << 16) | (c))
@@ -194,7 +194,7 @@ typedef int mdb_filehandle_t;
194
194
  MDB_VERINT(MDB_VERSION_MAJOR,MDB_VERSION_MINOR,MDB_VERSION_PATCH)
195
195
 
196
196
  /** The release date of this library version */
197
- #define MDB_VERSION_DATE "November 11, 2013"
197
+ #define MDB_VERSION_DATE "June 13, 2014"
198
198
 
199
199
  /** A stringifier for the version info */
200
200
  #define MDB_VERSTR(a,b,c,d) "MDB " #a "." #b "." #c ": (" d ")"
@@ -263,8 +263,6 @@ typedef int (MDB_cmp_func)(const MDB_val *a, const MDB_val *b);
263
263
  typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *relctx);
264
264
 
265
265
  /** @defgroup mdb_env Environment Flags
266
- *
267
- * Values do not overlap Database Flags.
268
266
  * @{
269
267
  */
270
268
  /** mmap at a fixed address (experimental) */
@@ -292,8 +290,6 @@ typedef void (MDB_rel_func)(MDB_val *item, void *oldptr, void *newptr, void *rel
292
290
  /** @} */
293
291
 
294
292
  /** @defgroup mdb_dbi_open Database Flags
295
- *
296
- * Values do not overlap Environment Flags.
297
293
  * @{
298
294
  */
299
295
  /** use reverse string keys */
@@ -412,7 +408,7 @@ typedef enum MDB_cursor_op {
412
408
  #define MDB_BAD_RSLOT (-30783)
413
409
  /** Transaction cannot recover - it must be aborted */
414
410
  #define MDB_BAD_TXN (-30782)
415
- /** Too big key/data, key is empty, or wrong DUPFIXED size */
411
+ /** Unsupported size of key/DB name/data, or wrong DUPFIXED size */
416
412
  #define MDB_BAD_VALSIZE (-30781)
417
413
  #define MDB_LAST_ERRCODE MDB_BAD_VALSIZE
418
414
  /** @} */
@@ -672,7 +668,8 @@ void mdb_env_close(MDB_env *env);
672
668
  /** @brief Set environment flags.
673
669
  *
674
670
  * This may be used to set some flags in addition to those from
675
- * #mdb_env_open(), or to unset these flags.
671
+ * #mdb_env_open(), or to unset these flags. If several threads
672
+ * change the flags at the same time, the result is undefined.
676
673
  * @param[in] env An environment handle returned by #mdb_env_create()
677
674
  * @param[in] flags The flags to change, bitwise OR'ed together
678
675
  * @param[in] onoff A non-zero value sets the flags, zero clears them.
@@ -787,6 +784,10 @@ int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
787
784
  * environment. Simpler applications that use the environment as a single
788
785
  * unnamed database can ignore this option.
789
786
  * This function may only be called after #mdb_env_create() and before #mdb_env_open().
787
+ *
788
+ * Currently a moderate number of slots are cheap but a huge number gets
789
+ * expensive: 7-120 words per transaction, and every #mdb_dbi_open()
790
+ * does a linear search of the opened slots.
790
791
  * @param[in] env An environment handle returned by #mdb_env_create()
791
792
  * @param[in] dbs The maximum number of databases
792
793
  * @return A non-zero error value on failure and 0 on success. Some possible
@@ -797,15 +798,47 @@ int mdb_env_get_maxreaders(MDB_env *env, unsigned int *readers);
797
798
  */
798
799
  int mdb_env_set_maxdbs(MDB_env *env, MDB_dbi dbs);
799
800
 
800
- /** @brief Get the maximum size of a key for the environment.
801
+ /** @brief Get the maximum size of keys and #MDB_DUPSORT data we can write.
801
802
  *
802
- * This is the compile-time constant #MDB_MAXKEYSIZE, default 511.
803
+ * Depends on the compile-time constant #MDB_MAXKEYSIZE. Default 511.
803
804
  * See @ref MDB_val.
804
805
  * @param[in] env An environment handle returned by #mdb_env_create()
805
- * @return The maximum size of a key
806
+ * @return The maximum size of a key we can write
806
807
  */
807
808
  int mdb_env_get_maxkeysize(MDB_env *env);
808
809
 
810
+ /** @brief Set application information associated with the #MDB_env.
811
+ *
812
+ * @param[in] env An environment handle returned by #mdb_env_create()
813
+ * @param[in] ctx An arbitrary pointer for whatever the application needs.
814
+ * @return A non-zero error value on failure and 0 on success.
815
+ */
816
+ int mdb_env_set_userctx(MDB_env *env, void *ctx);
817
+
818
+ /** @brief Get the application information associated with the #MDB_env.
819
+ *
820
+ * @param[in] env An environment handle returned by #mdb_env_create()
821
+ * @return The pointer set by #mdb_env_set_userctx().
822
+ */
823
+ void *mdb_env_get_userctx(MDB_env *env);
824
+
825
+ /** @brief A callback function for most MDB assert() failures,
826
+ * called before printing the message and aborting.
827
+ *
828
+ * @param[in] env An environment handle returned by #mdb_env_create().
829
+ * @param[in] msg The assertion message, not including newline.
830
+ */
831
+ typedef void MDB_assert_func(MDB_env *env, const char *msg);
832
+
833
+ /** Set or reset the assert() callback of the environment.
834
+ * Disabled if liblmdb is built with NDEBUG.
835
+ * @note This hack should become obsolete as lmdb's error handling matures.
836
+ * @param[in] env An environment handle returned by #mdb_env_create().
837
+ * @param[in] func An #MDB_assert_func function, or 0.
838
+ * @return A non-zero error value on failure and 0 on success.
839
+ */
840
+ int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func);
841
+
809
842
  /** @brief Create a transaction for use with the environment.
810
843
  *
811
844
  * The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
@@ -922,7 +955,7 @@ int mdb_txn_renew(MDB_txn *txn);
922
955
  * independently of whether such a database exists.
923
956
  * The database handle may be discarded by calling #mdb_dbi_close().
924
957
  * The old database handle is returned if the database was already open.
925
- * The handle must only be closed once.
958
+ * The handle may only be closed once.
926
959
  * The database handle will be private to the current transaction until
927
960
  * the transaction is successfully committed. If the transaction is
928
961
  * aborted the handle will be closed automatically.
@@ -934,7 +967,8 @@ int mdb_txn_renew(MDB_txn *txn);
934
967
  * use this function.
935
968
  *
936
969
  * To use named databases (with name != NULL), #mdb_env_set_maxdbs()
937
- * must be called before opening the environment.
970
+ * must be called before opening the environment. Database names
971
+ * are kept as keys in the unnamed database.
938
972
  * @param[in] txn A transaction handle returned by #mdb_txn_begin()
939
973
  * @param[in] name The name of the database to open. If only a single
940
974
  * database is needed in the environment, this value may be NULL.
@@ -1004,12 +1038,19 @@ int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
1004
1038
  */
1005
1039
  int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags);
1006
1040
 
1007
- /** @brief Close a database handle.
1041
+ /** @brief Close a database handle. Normally unnecessary. Use with care:
1008
1042
  *
1009
1043
  * This call is not mutex protected. Handles should only be closed by
1010
1044
  * a single thread, and only if no other threads are going to reference
1011
1045
  * the database handle or one of its cursors any further. Do not close
1012
1046
  * a handle if an existing transaction has modified its database.
1047
+ * Doing so can cause misbehavior ranging from database corruption to errors
1048
+ * like MDB_BAD_VALSIZE (since the DB name is gone).
1049
+ *
1050
+ * Closing a database handle is not necessary, but lets #mdb_dbi_open()
1051
+ * reuse the handle value. Usually it's better to set a bigger
1052
+ * #mdb_env_set_maxdbs(), unless that value would be large.
1053
+ *
1013
1054
  * @param[in] env An environment handle returned by #mdb_env_create()
1014
1055
  * @param[in] dbi A database handle returned by #mdb_dbi_open()
1015
1056
  */
@@ -1017,6 +1058,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi);
1017
1058
 
1018
1059
  /** @brief Empty or delete+close a database.
1019
1060
  *
1061
+ * See #mdb_dbi_close() for restrictions about closing the DB handle.
1020
1062
  * @param[in] txn A transaction handle returned by #mdb_txn_begin()
1021
1063
  * @param[in] dbi A database handle returned by #mdb_dbi_open()
1022
1064
  * @param[in] del 0 to empty the DB, 1 to delete it from the
@@ -1294,9 +1336,9 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
1294
1336
  /** @brief Store by cursor.
1295
1337
  *
1296
1338
  * This function stores key/data pairs into the database.
1297
- * If the function fails for any reason, the state of the cursor will be
1298
- * unchanged. If the function succeeds and an item is inserted into the
1299
- * database, the cursor is always positioned to refer to the newly inserted item.
1339
+ * The cursor is positioned at the new item, or on failure usually near it.
1340
+ * @note Earlier documentation incorrectly said errors would leave the
1341
+ * state of the cursor unchanged.
1300
1342
  * @param[in] cursor A cursor handle returned by #mdb_cursor_open()
1301
1343
  * @param[in] key The key operated on.
1302
1344
  * @param[in] data The data operated on.
@@ -1305,7 +1347,9 @@ int mdb_cursor_get(MDB_cursor *cursor, MDB_val *key, MDB_val *data,
1305
1347
  * <ul>
1306
1348
  * <li>#MDB_CURRENT - overwrite the data of the key/data pair to which
1307
1349
  * the cursor refers with the specified data item. The \b key
1308
- * parameter is ignored.
1350
+ * parameter is not used for positioning the cursor, but should
1351
+ * still be provided. If using sorted duplicates (#MDB_DUPSORT)
1352
+ * the data item must still sort into the same place.
1309
1353
  * <li>#MDB_NODUPDATA - enter the new key/data pair only if it does not
1310
1354
  * already appear in the database. This flag may only be specified
1311
1355
  * if the database was opened with #MDB_DUPSORT. The function will
@@ -1409,7 +1453,7 @@ int mdb_dcmp(MDB_txn *txn, MDB_dbi dbi, const MDB_val *a, const MDB_val *b);
1409
1453
  *
1410
1454
  * @param[in] msg The string to be printed.
1411
1455
  * @param[in] ctx An arbitrary context pointer for the callback.
1412
- * @return < 0 on failure, 0 on success.
1456
+ * @return < 0 on failure, >= 0 on success.
1413
1457
  */
1414
1458
  typedef int (MDB_msg_func)(const char *msg, void *ctx);
1415
1459
 
@@ -1418,7 +1462,7 @@ typedef int (MDB_msg_func)(const char *msg, void *ctx);
1418
1462
  * @param[in] env An environment handle returned by #mdb_env_create()
1419
1463
  * @param[in] func A #MDB_msg_func function
1420
1464
  * @param[in] ctx Anything the message function needs
1421
- * @return < 0 on failure, 0 on success.
1465
+ * @return < 0 on failure, >= 0 on success.
1422
1466
  */
1423
1467
  int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx);
1424
1468
 
@@ -65,7 +65,6 @@
65
65
  #include <fcntl.h>
66
66
  #endif
67
67
 
68
- #include <assert.h>
69
68
  #include <errno.h>
70
69
  #include <limits.h>
71
70
  #include <stddef.h>
@@ -149,13 +148,24 @@
149
148
  /** @defgroup internal MDB Internals
150
149
  * @{
151
150
  */
152
- /** @defgroup compat Windows Compatibility Macros
151
+ /** @defgroup compat Compatibility Macros
153
152
  * A bunch of macros to minimize the amount of platform-specific ifdefs
154
153
  * needed throughout the rest of the code. When the features this library
155
154
  * needs are similar enough to POSIX to be hidden in a one-or-two line
156
155
  * replacement, this macro approach is used.
157
156
  * @{
158
157
  */
158
+
159
+ /** Wrapper around __func__, which is a C99 feature */
160
+ #if __STDC_VERSION__ >= 199901L
161
+ # define mdb_func_ __func__
162
+ #elif __GNUC__ >= 2 || _MSC_VER >= 1300
163
+ # define mdb_func_ __FUNCTION__
164
+ #else
165
+ /* If a debug message says <mdb_unknown>(), update the #if statements above */
166
+ # define mdb_func_ "<mdb_unknown>"
167
+ #endif
168
+
159
169
  #ifdef _WIN32
160
170
  #define MDB_USE_HASH 1
161
171
  #define MDB_PIDLOCK 0
@@ -327,7 +337,7 @@ static txnid_t mdb_debug_start;
327
337
  */
328
338
  # define DPRINTF(args) ((void) ((mdb_debug) && DPRINTF0 args))
329
339
  # define DPRINTF0(fmt, ...) \
330
- fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__)
340
+ fprintf(stderr, "%s:%d " fmt "\n", mdb_func_, __LINE__, __VA_ARGS__)
331
341
  #else
332
342
  # define DPRINTF(args) ((void) 0)
333
343
  #endif
@@ -342,7 +352,7 @@ static txnid_t mdb_debug_start;
342
352
 
343
353
  /** @brief The maximum size of a database page.
344
354
  *
345
- * This is 32k, since it must fit in #MDB_page.#mp_upper.
355
+ * This is 32k, since it must fit in #MDB_page.%mp_upper.
346
356
  *
347
357
  * LMDB will use database pages < OS pages if needed.
348
358
  * That causes more I/O in write transactions: The OS must
@@ -382,20 +392,25 @@ static txnid_t mdb_debug_start;
382
392
  /** The version number for a database's lockfile format. */
383
393
  #define MDB_LOCK_VERSION 1
384
394
 
385
- /** @brief The maximum size of a key in the database.
386
- *
387
- * The library rejects bigger keys, and cannot deal with records
388
- * with bigger keys stored by a library with bigger max keysize.
395
+ /** @brief The max size of a key we can write, or 0 for dynamic max.
389
396
  *
390
- * We require that keys all fit onto a regular page. This limit
391
- * could be raised a bit further if needed; to something just
392
- * under (page size / #MDB_MINKEYS / 3).
397
+ * Define this as 0 to compute the max from the page size. 511
398
+ * is default for backwards compat: liblmdb <= 0.9.10 can break
399
+ * when modifying a DB with keys/dupsort data bigger than its max.
393
400
  *
394
- * Note that data items in an #MDB_DUPSORT database are actually keys
395
- * of a subDB, so they're also limited to this size.
401
+ * Data items in an #MDB_DUPSORT database are also limited to
402
+ * this size, since they're actually keys of a sub-DB. Keys and
403
+ * #MDB_DUPSORT data items must fit on a node in a regular page.
396
404
  */
397
405
  #ifndef MDB_MAXKEYSIZE
398
406
  #define MDB_MAXKEYSIZE 511
407
+ #endif
408
+
409
+ /** The maximum size of a key we can write to the environment. */
410
+ #if MDB_MAXKEYSIZE
411
+ #define ENV_MAXKEY(env) (MDB_MAXKEYSIZE)
412
+ #else
413
+ #define ENV_MAXKEY(env) ((env)->me_maxkey)
399
414
  #endif
400
415
 
401
416
  /** @brief The maximum size of a data item.
@@ -405,11 +420,15 @@ static txnid_t mdb_debug_start;
405
420
  #define MAXDATASIZE 0xffffffffUL
406
421
 
407
422
  #if MDB_DEBUG
423
+ /** Key size which fits in a #DKBUF.
424
+ * @ingroup debug
425
+ */
426
+ #define DKBUF_MAXKEYSIZE ((MDB_MAXKEYSIZE) > 0 ? (MDB_MAXKEYSIZE) : 511)
408
427
  /** A key buffer.
409
428
  * @ingroup debug
410
429
  * This is used for printing a hex dump of a key's contents.
411
430
  */
412
- #define DKBUF char kbuf[(MDB_MAXKEYSIZE*2+1)]
431
+ #define DKBUF char kbuf[DKBUF_MAXKEYSIZE*2+1]
413
432
  /** Display a key in hex.
414
433
  * @ingroup debug
415
434
  * Invoke a function to display a key in hex.
@@ -428,6 +447,9 @@ static txnid_t mdb_debug_start;
428
447
  /** Test if the flags \b f are set in a flag word \b w. */
429
448
  #define F_ISSET(w, f) (((w) & (f)) == (f))
430
449
 
450
+ /** Round \b n up to an even number. */
451
+ #define EVEN(n) (((n) + 1U) & -2) /* sign-extending -2 to match n+1U */
452
+
431
453
  /** Used for offsets within a single page.
432
454
  * Since memory pages are typically 4 or 8KB in size, 12-13 bits,
433
455
  * this is plenty.
@@ -679,7 +701,8 @@ typedef struct MDB_page {
679
701
  #define OVPAGES(size, psize) ((PAGEHDRSZ-1 + (size)) / (psize) + 1)
680
702
 
681
703
  /** Header for a single key/data pair within a page.
682
- * We guarantee 2-byte alignment for nodes.
704
+ * Used in pages of type #P_BRANCH and #P_LEAF without #P_LEAF2.
705
+ * We guarantee 2-byte alignment for 'MDB_node's.
683
706
  */
684
707
  typedef struct MDB_node {
685
708
  /** lo and hi are used for data size on leaf nodes and for
@@ -688,9 +711,11 @@ typedef struct MDB_node {
688
711
  * They are in host byte order in case that lets some
689
712
  * accesses be optimized into a 32-bit word access.
690
713
  */
691
- #define mn_lo mn_offset[BYTE_ORDER!=LITTLE_ENDIAN]
692
- #define mn_hi mn_offset[BYTE_ORDER==LITTLE_ENDIAN] /**< part of dsize or pgno */
693
- unsigned short mn_offset[2]; /**< storage for #mn_lo and #mn_hi */
714
+ #if BYTE_ORDER == LITTLE_ENDIAN
715
+ unsigned short mn_lo, mn_hi; /**< part of data size or pgno */
716
+ #else
717
+ unsigned short mn_hi, mn_lo;
718
+ #endif
694
719
  /** @defgroup mdb_node Node Flags
695
720
  * @ingroup internal
696
721
  * Flags for node headers.
@@ -911,12 +936,12 @@ struct MDB_txn {
911
936
  * @{
912
937
  */
913
938
  #define MDB_TXN_RDONLY 0x01 /**< read-only transaction */
914
- #define MDB_TXN_ERROR 0x02 /**< an error has occurred */
939
+ #define MDB_TXN_ERROR 0x02 /**< txn is unusable after an error */
915
940
  #define MDB_TXN_DIRTY 0x04 /**< must write, even if dirty list is empty */
916
941
  #define MDB_TXN_SPILLS 0x08 /**< txn or a parent has spilled pages */
917
942
  /** @} */
918
943
  unsigned int mt_flags; /**< @ref mdb_txn */
919
- /** dirty_list room: Array size - #dirty pages visible to this txn.
944
+ /** #dirty_list room: Array size - \#dirty pages visible to this txn.
920
945
  * Includes ancestor txns' dirty pages not hidden by other txns'
921
946
  * dirty/spilled pages. Thus commit(nested txn) has room to merge
922
947
  * dirty_list into mt_parent after freeing hidden mt_parent pages.
@@ -1009,8 +1034,6 @@ struct MDB_env {
1009
1034
  #define MDB_ENV_ACTIVE 0x20000000U
1010
1035
  /** me_txkey is set */
1011
1036
  #define MDB_ENV_TXKEY 0x10000000U
1012
- /** Have liveness lock in reader table */
1013
- #define MDB_LIVE_READER 0x08000000U
1014
1037
  uint32_t me_flags; /**< @ref mdb_env */
1015
1038
  unsigned int me_psize; /**< DB page size, inited from me_os_psize */
1016
1039
  unsigned int me_os_psize; /**< OS page size, from #GET_PAGESIZE */
@@ -1043,6 +1066,10 @@ struct MDB_env {
1043
1066
  int me_maxfree_1pg;
1044
1067
  /** Max size of a node on a page */
1045
1068
  unsigned int me_nodemax;
1069
+ #if !(MDB_MAXKEYSIZE)
1070
+ unsigned int me_maxkey; /**< max size of a key */
1071
+ #endif
1072
+ int me_live_reader; /**< have liveness lock in reader table */
1046
1073
  #ifdef _WIN32
1047
1074
  int me_pidquery; /**< Used in OpenProcess */
1048
1075
  HANDLE me_rmutex; /* Windows mutexes don't reside in shared mem */
@@ -1051,6 +1078,8 @@ struct MDB_env {
1051
1078
  sem_t *me_rmutex; /* Shared mutexes are not supported */
1052
1079
  sem_t *me_wmutex;
1053
1080
  #endif
1081
+ void *me_userctx; /**< User-settable context */
1082
+ MDB_assert_func *me_assert_func; /**< Callback for assertion failures */
1054
1083
  };
1055
1084
 
1056
1085
  /** Nested transaction */
@@ -1066,9 +1095,13 @@ typedef struct MDB_ntxn {
1066
1095
  #define MDB_COMMIT_PAGES IOV_MAX
1067
1096
  #endif
1068
1097
 
1069
- /* max bytes to write in one call */
1098
+ /** max bytes to write in one call */
1070
1099
  #define MAX_WRITE (0x80000000U >> (sizeof(ssize_t) == 4))
1071
1100
 
1101
+ /** Check \b txn and \b dbi arguments to a function */
1102
+ #define TXN_DBI_EXIST(txn, dbi) \
1103
+ ((txn) && (dbi) < (txn)->mt_numdbs && ((txn)->mt_dbflags[dbi] & DB_VALID))
1104
+
1072
1105
  static int mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp);
1073
1106
  static int mdb_page_new(MDB_cursor *mc, uint32_t flags, int num, MDB_page **mp);
1074
1107
  static int mdb_page_touch(MDB_cursor *mc);
@@ -1099,7 +1132,7 @@ static void mdb_env_close0(MDB_env *env, int excl);
1099
1132
  static MDB_node *mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp);
1100
1133
  static int mdb_node_add(MDB_cursor *mc, indx_t indx,
1101
1134
  MDB_val *key, MDB_val *data, pgno_t pgno, unsigned int flags);
1102
- static void mdb_node_del(MDB_page *mp, indx_t indx, int ksize);
1135
+ static void mdb_node_del(MDB_cursor *mc, int ksize);
1103
1136
  static void mdb_node_shrink(MDB_page *mp, indx_t indx);
1104
1137
  static int mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst);
1105
1138
  static int mdb_node_read(MDB_txn *txn, MDB_node *leaf, MDB_val *data);
@@ -1112,7 +1145,8 @@ static int mdb_update_key(MDB_cursor *mc, MDB_val *key);
1112
1145
  static void mdb_cursor_pop(MDB_cursor *mc);
1113
1146
  static int mdb_cursor_push(MDB_cursor *mc, MDB_page *mp);
1114
1147
 
1115
- static int mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf);
1148
+ static int mdb_cursor_del0(MDB_cursor *mc);
1149
+ static int mdb_del0(MDB_txn *txn, MDB_dbi dbi, MDB_val *key, MDB_val *data, unsigned flags);
1116
1150
  static int mdb_cursor_sibling(MDB_cursor *mc, int move_right);
1117
1151
  static int mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
1118
1152
  static int mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op);
@@ -1168,7 +1202,7 @@ static char *const mdb_errstr[] = {
1168
1202
  "MDB_INCOMPATIBLE: Operation and DB incompatible, or DB flags changed",
1169
1203
  "MDB_BAD_RSLOT: Invalid reuse of reader locktable slot",
1170
1204
  "MDB_BAD_TXN: Transaction cannot recover - it must be aborted",
1171
- "MDB_BAD_VALSIZE: Too big key/data, key is empty, or wrong DUPFIXED size",
1205
+ "MDB_BAD_VALSIZE: Unsupported size of key/DB name/data, or wrong DUPFIXED size",
1172
1206
  };
1173
1207
 
1174
1208
  char *
@@ -1186,7 +1220,43 @@ mdb_strerror(int err)
1186
1220
  return strerror(err);
1187
1221
  }
1188
1222
 
1223
+ /** assert(3) variant in cursor context */
1224
+ #define mdb_cassert(mc, expr) mdb_assert0((mc)->mc_txn->mt_env, expr, #expr)
1225
+ /** assert(3) variant in transaction context: asserts \b expr using the environment of \b txn */
1226
+ #define mdb_tassert(txn, expr) mdb_assert0((txn)->mt_env, expr, #expr)
1227
+ /** assert(3) variant in environment context */
1228
+ #define mdb_eassert(env, expr) mdb_assert0(env, expr, #expr)
1229
+
1230
+ #ifndef NDEBUG
1231
+ # define mdb_assert0(env, expr, expr_txt) ((expr) ? (void)0 : \
1232
+ mdb_assert_fail(env, expr_txt, mdb_func_, __FILE__, __LINE__))
1233
+
1234
+ static void
1235
+ mdb_assert_fail(MDB_env *env, const char *expr_txt,
1236
+ const char *func, const char *file, int line)
1237
+ {
1238
+ char buf[400];
1239
+ sprintf(buf, "%.100s:%d: Assertion '%.200s' failed in %.40s()",
1240
+ file, line, expr_txt, func);
1241
+ if (env->me_assert_func)
1242
+ env->me_assert_func(env, buf);
1243
+ fprintf(stderr, "%s\n", buf);
1244
+ abort();
1245
+ }
1246
+ #else
1247
+ # define mdb_assert0(env, expr, expr_txt) ((void) 0)
1248
+ #endif /* NDEBUG */
1249
+
1189
1250
  #if MDB_DEBUG
1251
+ /** Return the page number of \b mp, which may be a sub-page, for debug output */
1252
+ static pgno_t
1253
+ mdb_dbg_pgno(MDB_page *mp)
1254
+ {
1255
+ pgno_t ret;
1256
+ COPY_PGNO(ret, mp->mp_pgno);
1257
+ return ret;
1258
+ }
1259
+
1190
1260
  /** Display a key in hexadecimal and return the address of the result.
1191
1261
  * @param[in] key the key to display
1192
1262
  * @param[in] buf the buffer to write into. Should always be #DKBUF.
@@ -1202,7 +1272,7 @@ mdb_dkey(MDB_val *key, char *buf)
1202
1272
  if (!key)
1203
1273
  return "";
1204
1274
 
1205
- if (key->mv_size > MDB_MAXKEYSIZE)
1275
+ if (key->mv_size > DKBUF_MAXKEYSIZE)
1206
1276
  return "MDB_MAXKEYSIZE";
1207
1277
  /* may want to make this a dynamic check: if the key is mostly
1208
1278
  * printable characters, print it as-is instead of converting to hex.
@@ -1217,33 +1287,77 @@ mdb_dkey(MDB_val *key, char *buf)
1217
1287
  return buf;
1218
1288
  }
1219
1289
 
1290
+ static const char *
1291
+ mdb_leafnode_type(MDB_node *n)
1292
+ {
1293
+ static char *const tp[2][2] = {{"", ": DB"}, {": sub-page", ": sub-DB"}};
1294
+ return F_ISSET(n->mn_flags, F_BIGDATA) ? ": overflow page" :
1295
+ tp[F_ISSET(n->mn_flags, F_DUPDATA)][F_ISSET(n->mn_flags, F_SUBDATA)];
1296
+ }
1297
+
1220
1298
  /** Display all the keys in the page. */
1221
1299
  void
1222
1300
  mdb_page_list(MDB_page *mp)
1223
1301
  {
1302
+ pgno_t pgno = mdb_dbg_pgno(mp);
1303
+ const char *type, *state = (mp->mp_flags & P_DIRTY) ? ", dirty" : "";
1224
1304
  MDB_node *node;
1225
- unsigned int i, nkeys, nsize;
1305
+ unsigned int i, nkeys, nsize, total = 0;
1226
1306
  MDB_val key;
1227
1307
  DKBUF;
1228
1308
 
1309
+ switch (mp->mp_flags & (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP)) {
1310
+ case P_BRANCH: type = "Branch page"; break;
1311
+ case P_LEAF: type = "Leaf page"; break;
1312
+ case P_LEAF|P_SUBP: type = "Sub-page"; break;
1313
+ case P_LEAF|P_LEAF2: type = "LEAF2 page"; break;
1314
+ case P_LEAF|P_LEAF2|P_SUBP: type = "LEAF2 sub-page"; break;
1315
+ case P_OVERFLOW:
1316
+ fprintf(stderr, "Overflow page %"Z"u pages %u%s\n",
1317
+ pgno, mp->mp_pages, state);
1318
+ return;
1319
+ case P_META:
1320
+ fprintf(stderr, "Meta-page %"Z"u txnid %"Z"u\n",
1321
+ pgno, ((MDB_meta *)METADATA(mp))->mm_txnid);
1322
+ return;
1323
+ default:
1324
+ fprintf(stderr, "Bad page %"Z"u flags 0x%u\n", pgno, mp->mp_flags);
1325
+ return;
1326
+ }
1327
+
1229
1328
  nkeys = NUMKEYS(mp);
1230
- fprintf(stderr, "Page %"Z"u numkeys %d\n", mp->mp_pgno, nkeys);
1329
+ fprintf(stderr, "%s %"Z"u numkeys %d%s\n", type, pgno, nkeys, state);
1330
+
1231
1331
  for (i=0; i<nkeys; i++) {
1332
+ if (IS_LEAF2(mp)) { /* LEAF2 pages have no mp_ptrs[] or node headers */
1333
+ key.mv_size = nsize = mp->mp_pad;
1334
+ key.mv_data = LEAF2KEY(mp, i, nsize);
1335
+ total += nsize;
1336
+ fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
1337
+ continue;
1338
+ }
1232
1339
  node = NODEPTR(mp, i);
1233
1340
  key.mv_size = node->mn_ksize;
1234
1341
  key.mv_data = node->mn_data;
1235
- nsize = NODESIZE + NODEKSZ(node) + sizeof(indx_t);
1342
+ nsize = NODESIZE + key.mv_size;
1236
1343
  if (IS_BRANCH(mp)) {
1237
1344
  fprintf(stderr, "key %d: page %"Z"u, %s\n", i, NODEPGNO(node),
1238
1345
  DKEY(&key));
1346
+ total += nsize;
1239
1347
  } else {
1240
1348
  if (F_ISSET(node->mn_flags, F_BIGDATA))
1241
1349
  nsize += sizeof(pgno_t);
1242
1350
  else
1243
1351
  nsize += NODEDSZ(node);
1244
- fprintf(stderr, "key %d: nsize %d, %s\n", i, nsize, DKEY(&key));
1352
+ total += nsize;
1353
+ nsize += sizeof(indx_t);
1354
+ fprintf(stderr, "key %d: nsize %d, %s%s\n",
1355
+ i, nsize, DKEY(&key), mdb_leafnode_type(node));
1245
1356
  }
1357
+ total = EVEN(total);
1246
1358
  }
1359
+ fprintf(stderr, "Total: header %d + contents %d + unused %d\n",
1360
+ IS_LEAF2(mp) ? PAGEHDRSZ : mp->mp_lower, total, SIZELEFT(mp));
1247
1361
  }
1248
1362
 
1249
1363
  void
@@ -1269,6 +1383,7 @@ mdb_cursor_chk(MDB_cursor *mc)
1269
1383
  /** Count all the pages in each DB and in the freelist
1270
1384
  * and make sure it matches the actual number of pages
1271
1385
  * being used.
1386
+ * All named DBs must be open for a correct count.
1272
1387
  */
1273
1388
  static void mdb_audit(MDB_txn *txn)
1274
1389
  {
@@ -1282,10 +1397,13 @@ static void mdb_audit(MDB_txn *txn)
1282
1397
  mdb_cursor_init(&mc, txn, FREE_DBI, NULL);
1283
1398
  while ((rc = mdb_cursor_get(&mc, &key, &data, MDB_NEXT)) == 0)
1284
1399
  freecount += *(MDB_ID *)data.mv_data;
1400
+ mdb_tassert(txn, rc == MDB_NOTFOUND);
1285
1401
 
1286
1402
  count = 0;
1287
1403
  for (i = 0; i<txn->mt_numdbs; i++) {
1288
1404
  MDB_xcursor mx;
1405
+ if (!(txn->mt_dbflags[i] & DB_VALID))
1406
+ continue;
1289
1407
  mdb_cursor_init(&mc, txn, i, &mx);
1290
1408
  if (txn->mt_dbs[i].md_root == P_INVALID)
1291
1409
  continue;
@@ -1293,8 +1411,8 @@ static void mdb_audit(MDB_txn *txn)
1293
1411
  txn->mt_dbs[i].md_leaf_pages +
1294
1412
  txn->mt_dbs[i].md_overflow_pages;
1295
1413
  if (txn->mt_dbs[i].md_flags & MDB_DUPSORT) {
1296
- mdb_page_search(&mc, NULL, MDB_PS_FIRST);
1297
- do {
1414
+ rc = mdb_page_search(&mc, NULL, MDB_PS_FIRST);
1415
+ for (; rc == MDB_SUCCESS; rc = mdb_cursor_sibling(&mc, 1)) {
1298
1416
  unsigned j;
1299
1417
  MDB_page *mp;
1300
1418
  mp = mc.mc_pg[mc.mc_top];
@@ -1308,7 +1426,7 @@ static void mdb_audit(MDB_txn *txn)
1308
1426
  }
1309
1427
  }
1310
1428
  }
1311
- while (mdb_cursor_sibling(&mc, 1) == 0);
1429
+ mdb_tassert(txn, rc == MDB_NOTFOUND);
1312
1430
  }
1313
1431
  }
1314
1432
  if (freecount + count + 2 /* metapages */ != txn->mt_next_pgno) {
@@ -1357,11 +1475,13 @@ mdb_page_malloc(MDB_txn *txn, unsigned num)
1357
1475
  off = sz - psize;
1358
1476
  }
1359
1477
  if ((ret = malloc(sz)) != NULL) {
1478
+ VGMEMP_ALLOC(env, ret, sz);
1360
1479
  if (!(env->me_flags & MDB_NOMEMINIT)) {
1361
1480
  memset((char *)ret + off, 0, psize);
1362
1481
  ret->mp_pad = 0;
1363
1482
  }
1364
- VGMEMP_ALLOC(env, ret, sz);
1483
+ } else {
1484
+ txn->mt_flags |= MDB_TXN_ERROR;
1365
1485
  }
1366
1486
  return ret;
1367
1487
  }
@@ -1627,7 +1747,7 @@ static void
1627
1747
  mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
1628
1748
  {
1629
1749
  MDB_ID2 mid;
1630
- int (*insert)(MDB_ID2L, MDB_ID2 *);
1750
+ int rc, (*insert)(MDB_ID2L, MDB_ID2 *);
1631
1751
 
1632
1752
  if (txn->mt_env->me_flags & MDB_WRITEMAP) {
1633
1753
  insert = mdb_mid2l_append;
@@ -1636,7 +1756,8 @@ mdb_page_dirty(MDB_txn *txn, MDB_page *mp)
1636
1756
  }
1637
1757
  mid.mid = mp->mp_pgno;
1638
1758
  mid.mptr = mp;
1639
- insert(txn->mt_u.dirty_list, &mid);
1759
+ rc = insert(txn->mt_u.dirty_list, &mid);
1760
+ mdb_tassert(txn, rc == 0);
1640
1761
  txn->mt_dirty_room--;
1641
1762
  }
1642
1763
 
@@ -1669,11 +1790,11 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1669
1790
  #else
1670
1791
  enum { Paranoid = 0, Max_retries = INT_MAX /*infinite*/ };
1671
1792
  #endif
1672
- int rc, n2 = num-1, retry = Max_retries;
1793
+ int rc, retry = Max_retries;
1673
1794
  MDB_txn *txn = mc->mc_txn;
1674
1795
  MDB_env *env = txn->mt_env;
1675
1796
  pgno_t pgno, *mop = env->me_pghead;
1676
- unsigned i, j, k, mop_len = mop ? mop[0] : 0;
1797
+ unsigned i, j, k, mop_len = mop ? mop[0] : 0, n2 = num-1;
1677
1798
  MDB_page *np;
1678
1799
  txnid_t oldest = 0, last;
1679
1800
  MDB_cursor_op op;
@@ -1682,8 +1803,10 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1682
1803
  *mp = NULL;
1683
1804
 
1684
1805
  /* If our dirty list is already full, we can't do anything */
1685
- if (txn->mt_dirty_room == 0)
1686
- return MDB_TXN_FULL;
1806
+ if (txn->mt_dirty_room == 0) {
1807
+ rc = MDB_TXN_FULL;
1808
+ goto fail;
1809
+ }
1687
1810
 
1688
1811
  for (op = MDB_FIRST;; op = MDB_NEXT) {
1689
1812
  MDB_val key, data;
@@ -1693,13 +1816,13 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1693
1816
  /* Seek a big enough contiguous page range. Prefer
1694
1817
  * pages at the tail, just truncating the list.
1695
1818
  */
1696
- if (mop_len >= (unsigned)num) {
1819
+ if (mop_len > n2) {
1697
1820
  i = mop_len;
1698
1821
  do {
1699
1822
  pgno = mop[i];
1700
1823
  if (mop[i-n2] == pgno+n2)
1701
1824
  goto search_done;
1702
- } while (--i >= (unsigned)num);
1825
+ } while (--i > n2);
1703
1826
  if (Max_retries < INT_MAX && --retry < 0)
1704
1827
  break;
1705
1828
  }
@@ -1728,7 +1851,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1728
1851
  if (rc) {
1729
1852
  if (rc == MDB_NOTFOUND)
1730
1853
  break;
1731
- return rc;
1854
+ goto fail;
1732
1855
  }
1733
1856
  last = *(txnid_t*)key.mv_data;
1734
1857
  if (oldest <= last)
@@ -1741,11 +1864,13 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1741
1864
  idl = (MDB_ID *) data.mv_data;
1742
1865
  i = idl[0];
1743
1866
  if (!mop) {
1744
- if (!(env->me_pghead = mop = mdb_midl_alloc(i)))
1745
- return ENOMEM;
1867
+ if (!(env->me_pghead = mop = mdb_midl_alloc(i))) {
1868
+ rc = ENOMEM;
1869
+ goto fail;
1870
+ }
1746
1871
  } else {
1747
1872
  if ((rc = mdb_midl_need(&env->me_pghead, i)) != 0)
1748
- return rc;
1873
+ goto fail;
1749
1874
  mop = env->me_pghead;
1750
1875
  }
1751
1876
  env->me_pglast = last;
@@ -1774,15 +1899,18 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
1774
1899
  pgno = txn->mt_next_pgno;
1775
1900
  if (pgno + num >= env->me_maxpg) {
1776
1901
  DPUTS("DB size maxed out");
1777
- return MDB_MAP_FULL;
1902
+ rc = MDB_MAP_FULL;
1903
+ goto fail;
1778
1904
  }
1779
1905
 
1780
1906
  search_done:
1781
1907
  if (env->me_flags & MDB_WRITEMAP) {
1782
1908
  np = (MDB_page *)(env->me_map + env->me_psize * pgno);
1783
1909
  } else {
1784
- if (!(np = mdb_page_malloc(txn, num)))
1785
- return ENOMEM;
1910
+ if (!(np = mdb_page_malloc(txn, num))) {
1911
+ rc = ENOMEM;
1912
+ goto fail;
1913
+ }
1786
1914
  }
1787
1915
  if (i) {
1788
1916
  mop[0] = mop_len -= num;
@@ -1797,6 +1925,10 @@ search_done:
1797
1925
  *mp = np;
1798
1926
 
1799
1927
  return MDB_SUCCESS;
1928
+
1929
+ fail:
1930
+ txn->mt_flags |= MDB_TXN_ERROR;
1931
+ return rc;
1800
1932
  }
1801
1933
 
1802
1934
  /** Copy the used portions of a non-overflow page.
@@ -1827,7 +1959,7 @@ mdb_page_copy(MDB_page *dst, MDB_page *src, unsigned int psize)
1827
1959
  * If a page being referenced was spilled to disk in this txn, bring
1828
1960
  * it back and make it dirty/writable again.
1829
1961
  * @param[in] txn the transaction handle.
1830
- * @param[in] mp the page being referenced.
1962
+ * @param[in] mp the page being referenced. It must not be dirty.
1831
1963
  * @param[out] ret the writable page, if any. ret is unchanged if
1832
1964
  * mp wasn't spilled.
1833
1965
  */
@@ -1903,17 +2035,17 @@ mdb_page_touch(MDB_cursor *mc)
1903
2035
  np = NULL;
1904
2036
  rc = mdb_page_unspill(txn, mp, &np);
1905
2037
  if (rc)
1906
- return rc;
2038
+ goto fail;
1907
2039
  if (np)
1908
2040
  goto done;
1909
2041
  }
1910
2042
  if ((rc = mdb_midl_need(&txn->mt_free_pgs, 1)) ||
1911
2043
  (rc = mdb_page_alloc(mc, 1, &np)))
1912
- return rc;
2044
+ goto fail;
1913
2045
  pgno = np->mp_pgno;
1914
2046
  DPRINTF(("touched db %d page %"Z"u -> %"Z"u", DDBI(mc),
1915
2047
  mp->mp_pgno, pgno));
1916
- assert(mp->mp_pgno != pgno);
2048
+ mdb_cassert(mc, mp->mp_pgno != pgno);
1917
2049
  mdb_midl_xappend(txn->mt_free_pgs, mp->mp_pgno);
1918
2050
  /* Update the parent page, if any, to point to the new page */
1919
2051
  if (mc->mc_top) {
@@ -1934,19 +2066,21 @@ mdb_page_touch(MDB_cursor *mc)
1934
2066
  if (x <= dl[0].mid && dl[x].mid == pgno) {
1935
2067
  if (mp != dl[x].mptr) { /* bad cursor? */
1936
2068
  mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
2069
+ txn->mt_flags |= MDB_TXN_ERROR;
1937
2070
  return MDB_CORRUPTED;
1938
2071
  }
1939
2072
  return 0;
1940
2073
  }
1941
2074
  }
1942
- assert(dl[0].mid < MDB_IDL_UM_MAX);
2075
+ mdb_cassert(mc, dl[0].mid < MDB_IDL_UM_MAX);
1943
2076
  /* No - copy it */
1944
2077
  np = mdb_page_malloc(txn, 1);
1945
2078
  if (!np)
1946
2079
  return ENOMEM;
1947
2080
  mid.mid = pgno;
1948
2081
  mid.mptr = np;
1949
- mdb_mid2l_insert(dl, &mid);
2082
+ rc = mdb_mid2l_insert(dl, &mid);
2083
+ mdb_cassert(mc, rc == 0);
1950
2084
  } else {
1951
2085
  return 0;
1952
2086
  }
@@ -1972,6 +2106,7 @@ done:
1972
2106
  if (m2->mc_pg[mc->mc_top] == mp) {
1973
2107
  m2->mc_pg[mc->mc_top] = np;
1974
2108
  if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
2109
+ IS_LEAF(np) &&
1975
2110
  m2->mc_ki[mc->mc_top] == mc->mc_ki[mc->mc_top])
1976
2111
  {
1977
2112
  MDB_node *leaf = NODEPTR(np, mc->mc_ki[mc->mc_top]);
@@ -1982,6 +2117,10 @@ done:
1982
2117
  }
1983
2118
  }
1984
2119
  return 0;
2120
+
2121
+ fail:
2122
+ txn->mt_flags |= MDB_TXN_ERROR;
2123
+ return rc;
1985
2124
  }
1986
2125
 
1987
2126
  int
@@ -2177,13 +2316,11 @@ mdb_txn_renew0(MDB_txn *txn)
2177
2316
  MDB_PID_T pid = env->me_pid;
2178
2317
  pthread_t tid = pthread_self();
2179
2318
 
2180
- if (!(env->me_flags & MDB_LIVE_READER)) {
2319
+ if (!env->me_live_reader) {
2181
2320
  rc = mdb_reader_pid(env, Pidset, pid);
2182
- if (rc) {
2183
- UNLOCK_MUTEX_R(env);
2321
+ if (rc)
2184
2322
  return rc;
2185
- }
2186
- env->me_flags |= MDB_LIVE_READER;
2323
+ env->me_live_reader = 1;
2187
2324
  }
2188
2325
 
2189
2326
  LOCK_MUTEX_R(env);
@@ -2543,7 +2680,7 @@ mdb_freelist_save(MDB_txn *txn)
2543
2680
  return rc;
2544
2681
  pglast = head_id = *(txnid_t *)key.mv_data;
2545
2682
  total_room = head_room = 0;
2546
- assert(pglast <= env->me_pglast);
2683
+ mdb_tassert(txn, pglast <= env->me_pglast);
2547
2684
  rc = mdb_cursor_del(&mc, 0);
2548
2685
  if (rc)
2549
2686
  return rc;
@@ -2633,22 +2770,20 @@ mdb_freelist_save(MDB_txn *txn)
2633
2770
  mop += mop_len;
2634
2771
  rc = mdb_cursor_first(&mc, &key, &data);
2635
2772
  for (; !rc; rc = mdb_cursor_next(&mc, &key, &data, MDB_NEXT)) {
2636
- unsigned flags = MDB_CURRENT;
2637
2773
  txnid_t id = *(txnid_t *)key.mv_data;
2638
2774
  ssize_t len = (ssize_t)(data.mv_size / sizeof(MDB_ID)) - 1;
2639
2775
  MDB_ID save;
2640
2776
 
2641
- assert(len >= 0 && id <= env->me_pglast);
2777
+ mdb_tassert(txn, len >= 0 && id <= env->me_pglast);
2642
2778
  key.mv_data = &id;
2643
2779
  if (len > mop_len) {
2644
2780
  len = mop_len;
2645
2781
  data.mv_size = (len + 1) * sizeof(MDB_ID);
2646
- flags = 0;
2647
2782
  }
2648
2783
  data.mv_data = mop -= len;
2649
2784
  save = mop[0];
2650
2785
  mop[0] = len;
2651
- rc = mdb_cursor_put(&mc, &key, &data, flags);
2786
+ rc = mdb_cursor_put(&mc, &key, &data, MDB_CURRENT);
2652
2787
  mop[0] = save;
2653
2788
  if (rc || !(mop_len -= len))
2654
2789
  break;
@@ -2804,8 +2939,8 @@ mdb_txn_commit(MDB_txn *txn)
2804
2939
  unsigned int i;
2805
2940
  MDB_env *env;
2806
2941
 
2807
- assert(txn != NULL);
2808
- assert(txn->mt_env != NULL);
2942
+ if (txn == NULL || txn->mt_env == NULL)
2943
+ return EINVAL;
2809
2944
 
2810
2945
  if (txn->mt_child) {
2811
2946
  rc = mdb_txn_commit(txn->mt_child);
@@ -2913,7 +3048,7 @@ mdb_txn_commit(MDB_txn *txn)
2913
3048
  if (yp == dst[x].mid)
2914
3049
  free(dst[x--].mptr);
2915
3050
  }
2916
- assert(i == x);
3051
+ mdb_tassert(txn, i == x);
2917
3052
  dst[0].mid = len;
2918
3053
  free(txn->mt_u.dirty_list);
2919
3054
  parent->mt_dirty_room = txn->mt_dirty_room;
@@ -3146,9 +3281,6 @@ mdb_env_write_meta(MDB_txn *txn)
3146
3281
  int r2;
3147
3282
  #endif
3148
3283
 
3149
- assert(txn != NULL);
3150
- assert(txn->mt_env != NULL);
3151
-
3152
3284
  toggle = txn->mt_txnid & 1;
3153
3285
  DPRINTF(("writing meta page %d for root page %"Z"u",
3154
3286
  toggle, txn->mt_dbs[MAIN_DBI].md_root));
@@ -3470,7 +3602,7 @@ mdb_env_open2(MDB_env *env)
3470
3602
  env->me_mapsize = minsize;
3471
3603
  }
3472
3604
 
3473
- rc = mdb_env_map(env, meta.mm_address, newenv);
3605
+ rc = mdb_env_map(env, meta.mm_address, newenv || env->me_mapsize != meta.mm_mapsize);
3474
3606
  if (rc)
3475
3607
  return rc;
3476
3608
 
@@ -3482,10 +3614,15 @@ mdb_env_open2(MDB_env *env)
3482
3614
  return i;
3483
3615
  }
3484
3616
  }
3485
- env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
3486
- env->me_nodemax = (env->me_psize - PAGEHDRSZ) / MDB_MINKEYS;
3487
3617
 
3618
+ env->me_maxfree_1pg = (env->me_psize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
3619
+ env->me_nodemax = (((env->me_psize - PAGEHDRSZ) / MDB_MINKEYS) & -2)
3620
+ - sizeof(indx_t);
3621
+ #if !(MDB_MAXKEYSIZE)
3622
+ env->me_maxkey = env->me_nodemax - (NODESIZE + sizeof(MDB_db));
3623
+ #endif
3488
3624
  env->me_maxpg = env->me_mapsize / env->me_psize;
3625
+
3489
3626
  #if MDB_DEBUG
3490
3627
  {
3491
3628
  int toggle = mdb_env_pick_meta(env);
@@ -3540,7 +3677,9 @@ static void NTAPI mdb_tls_callback(PVOID module, DWORD reason, PVOID ptr)
3540
3677
  case DLL_THREAD_DETACH:
3541
3678
  for (i=0; i<mdb_tls_nkeys; i++) {
3542
3679
  MDB_reader *r = pthread_getspecific(mdb_tls_keys[i]);
3543
- mdb_env_reader_dest(r);
3680
+ if (r) {
3681
+ mdb_env_reader_dest(r);
3682
+ }
3544
3683
  }
3545
3684
  break;
3546
3685
  case DLL_PROCESS_DETACH: break;
@@ -3985,6 +4124,10 @@ fail:
3985
4124
  #define CHANGELESS (MDB_FIXEDMAP|MDB_NOSUBDIR|MDB_RDONLY|MDB_WRITEMAP| \
3986
4125
  MDB_NOTLS|MDB_NOLOCK|MDB_NORDAHEAD)
3987
4126
 
4127
+ #if VALID_FLAGS & PERSISTENT_FLAGS & (CHANGEABLE|CHANGELESS)
4128
+ # error "Persistent DB flags & env flags overlap, but both go in mm_flags"
4129
+ #endif
4130
+
3988
4131
  int
3989
4132
  mdb_env_open(MDB_env *env, const char *path, unsigned int flags, mdb_mode_t mode)
3990
4133
  {
@@ -4403,6 +4546,13 @@ mdb_cmp_cint(const MDB_val *a, const MDB_val *b)
4403
4546
  #endif
4404
4547
  }
4405
4548
 
4549
+ /** Compare two items pointing at size_t's of unknown alignment. */
4550
+ #ifdef MISALIGNED_OK
4551
+ # define mdb_cmp_clong mdb_cmp_long
4552
+ #else
4553
+ # define mdb_cmp_clong mdb_cmp_cint
4554
+ #endif
4555
+
4406
4556
  /** Compare two items lexically */
4407
4557
  static int
4408
4558
  mdb_cmp_memn(const MDB_val *a, const MDB_val *b)
@@ -4469,17 +4619,9 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
4469
4619
 
4470
4620
  nkeys = NUMKEYS(mp);
4471
4621
 
4472
- #if MDB_DEBUG
4473
- {
4474
- pgno_t pgno;
4475
- COPY_PGNO(pgno, mp->mp_pgno);
4476
4622
  DPRINTF(("searching %u keys in %s %spage %"Z"u",
4477
4623
  nkeys, IS_LEAF(mp) ? "leaf" : "branch", IS_SUBP(mp) ? "sub-" : "",
4478
- pgno));
4479
- }
4480
- #endif
4481
-
4482
- assert(nkeys > 0);
4624
+ mdb_dbg_pgno(mp)));
4483
4625
 
4484
4626
  low = IS_LEAF(mp) ? 0 : 1;
4485
4627
  high = nkeys - 1;
@@ -4543,7 +4685,7 @@ mdb_node_search(MDB_cursor *mc, MDB_val *key, int *exactp)
4543
4685
  node = NODEPTR(mp, i);
4544
4686
  }
4545
4687
  if (exactp)
4546
- *exactp = (rc == 0);
4688
+ *exactp = (rc == 0 && nkeys > 0);
4547
4689
  /* store the key index */
4548
4690
  mc->mc_ki[mc->mc_top] = i;
4549
4691
  if (i >= nkeys)
@@ -4593,7 +4735,7 @@ mdb_cursor_push(MDB_cursor *mc, MDB_page *mp)
4593
4735
  DDBI(mc), (void *) mc));
4594
4736
 
4595
4737
  if (mc->mc_snum >= CURSOR_STACK) {
4596
- assert(mc->mc_snum < CURSOR_STACK);
4738
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
4597
4739
  return MDB_CURSOR_FULL;
4598
4740
  }
4599
4741
 
@@ -4653,7 +4795,7 @@ mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **ret, int *lvl)
4653
4795
  p = (MDB_page *)(env->me_map + env->me_psize * pgno);
4654
4796
  } else {
4655
4797
  DPRINTF(("page %"Z"u not found", pgno));
4656
- assert(p != NULL);
4798
+ txn->mt_flags |= MDB_TXN_ERROR;
4657
4799
  return MDB_PAGE_NOTFOUND;
4658
4800
  }
4659
4801
 
@@ -4679,7 +4821,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
4679
4821
  indx_t i;
4680
4822
 
4681
4823
  DPRINTF(("branch page %"Z"u has %u keys", mp->mp_pgno, NUMKEYS(mp)));
4682
- assert(NUMKEYS(mp) > 1);
4824
+ mdb_cassert(mc, NUMKEYS(mp) > 1);
4683
4825
  DPRINTF(("found index 0 to page %"Z"u", NODEPGNO(NODEPTR(mp, 0))));
4684
4826
 
4685
4827
  if (flags & (MDB_PS_FIRST|MDB_PS_LAST)) {
@@ -4694,14 +4836,14 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
4694
4836
  else {
4695
4837
  i = mc->mc_ki[mc->mc_top];
4696
4838
  if (!exact) {
4697
- assert(i > 0);
4839
+ mdb_cassert(mc, i > 0);
4698
4840
  i--;
4699
4841
  }
4700
4842
  }
4701
4843
  DPRINTF(("following index %u for key [%s]", i, DKEY(key)));
4702
4844
  }
4703
4845
 
4704
- assert(i < NUMKEYS(mp));
4846
+ mdb_cassert(mc, i < NUMKEYS(mp));
4705
4847
  node = NODEPTR(mp, i);
4706
4848
 
4707
4849
  if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(node), &mp, NULL)) != 0)
@@ -4721,6 +4863,7 @@ mdb_page_search_root(MDB_cursor *mc, MDB_val *key, int flags)
4721
4863
  if (!IS_LEAF(mp)) {
4722
4864
  DPRINTF(("internal error, index points to a %02X page!?",
4723
4865
  mp->mp_flags));
4866
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
4724
4867
  return MDB_CORRUPTED;
4725
4868
  }
4726
4869
 
@@ -4815,7 +4958,7 @@ mdb_page_search(MDB_cursor *mc, MDB_val *key, int flags)
4815
4958
  }
4816
4959
  }
4817
4960
 
4818
- assert(root > 1);
4961
+ mdb_cassert(mc, root > 1);
4819
4962
  if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
4820
4963
  if ((rc = mdb_page_get(mc->mc_txn, root, &mc->mc_pg[0], NULL)) != 0)
4821
4964
  return rc;
@@ -4885,7 +5028,7 @@ mdb_ovpage_free(MDB_cursor *mc, MDB_page *mp)
4885
5028
  iy = dl[x];
4886
5029
  dl[x] = ix;
4887
5030
  } else {
4888
- assert(x > 1);
5031
+ mdb_cassert(mc, x > 1);
4889
5032
  j = ++(dl[0].mid);
4890
5033
  dl[j] = ix; /* Unsorted. OK when MDB_TXN_ERROR. */
4891
5034
  txn->mt_flags |= MDB_TXN_ERROR;
@@ -4953,20 +5096,14 @@ mdb_get(MDB_txn *txn, MDB_dbi dbi,
4953
5096
  int exact = 0;
4954
5097
  DKBUF;
4955
5098
 
4956
- assert(key);
4957
- assert(data);
4958
5099
  DPRINTF(("===> get db %u key [%s]", dbi, DKEY(key)));
4959
5100
 
4960
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
5101
+ if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
4961
5102
  return EINVAL;
4962
5103
 
4963
5104
  if (txn->mt_flags & MDB_TXN_ERROR)
4964
5105
  return MDB_BAD_TXN;
4965
5106
 
4966
- if (key->mv_size > MDB_MAXKEYSIZE) {
4967
- return MDB_BAD_VALSIZE;
4968
- }
4969
-
4970
5107
  mdb_cursor_init(&mc, txn, dbi, &mx);
4971
5108
  return mdb_cursor_set(&mc, key, data, MDB_SET, &exact);
4972
5109
  }
@@ -5012,7 +5149,7 @@ mdb_cursor_sibling(MDB_cursor *mc, int move_right)
5012
5149
  DPRINTF(("just moving to %s index key %u",
5013
5150
  move_right ? "right" : "left", mc->mc_ki[mc->mc_top]));
5014
5151
  }
5015
- assert(IS_BRANCH(mc->mc_pg[mc->mc_top]));
5152
+ mdb_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
5016
5153
 
5017
5154
  indx = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
5018
5155
  if ((rc = mdb_page_get(mc->mc_txn, NODEPGNO(indx), &mp, NULL)) != 0) {
@@ -5040,7 +5177,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5040
5177
  return MDB_NOTFOUND;
5041
5178
  }
5042
5179
 
5043
- assert(mc->mc_flags & C_INITIALIZED);
5180
+ mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
5044
5181
 
5045
5182
  mp = mc->mc_pg[mc->mc_top];
5046
5183
 
@@ -5062,7 +5199,8 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5062
5199
  }
5063
5200
  }
5064
5201
 
5065
- DPRINTF(("cursor_next: top page is %"Z"u in cursor %p", mp->mp_pgno, (void *) mc));
5202
+ DPRINTF(("cursor_next: top page is %"Z"u in cursor %p",
5203
+ mdb_dbg_pgno(mp), (void *) mc));
5066
5204
  if (mc->mc_flags & C_DEL)
5067
5205
  goto skip;
5068
5206
 
@@ -5079,7 +5217,7 @@ mdb_cursor_next(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5079
5217
 
5080
5218
  skip:
5081
5219
  DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
5082
- mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5220
+ mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5083
5221
 
5084
5222
  if (IS_LEAF2(mp)) {
5085
5223
  key->mv_size = mc->mc_db->md_pad;
@@ -5087,7 +5225,7 @@ skip:
5087
5225
  return MDB_SUCCESS;
5088
5226
  }
5089
5227
 
5090
- assert(IS_LEAF(mp));
5228
+ mdb_cassert(mc, IS_LEAF(mp));
5091
5229
  leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
5092
5230
 
5093
5231
  if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
@@ -5116,7 +5254,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5116
5254
  MDB_node *leaf;
5117
5255
  int rc;
5118
5256
 
5119
- assert(mc->mc_flags & C_INITIALIZED);
5257
+ mdb_cassert(mc, mc->mc_flags & C_INITIALIZED);
5120
5258
 
5121
5259
  mp = mc->mc_pg[mc->mc_top];
5122
5260
 
@@ -5138,7 +5276,8 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5138
5276
  }
5139
5277
  }
5140
5278
 
5141
- DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p", mp->mp_pgno, (void *) mc));
5279
+ DPRINTF(("cursor_prev: top page is %"Z"u in cursor %p",
5280
+ mdb_dbg_pgno(mp), (void *) mc));
5142
5281
 
5143
5282
  if (mc->mc_ki[mc->mc_top] == 0) {
5144
5283
  DPUTS("=====> move to prev sibling page");
@@ -5154,7 +5293,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5154
5293
  mc->mc_flags &= ~C_EOF;
5155
5294
 
5156
5295
  DPRINTF(("==> cursor points to page %"Z"u with %u keys, key index %u",
5157
- mp->mp_pgno, NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5296
+ mdb_dbg_pgno(mp), NUMKEYS(mp), mc->mc_ki[mc->mc_top]));
5158
5297
 
5159
5298
  if (IS_LEAF2(mp)) {
5160
5299
  key->mv_size = mc->mc_db->md_pad;
@@ -5162,7 +5301,7 @@ mdb_cursor_prev(MDB_cursor *mc, MDB_val *key, MDB_val *data, MDB_cursor_op op)
5162
5301
  return MDB_SUCCESS;
5163
5302
  }
5164
5303
 
5165
- assert(IS_LEAF(mp));
5304
+ mdb_cassert(mc, IS_LEAF(mp));
5166
5305
  leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
5167
5306
 
5168
5307
  if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
@@ -5193,8 +5332,6 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5193
5332
  MDB_node *leaf = NULL;
5194
5333
  DKBUF;
5195
5334
 
5196
- assert(mc);
5197
- assert(key);
5198
5335
  if (key->mv_size == 0)
5199
5336
  return MDB_BAD_VALSIZE;
5200
5337
 
@@ -5284,7 +5421,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5284
5421
  if (!mc->mc_top) {
5285
5422
  /* There are no other pages */
5286
5423
  mc->mc_ki[mc->mc_top] = 0;
5287
- if (op == MDB_SET_RANGE) {
5424
+ if (op == MDB_SET_RANGE && !exactp) {
5288
5425
  rc = 0;
5289
5426
  goto set1;
5290
5427
  } else
@@ -5297,7 +5434,7 @@ mdb_cursor_set(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5297
5434
  return rc;
5298
5435
 
5299
5436
  mp = mc->mc_pg[mc->mc_top];
5300
- assert(IS_LEAF(mp));
5437
+ mdb_cassert(mc, IS_LEAF(mp));
5301
5438
 
5302
5439
  set2:
5303
5440
  leaf = mdb_node_search(mc, key, exactp);
@@ -5311,7 +5448,7 @@ set2:
5311
5448
  if ((rc = mdb_cursor_sibling(mc, 1)) != MDB_SUCCESS)
5312
5449
  return rc; /* no entries matched */
5313
5450
  mp = mc->mc_pg[mc->mc_top];
5314
- assert(IS_LEAF(mp));
5451
+ mdb_cassert(mc, IS_LEAF(mp));
5315
5452
  leaf = NODEPTR(mp, 0);
5316
5453
  }
5317
5454
 
@@ -5353,6 +5490,7 @@ set1:
5353
5490
  if (op == MDB_GET_BOTH || rc > 0)
5354
5491
  return MDB_NOTFOUND;
5355
5492
  rc = 0;
5493
+ *data = d2;
5356
5494
  }
5357
5495
 
5358
5496
  } else {
@@ -5386,7 +5524,7 @@ mdb_cursor_first(MDB_cursor *mc, MDB_val *key, MDB_val *data)
5386
5524
  if (rc != MDB_SUCCESS)
5387
5525
  return rc;
5388
5526
  }
5389
- assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
5527
+ mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
5390
5528
 
5391
5529
  leaf = NODEPTR(mc->mc_pg[mc->mc_top], 0);
5392
5530
  mc->mc_flags |= C_INITIALIZED;
@@ -5432,7 +5570,7 @@ mdb_cursor_last(MDB_cursor *mc, MDB_val *key, MDB_val *data)
5432
5570
  if (rc != MDB_SUCCESS)
5433
5571
  return rc;
5434
5572
  }
5435
- assert(IS_LEAF(mc->mc_pg[mc->mc_top]));
5573
+ mdb_cassert(mc, IS_LEAF(mc->mc_pg[mc->mc_top]));
5436
5574
 
5437
5575
  }
5438
5576
  mc->mc_ki[mc->mc_top] = NUMKEYS(mc->mc_pg[mc->mc_top]) - 1;
@@ -5469,7 +5607,8 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5469
5607
  int exact = 0;
5470
5608
  int (*mfunc)(MDB_cursor *mc, MDB_val *key, MDB_val *data);
5471
5609
 
5472
- assert(mc);
5610
+ if (mc == NULL)
5611
+ return EINVAL;
5473
5612
 
5474
5613
  if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
5475
5614
  return MDB_BAD_TXN;
@@ -5521,12 +5660,10 @@ mdb_cursor_get(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5521
5660
  case MDB_SET_RANGE:
5522
5661
  if (key == NULL) {
5523
5662
  rc = EINVAL;
5524
- } else if (key->mv_size > MDB_MAXKEYSIZE) {
5525
- rc = MDB_BAD_VALSIZE;
5526
- } else if (op == MDB_SET_RANGE)
5527
- rc = mdb_cursor_set(mc, key, data, op, NULL);
5528
- else
5529
- rc = mdb_cursor_set(mc, key, data, op, &exact);
5663
+ } else {
5664
+ rc = mdb_cursor_set(mc, key, data, op,
5665
+ op == MDB_SET_RANGE ? NULL : &exact);
5666
+ }
5530
5667
  break;
5531
5668
  case MDB_GET_MULTIPLE:
5532
5669
  if (data == NULL || !(mc->mc_flags & C_INITIALIZED)) {
@@ -5663,18 +5800,24 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5663
5800
  unsigned int flags)
5664
5801
  {
5665
5802
  enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */
5666
- MDB_env *env = mc->mc_txn->mt_env;
5803
+ MDB_env *env;
5667
5804
  MDB_node *leaf = NULL;
5668
- MDB_val xdata, *rdata, dkey;
5805
+ MDB_page *fp, *mp;
5806
+ uint16_t fp_flags;
5807
+ MDB_val xdata, *rdata, dkey, olddata;
5669
5808
  MDB_db dummy;
5670
- int do_sub = 0, insert = 0;
5809
+ int do_sub = 0, insert_key, insert_data;
5671
5810
  unsigned int mcount = 0, dcount = 0, nospill;
5672
5811
  size_t nsize;
5673
5812
  int rc, rc2;
5674
- char dbuf[MDB_MAXKEYSIZE+1];
5675
5813
  unsigned int nflags;
5676
5814
  DKBUF;
5677
5815
 
5816
+ if (mc == NULL || key == NULL)
5817
+ return EINVAL;
5818
+
5819
+ env = mc->mc_txn->mt_env;
5820
+
5678
5821
  /* Check this first so counter will always be zero on any
5679
5822
  * early failures.
5680
5823
  */
@@ -5691,14 +5834,14 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5691
5834
  if (mc->mc_txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
5692
5835
  return (mc->mc_txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
5693
5836
 
5694
- if (flags != MDB_CURRENT && (key->mv_size == 0 || key->mv_size > MDB_MAXKEYSIZE))
5695
- return MDB_BAD_VALSIZE;
5696
-
5697
- if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT) && data->mv_size > MDB_MAXKEYSIZE)
5837
+ if (key->mv_size-1 >= ENV_MAXKEY(env))
5698
5838
  return MDB_BAD_VALSIZE;
5699
5839
 
5700
5840
  #if SIZE_MAX > MAXDATASIZE
5701
- if (data->mv_size > MAXDATASIZE)
5841
+ if (data->mv_size > ((mc->mc_db->md_flags & MDB_DUPSORT) ? ENV_MAXKEY(env) : MAXDATASIZE))
5842
+ return MDB_BAD_VALSIZE;
5843
+ #else
5844
+ if ((mc->mc_db->md_flags & MDB_DUPSORT) && data->mv_size > ENV_MAXKEY(env))
5702
5845
  return MDB_BAD_VALSIZE;
5703
5846
  #endif
5704
5847
 
@@ -5782,11 +5925,21 @@ mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
5782
5925
  return rc2;
5783
5926
  }
5784
5927
 
5785
- /* The key already exists */
5786
- if (rc == MDB_SUCCESS) {
5787
- MDB_page *fp, *mp;
5788
- MDB_val olddata;
5789
-
5928
+ insert_key = insert_data = rc;
5929
+ if (insert_key) {
5930
+ /* The key does not exist */
5931
+ DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top]));
5932
+ if ((mc->mc_db->md_flags & MDB_DUPSORT) &&
5933
+ LEAFSIZE(key, data) > env->me_nodemax)
5934
+ {
5935
+ /* Too big for a node, insert in sub-DB */
5936
+ fp_flags = P_LEAF|P_DIRTY;
5937
+ fp = env->me_pbuf;
5938
+ fp->mp_pad = data->mv_size; /* used if MDB_DUPFIXED */
5939
+ fp->mp_lower = fp->mp_upper = olddata.mv_size = PAGEHDRSZ;
5940
+ goto prep_subDB;
5941
+ }
5942
+ } else {
5790
5943
  /* there's only a key anyway, so this is a no-op */
5791
5944
  if (IS_LEAF2(mc->mc_pg[mc->mc_top])) {
5792
5945
  unsigned int ksize = mc->mc_db->md_pad;
@@ -5806,6 +5959,12 @@ more:
5806
5959
 
5807
5960
  /* DB has dups? */
5808
5961
  if (F_ISSET(mc->mc_db->md_flags, MDB_DUPSORT)) {
5962
+ /* Prepare (sub-)page/sub-DB to accept the new item,
5963
+ * if needed. fp: old sub-page or a header faking
5964
+ * it. mp: new (sub-)page. offset: growth in page
5965
+ * size. xdata: node data with new page or DB.
5966
+ */
5967
+ unsigned i, offset = 0;
5809
5968
  mp = fp = xdata.mv_data = env->me_pbuf;
5810
5969
  mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno;
5811
5970
 
@@ -5815,29 +5974,23 @@ more:
5815
5974
  if (flags == MDB_CURRENT)
5816
5975
  goto current;
5817
5976
 
5818
- dkey = olddata;
5819
5977
  #if UINT_MAX < SIZE_MAX
5820
- if (mc->mc_dbx->md_dcmp == mdb_cmp_int && dkey.mv_size == sizeof(size_t))
5821
- #ifdef MISALIGNED_OK
5822
- mc->mc_dbx->md_dcmp = mdb_cmp_long;
5823
- #else
5824
- mc->mc_dbx->md_dcmp = mdb_cmp_cint;
5825
- #endif
5978
+ if (mc->mc_dbx->md_dcmp == mdb_cmp_int && olddata.mv_size == sizeof(size_t))
5979
+ mc->mc_dbx->md_dcmp = mdb_cmp_clong;
5826
5980
  #endif
5827
5981
  /* if data matches, skip it */
5828
- if (!mc->mc_dbx->md_dcmp(data, &dkey)) {
5982
+ if (!mc->mc_dbx->md_dcmp(data, &olddata)) {
5829
5983
  if (flags & MDB_NODUPDATA)
5830
- rc = MDB_KEYEXIST;
5831
- else if (flags & MDB_MULTIPLE)
5832
- goto next_mult;
5833
- else
5834
- rc = MDB_SUCCESS;
5835
- return rc;
5984
+ return MDB_KEYEXIST;
5985
+ rc = MDB_SUCCESS;
5986
+ goto next_sub;
5836
5987
  }
5837
5988
 
5838
- /* create a fake page for the dup items */
5839
- memcpy(dbuf, dkey.mv_data, dkey.mv_size);
5840
- dkey.mv_data = dbuf;
5989
+ /* Back up original data item */
5990
+ dkey.mv_size = olddata.mv_size;
5991
+ dkey.mv_data = memcpy(fp+1, olddata.mv_data, olddata.mv_size);
5992
+
5993
+ /* Make sub-page header for the dup items, with dummy body */
5841
5994
  fp->mp_flags = P_LEAF|P_DIRTY|P_SUBP;
5842
5995
  fp->mp_lower = PAGEHDRSZ;
5843
5996
  xdata.mv_size = PAGEHDRSZ + dkey.mv_size + data->mv_size;
@@ -5850,22 +6003,19 @@ more:
5850
6003
  (dkey.mv_size & 1) + (data->mv_size & 1);
5851
6004
  }
5852
6005
  fp->mp_upper = xdata.mv_size;
6006
+ olddata.mv_size = fp->mp_upper; /* pretend olddata is fp */
5853
6007
  } else if (leaf->mn_flags & F_SUBDATA) {
5854
6008
  /* Data is on sub-DB, just store it */
5855
6009
  flags |= F_DUPDATA|F_SUBDATA;
5856
6010
  goto put_sub;
5857
6011
  } else {
5858
- /* See if we need to convert from fake page to subDB */
5859
- unsigned int offset;
5860
- unsigned int i;
5861
- uint16_t fp_flags;
5862
-
6012
+ /* Data is on sub-page */
5863
6013
  fp = olddata.mv_data;
5864
6014
  switch (flags) {
5865
6015
  default:
5866
6016
  if (!(mc->mc_db->md_flags & MDB_DUPFIXED)) {
5867
- offset = NODESIZE + sizeof(indx_t) + data->mv_size;
5868
- offset += offset & 1;
6017
+ offset = EVEN(NODESIZE + sizeof(indx_t) +
6018
+ data->mv_size);
5869
6019
  break;
5870
6020
  }
5871
6021
  offset = fp->mp_pad;
@@ -5881,12 +6031,16 @@ more:
5881
6031
  flags |= F_DUPDATA;
5882
6032
  goto put_sub;
5883
6033
  }
5884
- fp_flags = fp->mp_flags;
5885
6034
  xdata.mv_size = olddata.mv_size + offset;
5886
- if (NODESIZE + sizeof(indx_t) + NODEKSZ(leaf) + xdata.mv_size
5887
- >= env->me_nodemax) {
5888
- /* yes, convert it */
6035
+ }
6036
+
6037
+ fp_flags = fp->mp_flags;
6038
+ if (NODESIZE + NODEKSZ(leaf) + xdata.mv_size > env->me_nodemax) {
6039
+ /* Too big for a sub-page, convert to sub-DB */
6040
+ fp_flags &= ~P_SUBP;
6041
+ prep_subDB:
5889
6042
  if (mc->mc_db->md_flags & MDB_DUPFIXED) {
6043
+ fp_flags |= P_LEAF2;
5890
6044
  dummy.md_pad = fp->mp_pad;
5891
6045
  dummy.md_flags = MDB_DUPFIXED;
5892
6046
  if (mc->mc_db->md_flags & MDB_INTEGERDUP)
@@ -5907,13 +6061,13 @@ more:
5907
6061
  offset = env->me_psize - olddata.mv_size;
5908
6062
  flags |= F_DUPDATA|F_SUBDATA;
5909
6063
  dummy.md_root = mp->mp_pgno;
5910
- fp_flags &= ~P_SUBP;
5911
- }
6064
+ }
6065
+ if (mp != fp) {
5912
6066
  mp->mp_flags = fp_flags | P_DIRTY;
5913
6067
  mp->mp_pad = fp->mp_pad;
5914
6068
  mp->mp_lower = fp->mp_lower;
5915
6069
  mp->mp_upper = fp->mp_upper + offset;
5916
- if (IS_LEAF2(fp)) {
6070
+ if (fp_flags & P_LEAF2) {
5917
6071
  memcpy(METADATA(mp), METADATA(fp), NUMKEYS(fp) * fp->mp_pad);
5918
6072
  } else {
5919
6073
  memcpy((char *)mp + mp->mp_upper, (char *)fp + fp->mp_upper,
@@ -5926,7 +6080,8 @@ more:
5926
6080
  rdata = &xdata;
5927
6081
  flags |= F_DUPDATA;
5928
6082
  do_sub = 1;
5929
- mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
6083
+ if (!insert_key)
6084
+ mdb_node_del(mc, 0);
5930
6085
  goto new_sub;
5931
6086
  }
5932
6087
  current:
@@ -5966,7 +6121,8 @@ current:
5966
6121
  return ENOMEM;
5967
6122
  id2.mid = pg;
5968
6123
  id2.mptr = np;
5969
- mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
6124
+ rc2 = mdb_mid2l_insert(mc->mc_txn->mt_u.dirty_list, &id2);
6125
+ mdb_cassert(mc, rc2 == 0);
5970
6126
  if (!(flags & MDB_RESERVE)) {
5971
6127
  /* Copy end of page, adjusting alignment so
5972
6128
  * compiler may copy words instead of bytes.
@@ -5984,7 +6140,7 @@ current:
5984
6140
  data->mv_data = METADATA(omp);
5985
6141
  else
5986
6142
  memcpy(METADATA(omp), data->mv_data, data->mv_size);
5987
- goto done;
6143
+ return MDB_SUCCESS;
5988
6144
  }
5989
6145
  }
5990
6146
  if ((rc2 = mdb_ovpage_free(mc, omp)) != MDB_SUCCESS)
@@ -5996,17 +6152,13 @@ current:
5996
6152
  */
5997
6153
  if (F_ISSET(flags, MDB_RESERVE))
5998
6154
  data->mv_data = olddata.mv_data;
5999
- else if (data->mv_size)
6155
+ else if (!(mc->mc_flags & C_SUB))
6000
6156
  memcpy(olddata.mv_data, data->mv_data, data->mv_size);
6001
6157
  else
6002
6158
  memcpy(NODEKEY(leaf), key->mv_data, key->mv_size);
6003
- goto done;
6159
+ return MDB_SUCCESS;
6004
6160
  }
6005
- mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
6006
- mc->mc_db->md_entries--;
6007
- } else {
6008
- DPRINTF(("inserting key at index %i", mc->mc_ki[mc->mc_top]));
6009
- insert = 1;
6161
+ mdb_node_del(mc, 0);
6010
6162
  }
6011
6163
 
6012
6164
  rdata = data;
@@ -6016,14 +6168,14 @@ new_sub:
6016
6168
  nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->mv_size : mdb_leaf_size(env, key, rdata);
6017
6169
  if (SIZELEFT(mc->mc_pg[mc->mc_top]) < nsize) {
6018
6170
  if (( flags & (F_DUPDATA|F_SUBDATA)) == F_DUPDATA )
6019
- nflags &= ~MDB_APPEND;
6020
- if (!insert)
6171
+ nflags &= ~MDB_APPEND; /* sub-page may need room to grow */
6172
+ if (!insert_key)
6021
6173
  nflags |= MDB_SPLIT_REPLACE;
6022
6174
  rc = mdb_page_split(mc, key, rdata, P_INVALID, nflags);
6023
6175
  } else {
6024
6176
  /* There is room already in this leaf page. */
6025
6177
  rc = mdb_node_add(mc, mc->mc_ki[mc->mc_top], key, rdata, 0, nflags);
6026
- if (rc == 0 && !do_sub && insert) {
6178
+ if (rc == 0 && insert_key) {
6027
6179
  /* Adjust other cursors pointing to mp */
6028
6180
  MDB_cursor *m2, *m3;
6029
6181
  MDB_dbi dbi = mc->mc_dbi;
@@ -6043,9 +6195,7 @@ new_sub:
6043
6195
  }
6044
6196
  }
6045
6197
 
6046
- if (rc != MDB_SUCCESS)
6047
- mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6048
- else {
6198
+ if (rc == MDB_SUCCESS) {
6049
6199
  /* Now store the actual data in the child DB. Note that we're
6050
6200
  * storing the user data in the keys field, so there are strict
6051
6201
  * size limits on dupdata. The actual data fields of the child
@@ -6053,6 +6203,7 @@ new_sub:
6053
6203
  */
6054
6204
  if (do_sub) {
6055
6205
  int xflags;
6206
+ size_t ecount;
6056
6207
  put_sub:
6057
6208
  xdata.mv_size = 0;
6058
6209
  xdata.mv_data = "";
@@ -6068,7 +6219,7 @@ put_sub:
6068
6219
  if (dkey.mv_size) {
6069
6220
  rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, xflags);
6070
6221
  if (rc)
6071
- return rc;
6222
+ goto bad_sub;
6072
6223
  {
6073
6224
  /* Adjust other cursors pointing to mp */
6074
6225
  MDB_cursor *m2;
@@ -6086,6 +6237,7 @@ put_sub:
6086
6237
  /* we've done our job */
6087
6238
  dkey.mv_size = 0;
6088
6239
  }
6240
+ ecount = mc->mc_xcursor->mx_db.md_entries;
6089
6241
  if (flags & MDB_APPENDDUP)
6090
6242
  xflags |= MDB_APPEND;
6091
6243
  rc = mdb_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
@@ -6093,31 +6245,39 @@ put_sub:
6093
6245
  void *db = NODEDATA(leaf);
6094
6246
  memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDB_db));
6095
6247
  }
6248
+ insert_data = mc->mc_xcursor->mx_db.md_entries - ecount;
6096
6249
  }
6097
- /* sub-writes might have failed so check rc again.
6098
- * Don't increment count if we just replaced an existing item.
6099
- */
6100
- if (!rc && !(flags & MDB_CURRENT))
6250
+ /* Increment count unless we just replaced an existing item. */
6251
+ if (insert_data)
6101
6252
  mc->mc_db->md_entries++;
6253
+ if (insert_key) {
6254
+ /* Invalidate txn if we created an empty sub-DB */
6255
+ if (rc)
6256
+ goto bad_sub;
6257
+ /* If we succeeded and the key didn't exist before,
6258
+ * make sure the cursor is marked valid.
6259
+ */
6260
+ mc->mc_flags |= C_INITIALIZED;
6261
+ }
6262
+ next_sub:
6102
6263
  if (flags & MDB_MULTIPLE) {
6103
6264
  if (!rc) {
6104
- next_mult:
6105
6265
  mcount++;
6106
6266
  /* let caller know how many succeeded, if any */
6107
6267
  data[1].mv_size = mcount;
6108
6268
  if (mcount < dcount) {
6109
6269
  data[0].mv_data = (char *)data[0].mv_data + data[0].mv_size;
6270
+ insert_key = insert_data = 0;
6110
6271
  goto more;
6111
6272
  }
6112
6273
  }
6113
6274
  }
6275
+ return rc;
6276
+ bad_sub:
6277
+ if (rc == MDB_KEYEXIST) /* should not happen, we deleted that item */
6278
+ rc = MDB_CORRUPTED;
6114
6279
  }
6115
- done:
6116
- /* If we succeeded and the key didn't exist before, make sure
6117
- * the cursor is marked valid.
6118
- */
6119
- if (!rc && insert)
6120
- mc->mc_flags |= C_INITIALIZED;
6280
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6121
6281
  return rc;
6122
6282
  }
6123
6283
 
@@ -6145,14 +6305,21 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
6145
6305
  return rc;
6146
6306
 
6147
6307
  mp = mc->mc_pg[mc->mc_top];
6308
+ if (IS_LEAF2(mp))
6309
+ goto del_key;
6148
6310
  leaf = NODEPTR(mp, mc->mc_ki[mc->mc_top]);
6149
6311
 
6150
- if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_DUPDATA)) {
6151
- if (!(flags & MDB_NODUPDATA)) {
6312
+ if (F_ISSET(leaf->mn_flags, F_DUPDATA)) {
6313
+ if (flags & MDB_NODUPDATA) {
6314
+ /* mdb_cursor_del0() will subtract the final entry */
6315
+ mc->mc_db->md_entries -= mc->mc_xcursor->mx_db.md_entries - 1;
6316
+ } else {
6152
6317
  if (!F_ISSET(leaf->mn_flags, F_SUBDATA)) {
6153
6318
  mc->mc_xcursor->mx_cursor.mc_pg[0] = NODEDATA(leaf);
6154
6319
  }
6155
6320
  rc = mdb_cursor_del(&mc->mc_xcursor->mx_cursor, MDB_NOSPILL);
6321
+ if (rc)
6322
+ return rc;
6156
6323
  /* If sub-DB still has entries, we're done */
6157
6324
  if (mc->mc_xcursor->mx_db.md_entries) {
6158
6325
  if (leaf->mn_flags & F_SUBDATA) {
@@ -6183,14 +6350,28 @@ mdb_cursor_del(MDB_cursor *mc, unsigned int flags)
6183
6350
  if (leaf->mn_flags & F_SUBDATA) {
6184
6351
  /* add all the child DB's pages to the free list */
6185
6352
  rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
6186
- if (rc == MDB_SUCCESS) {
6187
- mc->mc_db->md_entries -=
6188
- mc->mc_xcursor->mx_db.md_entries;
6189
- }
6353
+ if (rc)
6354
+ goto fail;
6190
6355
  }
6191
6356
  }
6192
6357
 
6193
- return mdb_cursor_del0(mc, leaf);
6358
+ /* add overflow pages to free list */
6359
+ if (F_ISSET(leaf->mn_flags, F_BIGDATA)) {
6360
+ MDB_page *omp;
6361
+ pgno_t pg;
6362
+
6363
+ memcpy(&pg, NODEDATA(leaf), sizeof(pg));
6364
+ if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) ||
6365
+ (rc = mdb_ovpage_free(mc, omp)))
6366
+ goto fail;
6367
+ }
6368
+
6369
+ del_key:
6370
+ return mdb_cursor_del0(mc);
6371
+
6372
+ fail:
6373
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6374
+ return rc;
6194
6375
  }
6195
6376
 
6196
6377
  /** Allocate and initialize new pages for a database.
@@ -6245,13 +6426,12 @@ mdb_leaf_size(MDB_env *env, MDB_val *key, MDB_val *data)
6245
6426
  size_t sz;
6246
6427
 
6247
6428
  sz = LEAFSIZE(key, data);
6248
- if (sz >= env->me_nodemax) {
6429
+ if (sz > env->me_nodemax) {
6249
6430
  /* put on overflow page */
6250
6431
  sz -= data->mv_size - sizeof(pgno_t);
6251
6432
  }
6252
- sz += sz & 1;
6253
6433
 
6254
- return sz + sizeof(indx_t);
6434
+ return EVEN(sz + sizeof(indx_t));
6255
6435
  }
6256
6436
 
6257
6437
  /** Calculate the size of a branch node.
@@ -6270,7 +6450,7 @@ mdb_branch_size(MDB_env *env, MDB_val *key)
6270
6450
  size_t sz;
6271
6451
 
6272
6452
  sz = INDXSIZE(key);
6273
- if (sz >= env->me_nodemax) {
6453
+ if (sz > env->me_nodemax) {
6274
6454
  /* put on overflow page */
6275
6455
  /* not implemented */
6276
6456
  /* sz -= key->size - sizeof(pgno_t); */
@@ -6307,12 +6487,12 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
6307
6487
  MDB_page *ofp = NULL; /* overflow page */
6308
6488
  DKBUF;
6309
6489
 
6310
- assert(mp->mp_upper >= mp->mp_lower);
6490
+ mdb_cassert(mc, mp->mp_upper >= mp->mp_lower);
6311
6491
 
6312
6492
  DPRINTF(("add to %s %spage %"Z"u index %i, data size %"Z"u key size %"Z"u [%s]",
6313
6493
  IS_LEAF(mp) ? "leaf" : "branch",
6314
6494
  IS_SUBP(mp) ? "sub-" : "",
6315
- mp->mp_pgno, indx, data ? data->mv_size : 0,
6495
+ mdb_dbg_pgno(mp), indx, data ? data->mv_size : 0,
6316
6496
  key ? key->mv_size : 0, key ? DKEY(key) : "null"));
6317
6497
 
6318
6498
  if (IS_LEAF2(mp)) {
@@ -6335,17 +6515,17 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
6335
6515
  if (key != NULL)
6336
6516
  node_size += key->mv_size;
6337
6517
  if (IS_LEAF(mp)) {
6338
- assert(data);
6518
+ mdb_cassert(mc, data);
6339
6519
  if (F_ISSET(flags, F_BIGDATA)) {
6340
6520
  /* Data already on overflow page. */
6341
6521
  node_size += sizeof(pgno_t);
6342
- } else if (node_size + data->mv_size >= mc->mc_txn->mt_env->me_nodemax) {
6522
+ } else if (node_size + data->mv_size > mc->mc_txn->mt_env->me_nodemax) {
6343
6523
  int ovpages = OVPAGES(data->mv_size, mc->mc_txn->mt_env->me_psize);
6344
6524
  int rc;
6345
6525
  /* Put data on overflow page. */
6346
6526
  DPRINTF(("data size is %"Z"u, node would be %"Z"u, put data on overflow page",
6347
6527
  data->mv_size, node_size+data->mv_size));
6348
- node_size += sizeof(pgno_t) + (node_size & 1);
6528
+ node_size = EVEN(node_size + sizeof(pgno_t));
6349
6529
  if ((ssize_t)node_size > room)
6350
6530
  goto full;
6351
6531
  if ((rc = mdb_page_new(mc, P_OVERFLOW, ovpages, &ofp)))
@@ -6357,7 +6537,7 @@ mdb_node_add(MDB_cursor *mc, indx_t indx,
6357
6537
  node_size += data->mv_size;
6358
6538
  }
6359
6539
  }
6360
- node_size += node_size & 1;
6540
+ node_size = EVEN(node_size);
6361
6541
  if ((ssize_t)node_size > room)
6362
6542
  goto full;
6363
6543
 
@@ -6368,7 +6548,7 @@ update:
6368
6548
 
6369
6549
  /* Adjust free space offsets. */
6370
6550
  ofs = mp->mp_upper - node_size;
6371
- assert(ofs >= mp->mp_lower + sizeof(indx_t));
6551
+ mdb_cassert(mc, ofs >= mp->mp_lower + sizeof(indx_t));
6372
6552
  mp->mp_ptrs[indx] = ofs;
6373
6553
  mp->mp_upper = ofs;
6374
6554
  mp->mp_lower += sizeof(indx_t);
@@ -6386,7 +6566,7 @@ update:
6386
6566
  memcpy(NODEKEY(node), key->mv_data, key->mv_size);
6387
6567
 
6388
6568
  if (IS_LEAF(mp)) {
6389
- assert(key);
6569
+ mdb_cassert(mc, key);
6390
6570
  if (ofp == NULL) {
6391
6571
  if (F_ISSET(flags, F_BIGDATA))
6392
6572
  memcpy(node->mn_data + key->mv_size, data->mv_data,
@@ -6410,38 +6590,35 @@ update:
6410
6590
 
6411
6591
  full:
6412
6592
  DPRINTF(("not enough room in page %"Z"u, got %u ptrs",
6413
- mp->mp_pgno, NUMKEYS(mp)));
6593
+ mdb_dbg_pgno(mp), NUMKEYS(mp)));
6414
6594
  DPRINTF(("upper-lower = %u - %u = %"Z"d", mp->mp_upper,mp->mp_lower,room));
6415
6595
  DPRINTF(("node size = %"Z"u", node_size));
6596
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
6416
6597
  return MDB_PAGE_FULL;
6417
6598
  }
6418
6599
 
6419
6600
  /** Delete the specified node from a page.
6420
- * @param[in] mp The page to operate on.
6421
- * @param[in] indx The index of the node to delete.
6601
+ * @param[in] mc Cursor pointing to the node to delete.
6422
6602
  * @param[in] ksize The size of a node. Only used if the page is
6423
6603
  * part of a #MDB_DUPFIXED database.
6424
6604
  */
6425
6605
  static void
6426
- mdb_node_del(MDB_page *mp, indx_t indx, int ksize)
6606
+ mdb_node_del(MDB_cursor *mc, int ksize)
6427
6607
  {
6608
+ MDB_page *mp = mc->mc_pg[mc->mc_top];
6609
+ indx_t indx = mc->mc_ki[mc->mc_top];
6428
6610
  unsigned int sz;
6429
6611
  indx_t i, j, numkeys, ptr;
6430
6612
  MDB_node *node;
6431
6613
  char *base;
6432
6614
 
6433
- #if MDB_DEBUG
6434
- {
6435
- pgno_t pgno;
6436
- COPY_PGNO(pgno, mp->mp_pgno);
6437
6615
  DPRINTF(("delete node %u on %s page %"Z"u", indx,
6438
- IS_LEAF(mp) ? "leaf" : "branch", pgno));
6439
- }
6440
- #endif
6441
- assert(indx < NUMKEYS(mp));
6616
+ IS_LEAF(mp) ? "leaf" : "branch", mdb_dbg_pgno(mp)));
6617
+ numkeys = NUMKEYS(mp);
6618
+ mdb_cassert(mc, indx < numkeys);
6442
6619
 
6443
6620
  if (IS_LEAF2(mp)) {
6444
- int x = NUMKEYS(mp) - 1 - indx;
6621
+ int x = numkeys - 1 - indx;
6445
6622
  base = LEAF2KEY(mp, indx, ksize);
6446
6623
  if (x)
6447
6624
  memmove(base, base + ksize, x * ksize);
@@ -6458,10 +6635,9 @@ mdb_node_del(MDB_page *mp, indx_t indx, int ksize)
6458
6635
  else
6459
6636
  sz += NODEDSZ(node);
6460
6637
  }
6461
- sz += sz & 1;
6638
+ sz = EVEN(sz);
6462
6639
 
6463
6640
  ptr = mp->mp_ptrs[indx];
6464
- numkeys = NUMKEYS(mp);
6465
6641
  for (i = j = 0; i < numkeys; i++) {
6466
6642
  if (i != indx) {
6467
6643
  mp->mp_ptrs[j] = mp->mp_ptrs[i];
@@ -6488,25 +6664,22 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
6488
6664
  MDB_node *node;
6489
6665
  MDB_page *sp, *xp;
6490
6666
  char *base;
6491
- int osize, nsize;
6492
- int delta;
6667
+ int nsize, delta;
6493
6668
  indx_t i, numkeys, ptr;
6494
6669
 
6495
6670
  node = NODEPTR(mp, indx);
6496
6671
  sp = (MDB_page *)NODEDATA(node);
6497
- osize = NODEDSZ(node);
6498
-
6499
- delta = sp->mp_upper - sp->mp_lower;
6500
- SETDSZ(node, osize - delta);
6672
+ delta = SIZELEFT(sp);
6501
6673
  xp = (MDB_page *)((char *)sp + delta);
6502
6674
 
6503
6675
  /* shift subpage upward */
6504
6676
  if (IS_LEAF2(sp)) {
6505
6677
  nsize = NUMKEYS(sp) * sp->mp_pad;
6678
+ if (nsize & 1)
6679
+ return; /* do not make the node uneven-sized */
6506
6680
  memmove(METADATA(xp), METADATA(sp), nsize);
6507
6681
  } else {
6508
6682
  int i;
6509
- nsize = osize - sp->mp_upper;
6510
6683
  numkeys = NUMKEYS(sp);
6511
6684
  for (i=numkeys-1; i>=0; i--)
6512
6685
  xp->mp_ptrs[i] = sp->mp_ptrs[i] - delta;
@@ -6517,6 +6690,9 @@ mdb_node_shrink(MDB_page *mp, indx_t indx)
6517
6690
  xp->mp_pad = sp->mp_pad;
6518
6691
  COPY_PGNO(xp->mp_pgno, mp->mp_pgno);
6519
6692
 
6693
+ nsize = NODEDSZ(node) - delta;
6694
+ SETDSZ(node, nsize);
6695
+
6520
6696
  /* shift lower nodes upward */
6521
6697
  ptr = mp->mp_ptrs[indx];
6522
6698
  numkeys = NUMKEYS(mp);
@@ -6604,11 +6780,7 @@ mdb_xcursor_init1(MDB_cursor *mc, MDB_node *node)
6604
6780
  mx->mx_dbflag = DB_VALID|DB_DIRTY; /* DB_DIRTY guides mdb_cursor_touch */
6605
6781
  #if UINT_MAX < SIZE_MAX
6606
6782
  if (mx->mx_dbx.md_cmp == mdb_cmp_int && mx->mx_db.md_pad == sizeof(size_t))
6607
- #ifdef MISALIGNED_OK
6608
- mx->mx_dbx.md_cmp = mdb_cmp_long;
6609
- #else
6610
- mx->mx_dbx.md_cmp = mdb_cmp_cint;
6611
- #endif
6783
+ mx->mx_dbx.md_cmp = mdb_cmp_clong;
6612
6784
  #endif
6613
6785
  }
6614
6786
 
@@ -6628,7 +6800,7 @@ mdb_cursor_init(MDB_cursor *mc, MDB_txn *txn, MDB_dbi dbi, MDB_xcursor *mx)
6628
6800
  mc->mc_pg[0] = 0;
6629
6801
  mc->mc_flags = 0;
6630
6802
  if (txn->mt_dbs[dbi].md_flags & MDB_DUPSORT) {
6631
- assert(mx != NULL);
6803
+ mdb_tassert(txn, mx != NULL);
6632
6804
  mc->mc_xcursor = mx;
6633
6805
  mdb_xcursor_init0(mc);
6634
6806
  } else {
@@ -6645,7 +6817,7 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
6645
6817
  MDB_cursor *mc;
6646
6818
  size_t size = sizeof(MDB_cursor);
6647
6819
 
6648
- if (txn == NULL || ret == NULL || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
6820
+ if (!ret || !TXN_DBI_EXIST(txn, dbi))
6649
6821
  return EINVAL;
6650
6822
 
6651
6823
  if (txn->mt_flags & MDB_TXN_ERROR)
@@ -6677,12 +6849,15 @@ mdb_cursor_open(MDB_txn *txn, MDB_dbi dbi, MDB_cursor **ret)
6677
6849
  int
6678
6850
  mdb_cursor_renew(MDB_txn *txn, MDB_cursor *mc)
6679
6851
  {
6680
- if (txn == NULL || mc == NULL || mc->mc_dbi >= txn->mt_numdbs)
6852
+ if (!mc || !TXN_DBI_EXIST(txn, mc->mc_dbi))
6681
6853
  return EINVAL;
6682
6854
 
6683
6855
  if ((mc->mc_flags & C_UNTRACK) || txn->mt_cursors)
6684
6856
  return EINVAL;
6685
6857
 
6858
+ if (txn->mt_flags & MDB_TXN_ERROR)
6859
+ return MDB_BAD_TXN;
6860
+
6686
6861
  mdb_cursor_init(mc, txn, mc->mc_dbi, mc->mc_xcursor);
6687
6862
  return MDB_SUCCESS;
6688
6863
  }
@@ -6699,6 +6874,9 @@ mdb_cursor_count(MDB_cursor *mc, size_t *countp)
6699
6874
  if (mc->mc_xcursor == NULL)
6700
6875
  return MDB_INCOMPATIBLE;
6701
6876
 
6877
+ if (mc->mc_txn->mt_flags & MDB_TXN_ERROR)
6878
+ return MDB_BAD_TXN;
6879
+
6702
6880
  leaf = NODEPTR(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
6703
6881
  if (!F_ISSET(leaf->mn_flags, F_DUPDATA)) {
6704
6882
  *countp = 1;
@@ -6736,11 +6914,10 @@ mdb_cursor_txn(MDB_cursor *mc)
6736
6914
  MDB_dbi
6737
6915
  mdb_cursor_dbi(MDB_cursor *mc)
6738
6916
  {
6739
- assert(mc != NULL);
6740
6917
  return mc->mc_dbi;
6741
6918
  }
6742
6919
 
6743
- /** Replace the key for a node with a new key.
6920
+ /** Replace the key for a branch node with a new key.
6744
6921
  * @param[in] mc Cursor pointing to the node to operate on.
6745
6922
  * @param[in] key The new key to use.
6746
6923
  * @return 0 on success, non-zero on failure.
@@ -6752,7 +6929,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6752
6929
  MDB_node *node;
6753
6930
  char *base;
6754
6931
  size_t len;
6755
- int delta, delta0;
6932
+ int delta, ksize, oksize;
6756
6933
  indx_t ptr, i, numkeys, indx;
6757
6934
  DKBUF;
6758
6935
 
@@ -6763,7 +6940,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6763
6940
  #if MDB_DEBUG
6764
6941
  {
6765
6942
  MDB_val k2;
6766
- char kbuf2[(MDB_MAXKEYSIZE*2+1)];
6943
+ char kbuf2[DKBUF_MAXKEYSIZE*2+1];
6767
6944
  k2.mv_data = NODEKEY(node);
6768
6945
  k2.mv_size = node->mn_ksize;
6769
6946
  DPRINTF(("update key %u (ofs %u) [%s] to [%s] on page %"Z"u",
@@ -6774,19 +6951,19 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6774
6951
  }
6775
6952
  #endif
6776
6953
 
6777
- delta0 = delta = key->mv_size - node->mn_ksize;
6954
+ /* Sizes must be 2-byte aligned. */
6955
+ ksize = EVEN(key->mv_size);
6956
+ oksize = EVEN(node->mn_ksize);
6957
+ delta = ksize - oksize;
6778
6958
 
6779
- /* Must be 2-byte aligned. If new key is
6780
- * shorter by 1, the shift will be skipped.
6781
- */
6782
- delta += (delta & 1);
6959
+ /* Shift node contents if EVEN(key length) changed. */
6783
6960
  if (delta) {
6784
6961
  if (delta > 0 && SIZELEFT(mp) < delta) {
6785
6962
  pgno_t pgno;
6786
6963
  /* not enough space left, do a delete and split */
6787
6964
  DPRINTF(("Not enough room, delta = %d, splitting...", delta));
6788
6965
  pgno = NODEPGNO(node);
6789
- mdb_node_del(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top], 0);
6966
+ mdb_node_del(mc, 0);
6790
6967
  return mdb_page_split(mc, key, NULL, pgno, MDB_SPLIT_REPLACE);
6791
6968
  }
6792
6969
 
@@ -6805,7 +6982,7 @@ mdb_update_key(MDB_cursor *mc, MDB_val *key)
6805
6982
  }
6806
6983
 
6807
6984
  /* But even if no shift was needed, update ksize */
6808
- if (delta0)
6985
+ if (node->mn_ksize != key->mv_size)
6809
6986
  node->mn_ksize = key->mv_size;
6810
6987
 
6811
6988
  if (key->mv_size)
@@ -6837,7 +7014,6 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6837
7014
  return rc;
6838
7015
 
6839
7016
  if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6840
- srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], 0); /* fake */
6841
7017
  key.mv_size = csrc->mc_db->md_pad;
6842
7018
  key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
6843
7019
  data.mv_size = 0;
@@ -6846,14 +7022,16 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6846
7022
  flags = 0;
6847
7023
  } else {
6848
7024
  srcnode = NODEPTR(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top]);
6849
- assert(!((size_t)srcnode&1));
7025
+ mdb_cassert(csrc, !((size_t)srcnode & 1));
6850
7026
  srcpg = NODEPGNO(srcnode);
6851
7027
  flags = srcnode->mn_flags;
6852
7028
  if (csrc->mc_ki[csrc->mc_top] == 0 && IS_BRANCH(csrc->mc_pg[csrc->mc_top])) {
6853
7029
  unsigned int snum = csrc->mc_snum;
6854
7030
  MDB_node *s2;
6855
7031
  /* must find the lowest key below src */
6856
- mdb_page_search_lowest(csrc);
7032
+ rc = mdb_page_search_lowest(csrc);
7033
+ if (rc)
7034
+ return rc;
6857
7035
  if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
6858
7036
  key.mv_size = csrc->mc_db->md_pad;
6859
7037
  key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
@@ -6876,7 +7054,9 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6876
7054
  MDB_node *s2;
6877
7055
  MDB_val bkey;
6878
7056
  /* must find the lowest key below dst */
6879
- mdb_page_search_lowest(cdst);
7057
+ rc = mdb_page_search_lowest(cdst);
7058
+ if (rc)
7059
+ return rc;
6880
7060
  if (IS_LEAF2(cdst->mc_pg[cdst->mc_top])) {
6881
7061
  bkey.mv_size = cdst->mc_db->md_pad;
6882
7062
  bkey.mv_data = LEAF2KEY(cdst->mc_pg[cdst->mc_top], 0, bkey.mv_size);
@@ -6909,7 +7089,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6909
7089
 
6910
7090
  /* Delete the node from the source page.
6911
7091
  */
6912
- mdb_node_del(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
7092
+ mdb_node_del(csrc, key.mv_size);
6913
7093
 
6914
7094
  {
6915
7095
  /* Adjust other cursors pointing to mp */
@@ -6957,7 +7137,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6957
7137
  csrc->mc_ki[csrc->mc_top] = 0;
6958
7138
  rc = mdb_update_key(csrc, &nullkey);
6959
7139
  csrc->mc_ki[csrc->mc_top] = ix;
6960
- assert(rc == MDB_SUCCESS);
7140
+ mdb_cassert(csrc, rc == MDB_SUCCESS);
6961
7141
  }
6962
7142
  }
6963
7143
 
@@ -6985,7 +7165,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6985
7165
  cdst->mc_ki[cdst->mc_top] = 0;
6986
7166
  rc = mdb_update_key(cdst, &nullkey);
6987
7167
  cdst->mc_ki[cdst->mc_top] = ix;
6988
- assert(rc == MDB_SUCCESS);
7168
+ mdb_cassert(csrc, rc == MDB_SUCCESS);
6989
7169
  }
6990
7170
  }
6991
7171
 
@@ -6998,6 +7178,7 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst)
6998
7178
  * the \b csrc page will be freed.
6999
7179
  * @param[in] csrc Cursor pointing to the source page.
7000
7180
  * @param[in] cdst Cursor pointing to the destination page.
7181
+ * @return 0 on success, non-zero on failure.
7001
7182
  */
7002
7183
  static int
7003
7184
  mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
@@ -7011,8 +7192,8 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7011
7192
  DPRINTF(("merging page %"Z"u into %"Z"u", csrc->mc_pg[csrc->mc_top]->mp_pgno,
7012
7193
  cdst->mc_pg[cdst->mc_top]->mp_pgno));
7013
7194
 
7014
- assert(csrc->mc_snum > 1); /* can't merge root page */
7015
- assert(cdst->mc_snum > 1);
7195
+ mdb_cassert(csrc, csrc->mc_snum > 1); /* can't merge root page */
7196
+ mdb_cassert(csrc, cdst->mc_snum > 1);
7016
7197
 
7017
7198
  /* Mark dst as dirty. */
7018
7199
  if ((rc = mdb_page_touch(cdst)))
@@ -7037,7 +7218,9 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7037
7218
  unsigned int snum = csrc->mc_snum;
7038
7219
  MDB_node *s2;
7039
7220
  /* must find the lowest key below src */
7040
- mdb_page_search_lowest(csrc);
7221
+ rc = mdb_page_search_lowest(csrc);
7222
+ if (rc)
7223
+ return rc;
7041
7224
  if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
7042
7225
  key.mv_size = csrc->mc_db->md_pad;
7043
7226
  key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], 0, key.mv_size);
@@ -7067,15 +7250,17 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7067
7250
 
7068
7251
  /* Unlink the src page from parent and add to free list.
7069
7252
  */
7070
- mdb_node_del(csrc->mc_pg[csrc->mc_top-1], csrc->mc_ki[csrc->mc_top-1], 0);
7071
- if (csrc->mc_ki[csrc->mc_top-1] == 0) {
7253
+ csrc->mc_top--;
7254
+ mdb_node_del(csrc, 0);
7255
+ if (csrc->mc_ki[csrc->mc_top] == 0) {
7072
7256
  key.mv_size = 0;
7073
- csrc->mc_top--;
7074
7257
  rc = mdb_update_key(csrc, &key);
7075
- csrc->mc_top++;
7076
- if (rc)
7258
+ if (rc) {
7259
+ csrc->mc_top++;
7077
7260
  return rc;
7261
+ }
7078
7262
  }
7263
+ csrc->mc_top++;
7079
7264
 
7080
7265
  rc = mdb_midl_append(&csrc->mc_txn->mt_free_pgs,
7081
7266
  csrc->mc_pg[csrc->mc_top]->mp_pgno);
@@ -7104,9 +7289,18 @@ mdb_page_merge(MDB_cursor *csrc, MDB_cursor *cdst)
7104
7289
  }
7105
7290
  }
7106
7291
  }
7107
- mdb_cursor_pop(csrc);
7108
-
7109
- return mdb_rebalance(csrc);
7292
+ {
7293
+ unsigned int snum = cdst->mc_snum;
7294
+ uint16_t depth = cdst->mc_db->md_depth;
7295
+ mdb_cursor_pop(cdst);
7296
+ rc = mdb_rebalance(cdst);
7297
+ /* Did the tree shrink? */
7298
+ if (depth > cdst->mc_db->md_depth)
7299
+ snum--;
7300
+ cdst->mc_snum = snum;
7301
+ cdst->mc_top = snum-1;
7302
+ }
7303
+ return rc;
7110
7304
  }
7111
7305
 
7112
7306
  /** Copy the contents of a cursor.
@@ -7144,27 +7338,18 @@ mdb_rebalance(MDB_cursor *mc)
7144
7338
  int rc;
7145
7339
  unsigned int ptop, minkeys;
7146
7340
  MDB_cursor mn;
7341
+ indx_t oldki;
7147
7342
 
7148
7343
  minkeys = 1 + (IS_BRANCH(mc->mc_pg[mc->mc_top]));
7149
- #if MDB_DEBUG
7150
- {
7151
- pgno_t pgno;
7152
- COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
7153
7344
  DPRINTF(("rebalancing %s page %"Z"u (has %u keys, %.1f%% full)",
7154
7345
  IS_LEAF(mc->mc_pg[mc->mc_top]) ? "leaf" : "branch",
7155
- pgno, NUMKEYS(mc->mc_pg[mc->mc_top]),
7346
+ mdb_dbg_pgno(mc->mc_pg[mc->mc_top]), NUMKEYS(mc->mc_pg[mc->mc_top]),
7156
7347
  (float)PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) / 10));
7157
- }
7158
- #endif
7159
7348
 
7160
7349
  if (PAGEFILL(mc->mc_txn->mt_env, mc->mc_pg[mc->mc_top]) >= FILL_THRESHOLD &&
7161
7350
  NUMKEYS(mc->mc_pg[mc->mc_top]) >= minkeys) {
7162
- #if MDB_DEBUG
7163
- pgno_t pgno;
7164
- COPY_PGNO(pgno, mc->mc_pg[mc->mc_top]->mp_pgno);
7165
7351
  DPRINTF(("no need to rebalance page %"Z"u, above fill threshold",
7166
- pgno));
7167
- #endif
7352
+ mdb_dbg_pgno(mc->mc_pg[mc->mc_top])));
7168
7353
  return MDB_SUCCESS;
7169
7354
  }
7170
7355
 
@@ -7204,6 +7389,7 @@ mdb_rebalance(MDB_cursor *mc)
7204
7389
  }
7205
7390
  }
7206
7391
  } else if (IS_BRANCH(mp) && NUMKEYS(mp) == 1) {
7392
+ int i;
7207
7393
  DPUTS("collapsing root page!");
7208
7394
  rc = mdb_midl_append(&mc->mc_txn->mt_free_pgs, mp->mp_pgno);
7209
7395
  if (rc)
@@ -7215,6 +7401,10 @@ mdb_rebalance(MDB_cursor *mc)
7215
7401
  mc->mc_db->md_depth--;
7216
7402
  mc->mc_db->md_branch_pages--;
7217
7403
  mc->mc_ki[0] = mc->mc_ki[1];
7404
+ for (i = 1; i<mc->mc_db->md_depth; i++) {
7405
+ mc->mc_pg[i] = mc->mc_pg[i+1];
7406
+ mc->mc_ki[i] = mc->mc_ki[i+1];
7407
+ }
7218
7408
  {
7219
7409
  /* Adjust other cursors pointing to mp */
7220
7410
  MDB_cursor *m2, *m3;
@@ -7227,7 +7417,6 @@ mdb_rebalance(MDB_cursor *mc)
7227
7417
  m3 = m2;
7228
7418
  if (m3 == mc || m3->mc_snum < mc->mc_snum) continue;
7229
7419
  if (m3->mc_pg[0] == mp) {
7230
- int i;
7231
7420
  m3->mc_snum--;
7232
7421
  m3->mc_top--;
7233
7422
  for (i=0; i<m3->mc_snum; i++) {
@@ -7246,7 +7435,7 @@ mdb_rebalance(MDB_cursor *mc)
7246
7435
  * otherwise the tree is invalid.
7247
7436
  */
7248
7437
  ptop = mc->mc_top-1;
7249
- assert(NUMKEYS(mc->mc_pg[ptop]) > 1);
7438
+ mdb_cassert(mc, NUMKEYS(mc->mc_pg[ptop]) > 1);
7250
7439
 
7251
7440
  /* Leaf page fill factor is below the threshold.
7252
7441
  * Try to move keys from left or right neighbor, or
@@ -7258,6 +7447,7 @@ mdb_rebalance(MDB_cursor *mc)
7258
7447
  mdb_cursor_copy(mc, &mn);
7259
7448
  mn.mc_xcursor = NULL;
7260
7449
 
7450
+ oldki = mc->mc_ki[mc->mc_top];
7261
7451
  if (mc->mc_ki[ptop] == 0) {
7262
7452
  /* We're the leftmost leaf in our parent.
7263
7453
  */
@@ -7291,113 +7481,115 @@ mdb_rebalance(MDB_cursor *mc)
7291
7481
  * (A branch page must never have less than 2 keys.)
7292
7482
  */
7293
7483
  minkeys = 1 + (IS_BRANCH(mn.mc_pg[mn.mc_top]));
7294
- if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys)
7295
- return mdb_node_move(&mn, mc);
7296
- else {
7297
- if (mc->mc_ki[ptop] == 0)
7484
+ if (PAGEFILL(mc->mc_txn->mt_env, mn.mc_pg[mn.mc_top]) >= FILL_THRESHOLD && NUMKEYS(mn.mc_pg[mn.mc_top]) > minkeys) {
7485
+ rc = mdb_node_move(&mn, mc);
7486
+ if (mc->mc_ki[ptop]) {
7487
+ oldki++;
7488
+ }
7489
+ } else {
7490
+ if (mc->mc_ki[ptop] == 0) {
7298
7491
  rc = mdb_page_merge(&mn, mc);
7299
- else {
7492
+ } else {
7493
+ oldki += NUMKEYS(mn.mc_pg[mn.mc_top]);
7300
7494
  mn.mc_ki[mn.mc_top] += mc->mc_ki[mn.mc_top] + 1;
7301
7495
  rc = mdb_page_merge(mc, &mn);
7302
7496
  mdb_cursor_copy(&mn, mc);
7303
7497
  }
7304
- mc->mc_flags &= ~(C_INITIALIZED|C_EOF);
7498
+ mc->mc_flags &= ~C_EOF;
7305
7499
  }
7500
+ mc->mc_ki[mc->mc_top] = oldki;
7306
7501
  return rc;
7307
7502
  }
7308
7503
 
7309
7504
  /** Complete a delete operation started by #mdb_cursor_del(). */
7310
7505
  static int
7311
- mdb_cursor_del0(MDB_cursor *mc, MDB_node *leaf)
7506
+ mdb_cursor_del0(MDB_cursor *mc)
7312
7507
  {
7313
7508
  int rc;
7314
7509
  MDB_page *mp;
7315
7510
  indx_t ki;
7316
7511
  unsigned int nkeys;
7317
7512
 
7318
- mp = mc->mc_pg[mc->mc_top];
7319
7513
  ki = mc->mc_ki[mc->mc_top];
7320
-
7321
- /* add overflow pages to free list */
7322
- if (!IS_LEAF2(mp) && F_ISSET(leaf->mn_flags, F_BIGDATA)) {
7323
- MDB_page *omp;
7324
- pgno_t pg;
7325
-
7326
- memcpy(&pg, NODEDATA(leaf), sizeof(pg));
7327
- if ((rc = mdb_page_get(mc->mc_txn, pg, &omp, NULL)) ||
7328
- (rc = mdb_ovpage_free(mc, omp)))
7329
- return rc;
7330
- }
7331
- mdb_node_del(mp, ki, mc->mc_db->md_pad);
7514
+ mdb_node_del(mc, mc->mc_db->md_pad);
7332
7515
  mc->mc_db->md_entries--;
7333
7516
  rc = mdb_rebalance(mc);
7334
- if (rc != MDB_SUCCESS)
7335
- mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
7336
- else {
7337
- MDB_cursor *m2;
7517
+
7518
+ if (rc == MDB_SUCCESS) {
7519
+ MDB_cursor *m2, *m3;
7338
7520
  MDB_dbi dbi = mc->mc_dbi;
7339
7521
 
7340
7522
  mp = mc->mc_pg[mc->mc_top];
7341
7523
  nkeys = NUMKEYS(mp);
7342
7524
 
7343
7525
  /* if mc points past last node in page, find next sibling */
7344
- if (mc->mc_ki[mc->mc_top] >= nkeys)
7345
- mdb_cursor_sibling(mc, 1);
7526
+ if (mc->mc_ki[mc->mc_top] >= nkeys) {
7527
+ rc = mdb_cursor_sibling(mc, 1);
7528
+ if (rc == MDB_NOTFOUND)
7529
+ rc = MDB_SUCCESS;
7530
+ }
7346
7531
 
7347
7532
  /* Adjust other cursors pointing to mp */
7348
- for (m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2=m2->mc_next) {
7349
- if (m2 == mc || m2->mc_snum < mc->mc_snum)
7533
+ for (m2 = mc->mc_txn->mt_cursors[dbi]; !rc && m2; m2=m2->mc_next) {
7534
+ m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2;
7535
+ if (! (m2->mc_flags & m3->mc_flags & C_INITIALIZED))
7350
7536
  continue;
7351
- if (!(m2->mc_flags & C_INITIALIZED))
7537
+ if (m3 == mc || m3->mc_snum < mc->mc_snum)
7352
7538
  continue;
7353
- if (m2->mc_pg[mc->mc_top] == mp) {
7354
- if (m2->mc_ki[mc->mc_top] >= ki) {
7355
- m2->mc_flags |= C_DEL;
7356
- if (m2->mc_ki[mc->mc_top] > ki)
7357
- m2->mc_ki[mc->mc_top]--;
7539
+ if (m3->mc_pg[mc->mc_top] == mp) {
7540
+ if (m3->mc_ki[mc->mc_top] >= ki) {
7541
+ m3->mc_flags |= C_DEL;
7542
+ if (m3->mc_ki[mc->mc_top] > ki)
7543
+ m3->mc_ki[mc->mc_top]--;
7544
+ }
7545
+ if (m3->mc_ki[mc->mc_top] >= nkeys) {
7546
+ rc = mdb_cursor_sibling(m3, 1);
7547
+ if (rc == MDB_NOTFOUND)
7548
+ rc = MDB_SUCCESS;
7358
7549
  }
7359
- if (m2->mc_ki[mc->mc_top] >= nkeys)
7360
- mdb_cursor_sibling(m2, 1);
7361
7550
  }
7362
7551
  }
7363
7552
  mc->mc_flags |= C_DEL;
7364
7553
  }
7365
7554
 
7555
+ if (rc)
7556
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
7366
7557
  return rc;
7367
7558
  }
7368
7559
 
7369
7560
  int
7370
7561
  mdb_del(MDB_txn *txn, MDB_dbi dbi,
7371
7562
  MDB_val *key, MDB_val *data)
7563
+ {
7564
+ if (!key || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
7565
+ return EINVAL;
7566
+
7567
+ if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
7568
+ return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
7569
+
7570
+ if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
7571
+ /* must ignore any data */
7572
+ data = NULL;
7573
+ }
7574
+
7575
+ return mdb_del0(txn, dbi, key, data, 0);
7576
+ }
7577
+
7578
+ static int
7579
+ mdb_del0(MDB_txn *txn, MDB_dbi dbi,
7580
+ MDB_val *key, MDB_val *data, unsigned flags)
7372
7581
  {
7373
7582
  MDB_cursor mc;
7374
7583
  MDB_xcursor mx;
7375
7584
  MDB_cursor_op op;
7376
7585
  MDB_val rdata, *xdata;
7377
- int rc, exact;
7586
+ int rc, exact = 0;
7378
7587
  DKBUF;
7379
7588
 
7380
- assert(key != NULL);
7381
-
7382
7589
  DPRINTF(("====> delete db %u key [%s]", dbi, DKEY(key)));
7383
7590
 
7384
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
7385
- return EINVAL;
7386
-
7387
- if (txn->mt_flags & (MDB_TXN_RDONLY|MDB_TXN_ERROR))
7388
- return (txn->mt_flags & MDB_TXN_RDONLY) ? EACCES : MDB_BAD_TXN;
7389
-
7390
- if (key->mv_size > MDB_MAXKEYSIZE) {
7391
- return MDB_BAD_VALSIZE;
7392
- }
7393
-
7394
7591
  mdb_cursor_init(&mc, txn, dbi, &mx);
7395
7592
 
7396
- exact = 0;
7397
- if (!F_ISSET(txn->mt_dbs[dbi].md_flags, MDB_DUPSORT)) {
7398
- /* must ignore any data */
7399
- data = NULL;
7400
- }
7401
7593
  if (data) {
7402
7594
  op = MDB_GET_BOTH;
7403
7595
  rdata = *data;
@@ -7405,6 +7597,7 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi,
7405
7597
  } else {
7406
7598
  op = MDB_SET;
7407
7599
  xdata = NULL;
7600
+ flags |= MDB_NODUPDATA;
7408
7601
  }
7409
7602
  rc = mdb_cursor_set(&mc, key, xdata, op, &exact);
7410
7603
  if (rc == 0) {
@@ -7419,7 +7612,7 @@ mdb_del(MDB_txn *txn, MDB_dbi dbi,
7419
7612
  mc.mc_flags |= C_UNTRACK;
7420
7613
  mc.mc_next = txn->mt_cursors[dbi];
7421
7614
  txn->mt_cursors[dbi] = &mc;
7422
- rc = mdb_cursor_del(&mc, data ? 0 : MDB_NODUPDATA);
7615
+ rc = mdb_cursor_del(&mc, flags);
7423
7616
  txn->mt_cursors[dbi] = mc.mc_next;
7424
7617
  }
7425
7618
  return rc;
@@ -7468,7 +7661,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7468
7661
 
7469
7662
  if (mc->mc_snum < 2) {
7470
7663
  if ((rc = mdb_page_new(mc, P_BRANCH, 1, &pp)))
7471
- return rc;
7664
+ goto done;
7472
7665
  /* shift current top to make room for new parent */
7473
7666
  mc->mc_pg[1] = mc->mc_pg[0];
7474
7667
  mc->mc_ki[1] = mc->mc_ki[0];
@@ -7486,7 +7679,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7486
7679
  mc->mc_ki[0] = mc->mc_ki[1];
7487
7680
  mc->mc_db->md_root = mp->mp_pgno;
7488
7681
  mc->mc_db->md_depth--;
7489
- return rc;
7682
+ goto done;
7490
7683
  }
7491
7684
  mc->mc_snum = 2;
7492
7685
  mc->mc_top = 1;
@@ -7515,7 +7708,6 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7515
7708
  int x;
7516
7709
  unsigned int lsize, rsize, ksize;
7517
7710
  /* Move half of the keys to the right sibling */
7518
- copy = NULL;
7519
7711
  x = mc->mc_ki[mc->mc_top] - split_indx;
7520
7712
  ksize = mc->mc_db->md_pad;
7521
7713
  split = LEAF2KEY(mp, split_indx, ksize);
@@ -7558,12 +7750,14 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7558
7750
  nsize = mdb_leaf_size(env, newkey, newdata);
7559
7751
  else
7560
7752
  nsize = mdb_branch_size(env, newkey);
7561
- nsize += nsize & 1;
7753
+ nsize = EVEN(nsize);
7562
7754
 
7563
7755
  /* grab a page to hold a temporary copy */
7564
7756
  copy = mdb_page_malloc(mc->mc_txn, 1);
7565
- if (copy == NULL)
7566
- return ENOMEM;
7757
+ if (copy == NULL) {
7758
+ rc = ENOMEM;
7759
+ goto done;
7760
+ }
7567
7761
  copy->mp_pgno = mp->mp_pgno;
7568
7762
  copy->mp_flags = mp->mp_flags;
7569
7763
  copy->mp_lower = PAGEHDRSZ;
@@ -7615,7 +7809,7 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7615
7809
  else
7616
7810
  psize += NODEDSZ(node);
7617
7811
  }
7618
- psize += psize & 1;
7812
+ psize = EVEN(psize);
7619
7813
  }
7620
7814
  if (psize > pmax || i == k-j) {
7621
7815
  split_indx = i + (j<0);
@@ -7643,6 +7837,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7643
7837
  mn.mc_top--;
7644
7838
  did_split = 1;
7645
7839
  rc = mdb_page_split(&mn, &sepkey, NULL, rp->mp_pgno, 0);
7840
+ if (rc)
7841
+ goto done;
7646
7842
 
7647
7843
  /* root split? */
7648
7844
  if (mn.mc_snum == mc->mc_snum) {
@@ -7664,7 +7860,13 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7664
7860
  mc->mc_ki[i] = mn.mc_ki[i];
7665
7861
  }
7666
7862
  mc->mc_pg[ptop] = mn.mc_pg[ptop];
7667
- mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
7863
+ if (mn.mc_ki[ptop]) {
7864
+ mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
7865
+ } else {
7866
+ /* find right page's left sibling */
7867
+ mc->mc_ki[ptop] = mn.mc_ki[ptop];
7868
+ mdb_cursor_sibling(mc, 0);
7869
+ }
7668
7870
  }
7669
7871
  } else {
7670
7872
  mn.mc_top--;
@@ -7673,14 +7875,14 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7673
7875
  }
7674
7876
  mc->mc_flags ^= C_SPLITTING;
7675
7877
  if (rc != MDB_SUCCESS) {
7676
- return rc;
7878
+ goto done;
7677
7879
  }
7678
7880
  if (nflags & MDB_APPEND) {
7679
7881
  mc->mc_pg[mc->mc_top] = rp;
7680
7882
  mc->mc_ki[mc->mc_top] = 0;
7681
7883
  rc = mdb_node_add(mc, 0, newkey, newdata, newpgno, nflags);
7682
7884
  if (rc)
7683
- return rc;
7885
+ goto done;
7684
7886
  for (i=0; i<mc->mc_top; i++)
7685
7887
  mc->mc_ki[i] = mn.mc_ki[i];
7686
7888
  } else if (!IS_LEAF2(mp)) {
@@ -7718,11 +7920,8 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7718
7920
  }
7719
7921
 
7720
7922
  rc = mdb_node_add(mc, j, &rkey, rdata, pgno, flags);
7721
- if (rc) {
7722
- /* return tmp page to freelist */
7723
- mdb_page_free(env, copy);
7724
- return rc;
7725
- }
7923
+ if (rc)
7924
+ goto done;
7726
7925
  if (i == nkeys) {
7727
7926
  i = 0;
7728
7927
  j = 0;
@@ -7756,16 +7955,12 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7756
7955
  */
7757
7956
  if (mn.mc_pg[ptop] != mc->mc_pg[ptop] &&
7758
7957
  mc->mc_ki[ptop] >= NUMKEYS(mc->mc_pg[ptop])) {
7759
- for (i=0; i<ptop; i++) {
7958
+ for (i=0; i<=ptop; i++) {
7760
7959
  mc->mc_pg[i] = mn.mc_pg[i];
7761
7960
  mc->mc_ki[i] = mn.mc_ki[i];
7762
7961
  }
7763
- mc->mc_pg[ptop] = mn.mc_pg[ptop];
7764
- mc->mc_ki[ptop] = mn.mc_ki[ptop] - 1;
7765
7962
  }
7766
7963
  }
7767
- /* return tmp page to freelist */
7768
- mdb_page_free(env, copy);
7769
7964
  }
7770
7965
 
7771
7966
  {
@@ -7816,6 +8011,12 @@ mdb_page_split(MDB_cursor *mc, MDB_val *newkey, MDB_val *newdata, pgno_t newpgno
7816
8011
  }
7817
8012
  }
7818
8013
  DPRINTF(("mp left: %d, rp left: %d", SIZELEFT(mp), SIZELEFT(rp)));
8014
+
8015
+ done:
8016
+ if (copy) /* tmp page */
8017
+ mdb_page_free(env, copy);
8018
+ if (rc)
8019
+ mc->mc_txn->mt_flags |= MDB_TXN_ERROR;
7819
8020
  return rc;
7820
8021
  }
7821
8022
 
@@ -7826,10 +8027,7 @@ mdb_put(MDB_txn *txn, MDB_dbi dbi,
7826
8027
  MDB_cursor mc;
7827
8028
  MDB_xcursor mx;
7828
8029
 
7829
- assert(key != NULL);
7830
- assert(data != NULL);
7831
-
7832
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8030
+ if (!key || !data || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
7833
8031
  return EINVAL;
7834
8032
 
7835
8033
  if ((flags & (MDB_NOOVERWRITE|MDB_NODUPDATA|MDB_RESERVE|MDB_APPEND|MDB_APPENDDUP)) != flags)
@@ -7861,6 +8059,32 @@ mdb_env_get_flags(MDB_env *env, unsigned int *arg)
7861
8059
  return MDB_SUCCESS;
7862
8060
  }
7863
8061
 
8062
+ int
8063
+ mdb_env_set_userctx(MDB_env *env, void *ctx)
8064
+ {
8065
+ if (!env)
8066
+ return EINVAL;
8067
+ env->me_userctx = ctx;
8068
+ return MDB_SUCCESS;
8069
+ }
8070
+
8071
+ void *
8072
+ mdb_env_get_userctx(MDB_env *env)
8073
+ {
8074
+ return env ? env->me_userctx : NULL;
8075
+ }
8076
+
8077
+ int
8078
+ mdb_env_set_assert(MDB_env *env, MDB_assert_func *func)
8079
+ {
8080
+ if (!env)
8081
+ return EINVAL;
8082
+ #ifndef NDEBUG
8083
+ env->me_assert_func = func;
8084
+ #endif
8085
+ return MDB_SUCCESS;
8086
+ }
8087
+
7864
8088
  int
7865
8089
  mdb_env_get_path(MDB_env *env, const char **arg)
7866
8090
  {
@@ -8062,9 +8286,12 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned int flags, MDB_dbi *db
8062
8286
 
8063
8287
  int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *arg)
8064
8288
  {
8065
- if (txn == NULL || arg == NULL || dbi >= txn->mt_numdbs)
8289
+ if (!arg || !TXN_DBI_EXIST(txn, dbi))
8066
8290
  return EINVAL;
8067
8291
 
8292
+ if (txn->mt_flags & MDB_TXN_ERROR)
8293
+ return MDB_BAD_TXN;
8294
+
8068
8295
  if (txn->mt_dbflags[dbi] & DB_STALE) {
8069
8296
  MDB_cursor mc;
8070
8297
  MDB_xcursor mx;
@@ -8089,7 +8316,7 @@ void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
8089
8316
  int mdb_dbi_flags(MDB_txn *txn, MDB_dbi dbi, unsigned int *flags)
8090
8317
  {
8091
8318
  /* We could return the flags for the FREE_DBI too but what's the point? */
8092
- if (txn == NULL || dbi < MAIN_DBI || dbi >= txn->mt_numdbs)
8319
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8093
8320
  return EINVAL;
8094
8321
  *flags = txn->mt_dbs[dbi].md_flags & PERSISTENT_FLAGS;
8095
8322
  return MDB_SUCCESS;
@@ -8129,22 +8356,22 @@ mdb_drop0(MDB_cursor *mc, int subs)
8129
8356
  memcpy(&pg, NODEDATA(ni), sizeof(pg));
8130
8357
  rc = mdb_page_get(txn, pg, &omp, NULL);
8131
8358
  if (rc != 0)
8132
- return rc;
8133
- assert(IS_OVERFLOW(omp));
8359
+ goto done;
8360
+ mdb_cassert(mc, IS_OVERFLOW(omp));
8134
8361
  rc = mdb_midl_append_range(&txn->mt_free_pgs,
8135
8362
  pg, omp->mp_pages);
8136
8363
  if (rc)
8137
- return rc;
8364
+ goto done;
8138
8365
  } else if (subs && (ni->mn_flags & F_SUBDATA)) {
8139
8366
  mdb_xcursor_init1(mc, ni);
8140
8367
  rc = mdb_drop0(&mc->mc_xcursor->mx_cursor, 0);
8141
8368
  if (rc)
8142
- return rc;
8369
+ goto done;
8143
8370
  }
8144
8371
  }
8145
8372
  } else {
8146
8373
  if ((rc = mdb_midl_need(&txn->mt_free_pgs, n)) != 0)
8147
- return rc;
8374
+ goto done;
8148
8375
  for (i=0; i<n; i++) {
8149
8376
  pgno_t pg;
8150
8377
  ni = NODEPTR(mp, i);
@@ -8158,6 +8385,8 @@ mdb_drop0(MDB_cursor *mc, int subs)
8158
8385
  mc->mc_ki[mc->mc_top] = i;
8159
8386
  rc = mdb_cursor_sibling(mc, 1);
8160
8387
  if (rc) {
8388
+ if (rc != MDB_NOTFOUND)
8389
+ goto done;
8161
8390
  /* no more siblings, go back to beginning
8162
8391
  * of previous level.
8163
8392
  */
@@ -8171,6 +8400,9 @@ mdb_drop0(MDB_cursor *mc, int subs)
8171
8400
  }
8172
8401
  /* free it */
8173
8402
  rc = mdb_midl_append(&txn->mt_free_pgs, mc->mc_db->md_root);
8403
+ done:
8404
+ if (rc)
8405
+ txn->mt_flags |= MDB_TXN_ERROR;
8174
8406
  } else if (rc == MDB_NOTFOUND) {
8175
8407
  rc = MDB_SUCCESS;
8176
8408
  }
@@ -8182,7 +8414,7 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
8182
8414
  MDB_cursor *mc, *m2;
8183
8415
  int rc;
8184
8416
 
8185
- if (!txn || !dbi || dbi >= txn->mt_numdbs || (unsigned)del > 1 || !(txn->mt_dbflags[dbi] & DB_VALID))
8417
+ if ((unsigned)del > 1 || dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8186
8418
  return EINVAL;
8187
8419
 
8188
8420
  if (F_ISSET(txn->mt_flags, MDB_TXN_RDONLY))
@@ -8201,10 +8433,12 @@ int mdb_drop(MDB_txn *txn, MDB_dbi dbi, int del)
8201
8433
 
8202
8434
  /* Can't delete the main DB */
8203
8435
  if (del && dbi > MAIN_DBI) {
8204
- rc = mdb_del(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL);
8436
+ rc = mdb_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, 0);
8205
8437
  if (!rc) {
8206
8438
  txn->mt_dbflags[dbi] = DB_STALE;
8207
8439
  mdb_dbi_close(txn->mt_env, dbi);
8440
+ } else {
8441
+ txn->mt_flags |= MDB_TXN_ERROR;
8208
8442
  }
8209
8443
  } else {
8210
8444
  /* reset the DB record, mark it dirty */
@@ -8225,7 +8459,7 @@ leave:
8225
8459
 
8226
8460
  int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8227
8461
  {
8228
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8462
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8229
8463
  return EINVAL;
8230
8464
 
8231
8465
  txn->mt_dbxs[dbi].md_cmp = cmp;
@@ -8234,7 +8468,7 @@ int mdb_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8234
8468
 
8235
8469
  int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8236
8470
  {
8237
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8471
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8238
8472
  return EINVAL;
8239
8473
 
8240
8474
  txn->mt_dbxs[dbi].md_dcmp = cmp;
@@ -8243,7 +8477,7 @@ int mdb_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp)
8243
8477
 
8244
8478
  int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
8245
8479
  {
8246
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8480
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8247
8481
  return EINVAL;
8248
8482
 
8249
8483
  txn->mt_dbxs[dbi].md_rel = rel;
@@ -8252,7 +8486,7 @@ int mdb_set_relfunc(MDB_txn *txn, MDB_dbi dbi, MDB_rel_func *rel)
8252
8486
 
8253
8487
  int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
8254
8488
  {
8255
- if (txn == NULL || !dbi || dbi >= txn->mt_numdbs || !(txn->mt_dbflags[dbi] & DB_VALID))
8489
+ if (dbi == FREE_DBI || !TXN_DBI_EXIST(txn, dbi))
8256
8490
  return EINVAL;
8257
8491
 
8258
8492
  txn->mt_dbxs[dbi].md_relctx = ctx;
@@ -8261,7 +8495,7 @@ int mdb_set_relctx(MDB_txn *txn, MDB_dbi dbi, void *ctx)
8261
8495
 
8262
8496
  int mdb_env_get_maxkeysize(MDB_env *env)
8263
8497
  {
8264
- return MDB_MAXKEYSIZE;
8498
+ return ENV_MAXKEY(env);
8265
8499
  }
8266
8500
 
8267
8501
  int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
@@ -8269,7 +8503,7 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
8269
8503
  unsigned int i, rdrs;
8270
8504
  MDB_reader *mr;
8271
8505
  char buf[64];
8272
- int first = 1;
8506
+ int rc = 0, first = 1;
8273
8507
 
8274
8508
  if (!env || !func)
8275
8509
  return -1;
@@ -8280,27 +8514,25 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx)
8280
8514
  mr = env->me_txns->mti_readers;
8281
8515
  for (i=0; i<rdrs; i++) {
8282
8516
  if (mr[i].mr_pid) {
8283
- size_t tid;
8284
- int rc;
8285
- tid = mr[i].mr_tid;
8286
- if (mr[i].mr_txnid == (txnid_t)-1) {
8287
- sprintf(buf, "%10d %"Z"x -\n", mr[i].mr_pid, tid);
8288
- } else {
8289
- sprintf(buf, "%10d %"Z"x %"Z"u\n", mr[i].mr_pid, tid, mr[i].mr_txnid);
8290
- }
8517
+ txnid_t txnid = mr[i].mr_txnid;
8518
+ sprintf(buf, txnid == (txnid_t)-1 ?
8519
+ "%10d %"Z"x -\n" : "%10d %"Z"x %"Z"u\n",
8520
+ (int)mr[i].mr_pid, (size_t)mr[i].mr_tid, txnid);
8291
8521
  if (first) {
8292
8522
  first = 0;
8293
- func(" pid thread txnid\n", ctx);
8523
+ rc = func(" pid thread txnid\n", ctx);
8524
+ if (rc < 0)
8525
+ break;
8294
8526
  }
8295
8527
  rc = func(buf, ctx);
8296
8528
  if (rc < 0)
8297
- return rc;
8529
+ break;
8298
8530
  }
8299
8531
  }
8300
8532
  if (first) {
8301
- func("(no active readers)\n", ctx);
8533
+ rc = func("(no active readers)\n", ctx);
8302
8534
  }
8303
- return 0;
8535
+ return rc;
8304
8536
  }
8305
8537
 
8306
8538
  /** Insert pid into list if not already present.
@@ -8361,7 +8593,6 @@ int mdb_reader_check(MDB_env *env, int *dead)
8361
8593
  return ENOMEM;
8362
8594
  pids[0] = 0;
8363
8595
  mr = env->me_txns->mti_readers;
8364
- j = 0;
8365
8596
  for (i=0; i<rdrs; i++) {
8366
8597
  if (mr[i].mr_pid && mr[i].mr_pid != env->me_pid) {
8367
8598
  pid = mr[i].mr_pid;