rrudb 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +1 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +26 -0
  5. data/examples/example.rb +39 -0
  6. data/ext/rudb/NuDB/include/nudb/CMakeLists.txt +104 -0
  7. data/ext/rudb/NuDB/include/nudb/_experimental/basic_seconds_clock.hpp +200 -0
  8. data/ext/rudb/NuDB/include/nudb/_experimental/chrono_util.hpp +58 -0
  9. data/ext/rudb/NuDB/include/nudb/_experimental/test/fail_file.hpp +343 -0
  10. data/ext/rudb/NuDB/include/nudb/_experimental/test/temp_dir.hpp +73 -0
  11. data/ext/rudb/NuDB/include/nudb/_experimental/test/test_store.hpp +451 -0
  12. data/ext/rudb/NuDB/include/nudb/_experimental/test/xor_shift_engine.hpp +105 -0
  13. data/ext/rudb/NuDB/include/nudb/_experimental/util.hpp +288 -0
  14. data/ext/rudb/NuDB/include/nudb/basic_store.hpp +461 -0
  15. data/ext/rudb/NuDB/include/nudb/concepts.hpp +205 -0
  16. data/ext/rudb/NuDB/include/nudb/context.hpp +144 -0
  17. data/ext/rudb/NuDB/include/nudb/create.hpp +117 -0
  18. data/ext/rudb/NuDB/include/nudb/detail/arena.hpp +296 -0
  19. data/ext/rudb/NuDB/include/nudb/detail/bucket.hpp +473 -0
  20. data/ext/rudb/NuDB/include/nudb/detail/buffer.hpp +86 -0
  21. data/ext/rudb/NuDB/include/nudb/detail/bulkio.hpp +196 -0
  22. data/ext/rudb/NuDB/include/nudb/detail/cache.hpp +236 -0
  23. data/ext/rudb/NuDB/include/nudb/detail/endian.hpp +93 -0
  24. data/ext/rudb/NuDB/include/nudb/detail/field.hpp +265 -0
  25. data/ext/rudb/NuDB/include/nudb/detail/format.hpp +630 -0
  26. data/ext/rudb/NuDB/include/nudb/detail/gentex.hpp +259 -0
  27. data/ext/rudb/NuDB/include/nudb/detail/mutex.hpp +26 -0
  28. data/ext/rudb/NuDB/include/nudb/detail/pool.hpp +243 -0
  29. data/ext/rudb/NuDB/include/nudb/detail/store_base.hpp +45 -0
  30. data/ext/rudb/NuDB/include/nudb/detail/stream.hpp +149 -0
  31. data/ext/rudb/NuDB/include/nudb/detail/xxhash.hpp +328 -0
  32. data/ext/rudb/NuDB/include/nudb/error.hpp +257 -0
  33. data/ext/rudb/NuDB/include/nudb/file.hpp +55 -0
  34. data/ext/rudb/NuDB/include/nudb/impl/basic_store.ipp +785 -0
  35. data/ext/rudb/NuDB/include/nudb/impl/context.ipp +241 -0
  36. data/ext/rudb/NuDB/include/nudb/impl/create.ipp +163 -0
  37. data/ext/rudb/NuDB/include/nudb/impl/error.ipp +175 -0
  38. data/ext/rudb/NuDB/include/nudb/impl/posix_file.ipp +248 -0
  39. data/ext/rudb/NuDB/include/nudb/impl/recover.ipp +209 -0
  40. data/ext/rudb/NuDB/include/nudb/impl/rekey.ipp +248 -0
  41. data/ext/rudb/NuDB/include/nudb/impl/verify.ipp +634 -0
  42. data/ext/rudb/NuDB/include/nudb/impl/visit.ipp +96 -0
  43. data/ext/rudb/NuDB/include/nudb/impl/win32_file.ipp +264 -0
  44. data/ext/rudb/NuDB/include/nudb/native_file.hpp +76 -0
  45. data/ext/rudb/NuDB/include/nudb/nudb.hpp +27 -0
  46. data/ext/rudb/NuDB/include/nudb/posix_file.hpp +228 -0
  47. data/ext/rudb/NuDB/include/nudb/progress.hpp +32 -0
  48. data/ext/rudb/NuDB/include/nudb/recover.hpp +73 -0
  49. data/ext/rudb/NuDB/include/nudb/rekey.hpp +110 -0
  50. data/ext/rudb/NuDB/include/nudb/store.hpp +27 -0
  51. data/ext/rudb/NuDB/include/nudb/type_traits.hpp +63 -0
  52. data/ext/rudb/NuDB/include/nudb/verify.hpp +200 -0
  53. data/ext/rudb/NuDB/include/nudb/version.hpp +21 -0
  54. data/ext/rudb/NuDB/include/nudb/visit.hpp +63 -0
  55. data/ext/rudb/NuDB/include/nudb/win32_file.hpp +246 -0
  56. data/ext/rudb/NuDB/include/nudb/xxhasher.hpp +45 -0
  57. data/ext/rudb/extconf.rb +12 -0
  58. data/ext/rudb/rudb.cpp +234 -0
  59. data/lib/rudb/version.rb +3 -0
  60. data/lib/rudb.rb +1 -0
  61. metadata +104 -0
@@ -0,0 +1,73 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_RECOVER_HPP
9
+ #define NUDB_RECOVER_HPP
10
+
11
+ #include <nudb/error.hpp>
12
+ #include <nudb/native_file.hpp>
13
+
14
+ namespace nudb {
15
+
16
+ /** Perform recovery on a database.
17
+
18
+ This implements the recovery algorithm by rolling back
19
+ any partially committed data. If no log file is present,
20
+ the function does nothing.
21
+
22
+ During the commit phase of a NuDB database, a log file
23
+ is generated with information that may be used to roll
24
+ back the results of a partial commit. This function
25
+ checks for the presence of a log file. If present, the
26
+ log file is replayed on the key and data files belonging
27
+ to the database, restoring the database to its state
28
+ before the partial commit. When @ref recover is
29
+ successful, it erases the log file.
30
+
31
+ It is normally not necessary to call this function
32
+ directly; it is called automatically when a database is
33
+ opened in a call to @ref basic_store::open. Callers may
34
+ use this function to implement auxiliary tools for
35
+ manipulating the database.
36
+
37
+ @par Template Parameters
38
+
39
+ @tparam Hasher The hash function to use. This type must
40
+ meet the requirements of @b Hasher. The hash function
41
+ must be the same as that used to create the database, or
42
+ else an error is returned.
43
+
44
+ @tparam File The type of file to use. Use the default of
45
+ @ref native_file unless customizing the file behavior.
46
+
47
+ @param dat_path The path to the data file.
48
+
49
+ @param key_path The path to the key file.
50
+
51
+ @param log_path The path to the log file.
52
+
53
+ @param args Optional parameters passed to File constructors.
54
+
55
+ @param ec Set to the error, if any occurred.
56
+ */
57
+ template<
58
+ class Hasher,
59
+ class File = native_file,
60
+ class... Args>
61
+ void
62
+ recover(
63
+ path_type const& dat_path,
64
+ path_type const& key_path,
65
+ path_type const& log_path,
66
+ error_code& ec,
67
+ Args&&... args);
68
+
69
+ } // nudb
70
+
71
+ #include <nudb/impl/recover.ipp>
72
+
73
+ #endif
@@ -0,0 +1,110 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_REKEY_HPP
9
+ #define NUDB_REKEY_HPP
10
+
11
+ #include <nudb/error.hpp>
12
+ #include <nudb/file.hpp>
13
+ #include <cstddef>
14
+ #include <cstdint>
15
+
16
+ namespace nudb {
17
+
18
+ /** Create a new key file from a data file.
19
+
20
+ This algorithm rebuilds a key file for the given data file.
21
+ It works efficiently by iterating the data file multiple times.
22
+ During the iteration, a contiguous block of the key file is
23
+ rendered in memory, then flushed to disk when the iteration is
24
+ complete. The size of this memory buffer is controlled by the
25
+ `bufferSize` parameter, larger is better. The algorithm works
26
+ the fastest when `bufferSize` is large enough to hold the entire
27
+ key file in memory; only a single iteration of the data file
28
+ is needed in this case.
29
+
30
+ During the rekey, spill records may be appended to the data
31
+ file. If the rekey operation is abnormally terminated, this
32
+ would normally result in a corrupted data file. To prevent this,
33
+ the function creates a log file using the specified path so
34
+ that the database can be fixed in a subsequent call to
35
+ @ref recover.
36
+
37
+ @note If a log file is already present, this function will
38
+ fail with @ref error::log_file_exists.
39
+
40
+ @par Template Parameters
41
+
42
+ @tparam Hasher The hash function to use. This type must
43
+ meet the requirements of @b Hasher. The hash function
44
+ must be the same as that used to create the database, or
45
+ else an error is returned.
46
+
47
+ @tparam File The type of file to use. This type must meet
48
+ the requirements of @b File.
49
+
50
+ @param dat_path The path to the data file.
51
+
52
+ @param key_path The path to the key file.
53
+
54
+ @param log_path The path to the log file.
55
+
56
+ @param blockSize The size of a key file block. Larger
57
+ blocks hold more keys but require more I/O cycles per
58
+ operation. The ideal block size is the largest size that
58
+ may be read in a single I/O cycle, and is device dependent.
59
+ The function @ref block_size returns a suitable
60
+ value for the volume of a given path.
62
+
63
+ @param loadFactor A number between zero and one
64
+ representing the average bucket occupancy (number of
65
+ items). A value of 0.5 is perfect. Lower numbers
66
+ waste space, and higher numbers produce negligible
67
+ savings at the cost of increased I/O cycles.
68
+
69
+ @param itemCount The number of items in the data file.
70
+
71
+ @param bufferSize The number of bytes to allocate for the buffer.
72
+
73
+ @param ec Set to the error if any occurred.
74
+
75
+ @param progress A function which will be called periodically
76
+ as the algorithm proceeds. The equivalent signature of the
77
+ progress function must be:
78
+ @code
79
+ void progress(
80
+ std::uint64_t amount, // Amount of work done so far
81
+ std::uint64_t total // Total amount of work to do
82
+ );
83
+ @endcode
84
+
85
+ @param args Optional arguments passed to @b File constructors.
86
+ */
87
+ template<
88
+ class Hasher,
89
+ class File,
90
+ class Progress,
91
+ class... Args
92
+ >
93
+ void
94
+ rekey(
95
+ path_type const& dat_path,
96
+ path_type const& key_path,
97
+ path_type const& log_path,
98
+ std::size_t blockSize,
99
+ float loadFactor,
100
+ std::uint64_t itemCount,
101
+ std::size_t bufferSize,
102
+ error_code& ec,
103
+ Progress&& progress,
104
+ Args&&... args);
105
+
106
+ } // nudb
107
+
108
+ #include <nudb/impl/rekey.ipp>
109
+
110
+ #endif
@@ -0,0 +1,27 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_STORE_HPP
9
+ #define NUDB_STORE_HPP
10
+
11
+ #include <nudb/basic_store.hpp>
12
+ #include <nudb/native_file.hpp>
13
+ #include <nudb/xxhasher.hpp>
14
+
15
+ namespace nudb {
16
+
17
+ /** A key/value database.
18
+
19
+ The @b Hasher used is @ref xxhasher, which works very
20
+ well for almost all cases. The @b File is @ref native_file which
21
+ works on Windows and POSIX platforms.
22
+ */
23
+ using store = basic_store<xxhasher, native_file>;
24
+
25
+ } // nudb
26
+
27
+ #endif
@@ -0,0 +1,63 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_TYPE_TRAITS_HPP
9
+ #define NUDB_TYPE_TRAITS_HPP
10
+
11
+ #include <cstddef>
12
+ #include <cstdint>
13
+
14
+ namespace nudb {
15
+
16
+ #if ! GENERATING_DOCS
17
+
18
+ namespace detail {
19
+
20
+ // Holds a full digest
21
+ using nhash_t = std::uint64_t;
22
+
23
+ } // detail
24
+
25
+ /** Holds a bucket index or bucket count.
26
+
27
+ The maximum number of buckets in a key file is 2^32-1.
28
+ */
29
+ //using nbuck_t = std::uint32_t;
30
+ using nbuck_t = std::size_t;
31
+
32
+ /** Holds a key index or count in bucket.
33
+
34
+ A bucket is limited to 2^16-1 items. The practical
35
+ limit is lower, since a bucket cannot be larger than
36
+ the block size.
37
+ */
38
+ //using nkey_t = std::uint16_t;
39
+ using nkey_t = std::size_t;
40
+
41
+ /** Holds a file size or offset.
42
+
43
+ Operating system support for large files is required.
44
+ Practically, data files cannot exceed 2^48 since offsets
45
+ are stored as 48 bit unsigned values.
46
+ */
47
+ using noff_t = std::uint64_t;
48
+
49
+ /** Holds a block, key, or value size.
50
+
51
+ Block size is limited to 2^16.
52
+
53
+ Key file blocks are limited to the block size.
54
+
55
+ Value sizes are limited to 2^31-1.
56
+ */
57
+ using nsize_t = std::size_t;
58
+
59
+ #endif
60
+
61
+ } // nudb
62
+
63
+ #endif
@@ -0,0 +1,200 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_VERIFY_HPP
9
+ #define NUDB_VERIFY_HPP
10
+
11
+ #include <nudb/file.hpp>
12
+ #include <nudb/type_traits.hpp>
13
+ #include <nudb/detail/bucket.hpp>
14
+ #include <nudb/detail/bulkio.hpp>
15
+ #include <nudb/detail/format.hpp>
16
+ #include <algorithm>
17
+ #include <cstddef>
18
+ #include <cstdint>
19
+ #include <string>
20
+
21
+ namespace nudb {
22
+
23
+ /// Describes database statistics calculated by @ref verify.
24
+ struct verify_info
25
+ {
26
+ /** Indicates the verify algorithm used.
27
+
28
+ @li @b 0 Normal algorithm
29
+ @li @b 1 Fast algorithm
30
+ */
31
+ int algorithm = 0; // 0 = normal, 1 = fast
32
+
33
+ /// The path to the data file
34
+ path_type dat_path;
35
+
36
+ /// The path to the key file
37
+ path_type key_path;
38
+
39
+ /// The API version used to create the database
40
+ std::size_t version = 0;
41
+
42
+ /// The unique identifier
43
+ std::uint64_t uid = 0;
44
+
45
+ /// The application-defined constant
46
+ std::uint64_t appnum = 0;
47
+
48
+ /// The size of each key, in bytes
49
+ nsize_t key_size = 0;
50
+
51
+ /// The salt used in the key file
52
+ std::uint64_t salt = 0;
53
+
54
+ /// The salt fingerprint
55
+ std::uint64_t pepper = 0;
56
+
57
+ /// The block size used in the key file
58
+ nsize_t block_size = 0;
59
+
60
+ /// The target load factor used in the key file
61
+ float load_factor = 0;
62
+
63
+ /// The maximum number of keys each bucket can hold
64
+ nkey_t capacity = 0;
65
+
66
+ /// The number of buckets in the key file
67
+ nbuck_t buckets = 0;
68
+
69
+ /// The size of a bucket in bytes
70
+ nsize_t bucket_size = 0;
71
+
72
+ /// The size of the key file
73
+ noff_t key_file_size = 0;
74
+
75
+ /// The size of the data file
76
+ noff_t dat_file_size = 0;
77
+
78
+ /// The number of keys found
79
+ std::uint64_t key_count = 0;
80
+
81
+ /// The number of values found
82
+ std::uint64_t value_count = 0;
83
+
84
+ /// The total number of bytes occupied by values
85
+ std::uint64_t value_bytes = 0;
86
+
87
+ /// The number of spill records in use
88
+ std::uint64_t spill_count = 0;
89
+
90
+ /// The total number of spill records
91
+ std::uint64_t spill_count_tot = 0;
92
+
93
+ /// The number of bytes occupied by spill records in use
94
+ std::uint64_t spill_bytes = 0;
95
+
96
+ /// The number of bytes occupied by all spill records
97
+ std::uint64_t spill_bytes_tot = 0;
98
+
99
+ /// Average number of key file reads per fetch
100
+ float avg_fetch = 0;
101
+
102
+ /// The fraction of the data file that is wasted
103
+ float waste = 0;
104
+
105
+ /// The data amplification ratio
106
+ float overhead = 0;
107
+
108
+ /// The measured bucket load fraction
109
+ float actual_load = 0;
110
+
111
+ /// A histogram of the number of buckets having N spill records
112
+ std::array<nbuck_t, 10> hist;
113
+
114
+ /// Default constructor
115
+ verify_info()
116
+ {
117
+ hist.fill(0);
118
+ }
119
+ };
120
+
121
+ /** Verify consistency of the key and data files.
122
+
123
+ This function opens the key and data files, and
124
+ performs the following checks on the contents:
125
+
126
+ @li Data file header validity
127
+
128
+ @li Key file header validity
129
+
130
+ @li Data and key file header agreements
131
+
132
+ @li Check that each value is contained in a bucket
133
+
134
+ @li Check that each bucket item reflects a value
135
+
136
+ @li Ensure no values with duplicate keys
137
+
138
+ Undefined behavior results when verifying a database
139
+ that still has a log file. Use @ref recover on such
140
+ databases first.
141
+
142
+ This function selects one of two algorithms to use, the
143
+ normal version, and a faster version that can take advantage
144
+ of a buffer of sufficient size. Depending on the value of
145
+ the bufferSize argument, the appropriate algorithm is chosen.
146
+
147
+ A good value of bufferSize is one that is a large fraction
148
+ of the key file size. For example, 20% of the size of the
149
+ key file. Larger is better, with the highest usable value
150
+ depending on the size of the key file. If presented with
151
+ a buffer size that is too large to be of extra use, the
152
+ fast algorithm will simply allocate what it needs.
153
+
154
+ @par Template Parameters
155
+
156
+ @tparam Hasher The hash function to use. This type must
157
+ meet the requirements of @b Hasher. The hash function
158
+ must be the same as that used to create the database, or
159
+ else an error is returned.
160
+
161
+ @param info A structure which will be default constructed
162
+ inside this function, and filled in if the operation completes
163
+ successfully. If an error is indicated, the contents of this
164
+ variable are undefined.
165
+
166
+ @param dat_path The path to the data file.
167
+
168
+ @param key_path The path to the key file.
169
+
170
+ @param bufferSize The number of bytes to allocate for the buffer.
171
+ If this number is too small, or zero, a slower algorithm will be
172
+ used that does not require a buffer.
173
+
174
+ @param progress A function which will be called periodically
175
+ as the algorithm proceeds. The equivalent signature of the
176
+ progress function must be:
177
+ @code
178
+ void progress(
179
+ std::uint64_t amount, // Amount of work done so far
180
+ std::uint64_t total // Total amount of work to do
181
+ );
182
+ @endcode
183
+
184
+ @param ec Set to the error, if any occurred.
185
+ */
186
+ template<class Hasher, class Progress>
187
+ void
188
+ verify(
189
+ verify_info& info,
190
+ path_type const& dat_path,
191
+ path_type const& key_path,
192
+ std::size_t bufferSize,
193
+ Progress&& progress,
194
+ error_code& ec);
195
+
196
+ } // nudb
197
+
198
+ #include <nudb/impl/verify.ipp>
199
+
200
+ #endif
@@ -0,0 +1,21 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_VERSION_HPP
9
+ #define NUDB_VERSION_HPP
10
+
11
+ // follows http://semver.org
12
+
13
+ // NUDB_VERSION % 100 is the patch level
14
+ // NUDB_VERSION / 100 % 1000 is the minor version
15
+ // NUDB_VERSION / 100000 is the major version
16
+ //
17
+ #define NUDB_VERSION 200000
18
+
19
+ #define NUDB_VERSION_STRING "2.0.0"
20
+
21
+ #endif
@@ -0,0 +1,63 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_VISIT_HPP
9
+ #define NUDB_VISIT_HPP
10
+
11
+ #include <nudb/error.hpp>
12
+ #include <nudb/file.hpp>
13
+
14
+ namespace nudb {
15
+
16
+ /** Visit each key/data pair in a data file.
17
+
18
+ This function will open and iterate the contents of a
19
+ data file, invoking the callback for each key/value
20
+ pair found. Only a data file is necessary; the key
21
+ file may be omitted.
22
+
23
+ @param path The path to the data file.
24
+
25
+ @param callback A function which will be called with
26
+ each item found in the data file. The equivalent signature
27
+ of the callback must be:
28
+ @code
29
+ void callback(
30
+ void const* key, // A pointer to the item key
31
+ std::size_t key_size, // The size of the key (always the same)
32
+ void const* data, // A pointer to the item data
33
+ std::size_t data_size, // The size of the item data
34
+ error_code& ec // Indicates an error (out parameter)
35
+ );
36
+ @endcode
37
+ If the callback sets ec to an error, the visit is terminated.
38
+
39
+ @param progress A function which will be called periodically
40
+ as the algorithm proceeds. The equivalent signature of the
41
+ progress function must be:
42
+ @code
43
+ void progress(
44
+ std::uint64_t amount, // Amount of work done so far
45
+ std::uint64_t total // Total amount of work to do
46
+ );
47
+ @endcode
48
+
49
+ @param ec Set to the error, if any occurred.
50
+ */
51
+ template<class Callback, class Progress>
52
+ void
53
+ visit(
54
+ path_type const& path,
55
+ Callback&& callback,
56
+ Progress&& progress,
57
+ error_code& ec);
58
+
59
+ } // nudb
60
+
61
+ #include <nudb/impl/visit.ipp>
62
+
63
+ #endif