rrudb 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +1 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +26 -0
  5. data/examples/example.rb +39 -0
  6. data/ext/rudb/NuDB/include/nudb/CMakeLists.txt +104 -0
  7. data/ext/rudb/NuDB/include/nudb/_experimental/basic_seconds_clock.hpp +200 -0
  8. data/ext/rudb/NuDB/include/nudb/_experimental/chrono_util.hpp +58 -0
  9. data/ext/rudb/NuDB/include/nudb/_experimental/test/fail_file.hpp +343 -0
  10. data/ext/rudb/NuDB/include/nudb/_experimental/test/temp_dir.hpp +73 -0
  11. data/ext/rudb/NuDB/include/nudb/_experimental/test/test_store.hpp +451 -0
  12. data/ext/rudb/NuDB/include/nudb/_experimental/test/xor_shift_engine.hpp +105 -0
  13. data/ext/rudb/NuDB/include/nudb/_experimental/util.hpp +288 -0
  14. data/ext/rudb/NuDB/include/nudb/basic_store.hpp +461 -0
  15. data/ext/rudb/NuDB/include/nudb/concepts.hpp +205 -0
  16. data/ext/rudb/NuDB/include/nudb/context.hpp +144 -0
  17. data/ext/rudb/NuDB/include/nudb/create.hpp +117 -0
  18. data/ext/rudb/NuDB/include/nudb/detail/arena.hpp +296 -0
  19. data/ext/rudb/NuDB/include/nudb/detail/bucket.hpp +473 -0
  20. data/ext/rudb/NuDB/include/nudb/detail/buffer.hpp +86 -0
  21. data/ext/rudb/NuDB/include/nudb/detail/bulkio.hpp +196 -0
  22. data/ext/rudb/NuDB/include/nudb/detail/cache.hpp +236 -0
  23. data/ext/rudb/NuDB/include/nudb/detail/endian.hpp +93 -0
  24. data/ext/rudb/NuDB/include/nudb/detail/field.hpp +265 -0
  25. data/ext/rudb/NuDB/include/nudb/detail/format.hpp +630 -0
  26. data/ext/rudb/NuDB/include/nudb/detail/gentex.hpp +259 -0
  27. data/ext/rudb/NuDB/include/nudb/detail/mutex.hpp +26 -0
  28. data/ext/rudb/NuDB/include/nudb/detail/pool.hpp +243 -0
  29. data/ext/rudb/NuDB/include/nudb/detail/store_base.hpp +45 -0
  30. data/ext/rudb/NuDB/include/nudb/detail/stream.hpp +149 -0
  31. data/ext/rudb/NuDB/include/nudb/detail/xxhash.hpp +328 -0
  32. data/ext/rudb/NuDB/include/nudb/error.hpp +257 -0
  33. data/ext/rudb/NuDB/include/nudb/file.hpp +55 -0
  34. data/ext/rudb/NuDB/include/nudb/impl/basic_store.ipp +785 -0
  35. data/ext/rudb/NuDB/include/nudb/impl/context.ipp +241 -0
  36. data/ext/rudb/NuDB/include/nudb/impl/create.ipp +163 -0
  37. data/ext/rudb/NuDB/include/nudb/impl/error.ipp +175 -0
  38. data/ext/rudb/NuDB/include/nudb/impl/posix_file.ipp +248 -0
  39. data/ext/rudb/NuDB/include/nudb/impl/recover.ipp +209 -0
  40. data/ext/rudb/NuDB/include/nudb/impl/rekey.ipp +248 -0
  41. data/ext/rudb/NuDB/include/nudb/impl/verify.ipp +634 -0
  42. data/ext/rudb/NuDB/include/nudb/impl/visit.ipp +96 -0
  43. data/ext/rudb/NuDB/include/nudb/impl/win32_file.ipp +264 -0
  44. data/ext/rudb/NuDB/include/nudb/native_file.hpp +76 -0
  45. data/ext/rudb/NuDB/include/nudb/nudb.hpp +27 -0
  46. data/ext/rudb/NuDB/include/nudb/posix_file.hpp +228 -0
  47. data/ext/rudb/NuDB/include/nudb/progress.hpp +32 -0
  48. data/ext/rudb/NuDB/include/nudb/recover.hpp +73 -0
  49. data/ext/rudb/NuDB/include/nudb/rekey.hpp +110 -0
  50. data/ext/rudb/NuDB/include/nudb/store.hpp +27 -0
  51. data/ext/rudb/NuDB/include/nudb/type_traits.hpp +63 -0
  52. data/ext/rudb/NuDB/include/nudb/verify.hpp +200 -0
  53. data/ext/rudb/NuDB/include/nudb/version.hpp +21 -0
  54. data/ext/rudb/NuDB/include/nudb/visit.hpp +63 -0
  55. data/ext/rudb/NuDB/include/nudb/win32_file.hpp +246 -0
  56. data/ext/rudb/NuDB/include/nudb/xxhasher.hpp +45 -0
  57. data/ext/rudb/extconf.rb +12 -0
  58. data/ext/rudb/rudb.cpp +234 -0
  59. data/lib/rudb/version.rb +3 -0
  60. data/lib/rudb.rb +1 -0
  61. metadata +104 -0
@@ -0,0 +1,73 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_RECOVER_HPP
9
+ #define NUDB_RECOVER_HPP
10
+
11
+ #include <nudb/error.hpp>
12
+ #include <nudb/native_file.hpp>
13
+
14
+ namespace nudb {
15
+
16
+ /** Perform recovery on a database.
17
+
18
+ This implements the recovery algorithm by rolling back
19
+ any partially committed data. If no log file is present,
20
+ the function does nothing.
21
+
22
+ During the commit phase of a NuDB database, a log file
23
+ is generated with information that may be used to roll
24
+ back the results of a partial commit. This function
25
+ checks for the presence of a log file. If present, the
26
+ log file is replayed on the key and data files belonging
27
+ to the database, restoring the database to its state
28
+ before the partial commit. When @ref recover is
29
+ successful, it erases the log file.
30
+
31
+ It is normally not necessary to call this function
32
+ directly; it is called automatically when a database is
33
+ opened in a call to @ref basic_store::open. Callers may
34
+ use this function to implement auxiliary tools for
35
+ manipulating the database.
36
+
37
+ @par Template Parameters
38
+
39
+ @tparam Hasher The hash function to use. This type must
40
+ meet the requirements of @b Hasher. The hash function
41
+ must be the same as that used to create the database, or
42
+ else an error is returned.
43
+
44
+ @tparam File The type of file to use. Use the default of
45
+ @ref native_file unless customizing the file behavior.
46
+
47
+ @param dat_path The path to the data file.
48
+
49
+ @param key_path The path to the key file.
50
+
51
+ @param log_path The path to the log file.
52
+
53
+ @param args Optional parameters passed to File constructors.
54
+
55
+ @param ec Set to the error, if any occurred.
56
+ */
57
+ template<
58
+ class Hasher,
59
+ class File = native_file,
60
+ class... Args>
61
+ void
62
+ recover(
63
+ path_type const& dat_path,
64
+ path_type const& key_path,
65
+ path_type const& log_path,
66
+ error_code& ec,
67
+ Args&&... args);
68
+
69
+ } // nudb
70
+
71
+ #include <nudb/impl/recover.ipp>
72
+
73
+ #endif
@@ -0,0 +1,110 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_REKEY_HPP
9
+ #define NUDB_REKEY_HPP
10
+
11
+ #include <nudb/error.hpp>
12
+ #include <nudb/file.hpp>
13
+ #include <cstddef>
14
+ #include <cstdint>
15
+
16
+ namespace nudb {
17
+
18
+ /** Create a new key file from a data file.
19
+
20
+ This algorithm rebuilds a key file for the given data file.
21
+ It works efficiently by iterating the data file multiple times.
22
+ During the iteration, a contiguous block of the key file is
23
+ rendered in memory, then flushed to disk when the iteration is
24
+ complete. The size of this memory buffer is controlled by the
25
+ `bufferSize` parameter, larger is better. The algorithm works
26
+ the fastest when `bufferSize` is large enough to hold the entire
27
+ key file in memory; only a single iteration of the data file
28
+ is needed in this case.
29
+
30
+ During the rekey, spill records may be appended to the data
31
+ file. If the rekey operation is abnormally terminated, this
32
+ would normally result in a corrupted data file. To prevent this,
33
+ the function creates a log file using the specified path so
34
+ that the database can be fixed in a subsequent call to
35
+ @ref recover.
36
+
37
+ @note If a log file is already present, this function will
38
+ fail with @ref error::log_file_exists.
39
+
40
+ @par Template Parameters
41
+
42
+ @tparam Hasher The hash function to use. This type must
43
+ meet the requirements of @b Hasher. The hash function
44
+ must be the same as that used to create the database, or
45
+ else an error is returned.
46
+
47
+ @tparam File The type of file to use. This type must meet
48
+ the requirements of @b File.
49
+
50
+ @param dat_path The path to the data file.
51
+
52
+ @param key_path The path to the key file.
53
+
54
+ @param log_path The path to the log file.
55
+
56
+ @param blockSize The size of a key file block. Larger
57
+ blocks hold more keys but require more I/O cycles per
58
+ operation. The ideal block size the largest size that
59
+ may be read in a single I/O cycle, and device dependent.
60
+ The return value of @ref block_size returns a suitable
61
+ value for the volume of a given path.
62
+
63
+ @param loadFactor A number between zero and one
64
+ representing the average bucket occupancy (number of
65
+ items). A value of 0.5 is perfect. Lower numbers
66
+ waste space, and higher numbers produce negligible
67
+ savings at the cost of increased I/O cycles.
68
+
69
+ @param itemCount The number of items in the data file.
70
+
71
+ @param bufferSize The number of bytes to allocate for the buffer.
72
+
73
+ @param ec Set to the error if any occurred.
74
+
75
+ @param progress A function which will be called periodically
76
+ as the algorithm proceeds. The equivalent signature of the
77
+ progress function must be:
78
+ @code
79
+ void progress(
80
+ std::uint64_t amount, // Amount of work done so far
81
+ std::uint64_t total // Total amount of work to do
82
+ );
83
+ @endcode
84
+
85
+ @param args Optional arguments passed to @b File constructors.
86
+ */
87
+ template<
88
+ class Hasher,
89
+ class File,
90
+ class Progress,
91
+ class... Args
92
+ >
93
+ void
94
+ rekey(
95
+ path_type const& dat_path,
96
+ path_type const& key_path,
97
+ path_type const& log_path,
98
+ std::size_t blockSize,
99
+ float loadFactor,
100
+ std::uint64_t itemCount,
101
+ std::size_t bufferSize,
102
+ error_code& ec,
103
+ Progress&& progress,
104
+ Args&&... args);
105
+
106
+ } // nudb
107
+
108
+ #include <nudb/impl/rekey.ipp>
109
+
110
+ #endif
@@ -0,0 +1,27 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_STORE_HPP
9
+ #define NUDB_STORE_HPP
10
+
11
+ #include <nudb/basic_store.hpp>
12
+ #include <nudb/native_file.hpp>
13
+ #include <nudb/xxhasher.hpp>
14
+
15
+ namespace nudb {
16
+
17
+ /** A key/value database.
18
+
19
+ The @b Hasher used is @ref xxhasher, which works very
20
+ well for almost all cases. The @b File is @ref native_file which
21
+ works on Windows and POSIX platforms.
22
+ */
23
+ using store = basic_store<xxhasher, native_file>;
24
+
25
+ } // nudb
26
+
27
+ #endif
@@ -0,0 +1,63 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_TYPE_TRAITS_HPP
9
+ #define NUDB_TYPE_TRAITS_HPP
10
+
11
+ #include <cstddef>
12
+ #include <cstdint>
13
+
14
+ namespace nudb {
15
+
16
+ #if ! GENERATING_DOCS
17
+
18
+ namespace detail {
19
+
20
+ // Holds a full digest
21
+ using nhash_t = std::uint64_t;
22
+
23
+ } // detail
24
+
25
+ /** Holds a bucket index or bucket count.
26
+
27
+ The maximum number of buckets in a key file is 2^32-1.
28
+ */
29
+ //using nbuck_t = std::uint32_t;
30
+ using nbuck_t = std::size_t;
31
+
32
+ /** Holds a key index or count in bucket.
33
+
34
+ A bucket is limited to 2^16-1 items. The practical
35
+ limit is lower, since a bucket cannot be larger than
36
+ the block size.
37
+ */
38
+ //using nkey_t = std::uint16_t;
39
+ using nkey_t = std::size_t;
40
+
41
+ /** Holds a file size or offset.
42
+
43
+ Operating system support for large files is required.
44
+ Practically, data files cannot exceed 2^48 since offsets
45
+ are stored as 48 bit unsigned values.
46
+ */
47
+ using noff_t = std::uint64_t;
48
+
49
+ /** Holds a block, key, or value size.
50
+
51
+ Block size is limited to 2^16
52
+
53
+ Key file blocks are limited to the block size.
54
+
55
+ Value sizes are limited to 2^31-1.
56
+ */
57
+ using nsize_t = std::size_t;
58
+
59
+ #endif
60
+
61
+ } // nudb
62
+
63
+ #endif
@@ -0,0 +1,200 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_VERIFY_HPP
9
+ #define NUDB_VERIFY_HPP
10
+
11
+ #include <nudb/file.hpp>
12
+ #include <nudb/type_traits.hpp>
13
+ #include <nudb/detail/bucket.hpp>
14
+ #include <nudb/detail/bulkio.hpp>
15
+ #include <nudb/detail/format.hpp>
16
+ #include <algorithm>
17
+ #include <cstddef>
18
+ #include <cstdint>
19
+ #include <string>
20
+
21
+ namespace nudb {
22
+
23
+ /// Describes database statistics calculated by @ref verify.
24
+ struct verify_info
25
+ {
26
+ /** Indicates the verify algorithm used.
27
+
28
+ @li @b 0 Normal algorithm
29
+ @li @b 1 Fast algorithm
30
+ */
31
+ int algorithm; // 0 = normal, 1 = fast; has no in-class initializer, unlike the numeric members below
32
+
33
+ /// The path to the data file
34
+ path_type dat_path;
35
+
36
+ /// The path to the key file
37
+ path_type key_path;
38
+
39
+ /// The API version used to create the database
40
+ std::size_t version = 0;
41
+
42
+ /// The unique identifier
43
+ std::uint64_t uid = 0;
44
+
45
+ /// The application-defined constant
46
+ std::uint64_t appnum = 0;
47
+
48
+ /// The size of each key, in bytes
49
+ nsize_t key_size = 0;
50
+
51
+ /// The salt used in the key file
52
+ std::uint64_t salt = 0;
53
+
54
+ /// The salt fingerprint
55
+ std::uint64_t pepper = 0;
56
+
57
+ /// The block size used in the key file
58
+ nsize_t block_size = 0;
59
+
60
+ /// The target load factor used in the key file
61
+ float load_factor = 0;
62
+
63
+ /// The maximum number of keys each bucket can hold
64
+ nkey_t capacity = 0;
65
+
66
+ /// The number of buckets in the key file
67
+ nbuck_t buckets = 0;
68
+
69
+ /// The size of a bucket in bytes
70
+ nsize_t bucket_size = 0;
71
+
72
+ /// The size of the key file
73
+ noff_t key_file_size = 0;
74
+
75
+ /// The size of the data file
76
+ noff_t dat_file_size = 0;
77
+
78
+ /// The number of keys found
79
+ std::uint64_t key_count = 0;
80
+
81
+ /// The number of values found
82
+ std::uint64_t value_count = 0;
83
+
84
+ /// The total number of bytes occupied by values
85
+ std::uint64_t value_bytes = 0;
86
+
87
+ /// The number of spill records in use
88
+ std::uint64_t spill_count = 0;
89
+
90
+ /// The total number of spill records
91
+ std::uint64_t spill_count_tot = 0;
92
+
93
+ /// The number of bytes occupied by spill records in use
94
+ std::uint64_t spill_bytes = 0;
95
+
96
+ /// The number of bytes occupied by all spill records
97
+ std::uint64_t spill_bytes_tot = 0;
98
+
99
+ /// Average number of key file reads per fetch
100
+ float avg_fetch = 0;
101
+
102
+ /// The fraction of the data file that is wasted
103
+ float waste = 0;
104
+
105
+ /// The data amplification ratio
106
+ float overhead = 0;
107
+
108
+ /// The measured bucket load fraction
109
+ float actual_load = 0;
110
+
111
+ /// A histogram of the number of buckets having N spill records (10 slots). NOTE(review): this header does not include <array> directly - confirm it arrives transitively.
112
+ std::array<nbuck_t, 10> hist;
113
+
114
+ /// Default constructor. Zero-fills hist; every other member keeps its in-class initializer, where it has one.
115
+ verify_info()
116
+ {
117
+ hist.fill(0);
118
+ }
119
+ };
120
+
121
+ /** Verify consistency of the key and data files.
122
+
123
+ This function opens the key and data files, and
124
+ performs the following checks on the contents:
125
+
126
+ @li Data file header validity
127
+
128
+ @li Key file header validity
129
+
130
+ @li Data and key file header agreements
131
+
132
+ @li Check that each value is contained in a bucket
133
+
134
+ @li Check that each bucket item reflects a value
135
+
136
+ @li Ensure no values with duplicate keys
137
+
138
+ Undefined behavior results when verifying a database
139
+ that still has a log file. Use @ref recover on such
140
+ databases first.
141
+
142
+ This function selects one of two algorithms to use, the
143
+ normal version, and a faster version that can take advantage
144
+ of a buffer of sufficient size. Depending on the value of
145
+ the bufferSize argument, the appropriate algorithm is chosen.
146
+
147
+ A good value of bufferSize is one that is a large fraction
148
+ of the key file size. For example, 20% of the size of the
149
+ key file. Larger is better, with the highest usable value
150
+ depending on the size of the key file. If presented with
151
+ a buffer size that is too large to be of extra use, the
152
+ fast algorithm will simply allocate what it needs.
153
+
154
+ @par Template Parameters
155
+
156
+ @tparam Hasher The hash function to use. This type must
157
+ meet the requirements of @b Hasher. The hash function
158
+ must be the same as that used to create the database, or
159
+ else an error is returned.
160
+
161
+ @param info A structure which will be default constructed
162
+ inside this function, and filled in if the operation completes
163
+ successfully. If an error is indicated, the contents of this
164
+ variable are undefined.
165
+
166
+ @param dat_path The path to the data file.
167
+
168
+ @param key_path The path to the key file.
169
+
170
+ @param bufferSize The number of bytes to allocate for the buffer.
171
+ If this number is too small, or zero, a slower algorithm will be
172
+ used that does not require a buffer.
173
+
174
+ @param progress A function which will be called periodically
175
+ as the algorithm proceeds. The equivalent signature of the
176
+ progress function must be:
177
+ @code
178
+ void progress(
179
+ std::uint64_t amount, // Amount of work done so far
180
+ std::uint64_t total // Total amount of work to do
181
+ );
182
+ @endcode
183
+
184
+ @param ec Set to the error, if any occurred.
185
+ */
186
+ template<class Hasher, class Progress>
187
+ void
188
+ verify(
189
+ verify_info& info,
190
+ path_type const& dat_path,
191
+ path_type const& key_path,
192
+ std::size_t bufferSize,
193
+ Progress&& progress,
194
+ error_code& ec);
195
+
196
+ } // nudb
197
+
198
+ #include <nudb/impl/verify.ipp>
199
+
200
+ #endif
@@ -0,0 +1,21 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_VERSION_HPP
9
+ #define NUDB_VERSION_HPP
10
+
11
+ // follows http://semver.org
12
+
13
+ // NUDB_VERSION % 100 is the patch level
14
+ // NUDB_VERSION / 100 % 1000 is the minor version
15
+ // NUDB_VERSION / 100000 is the major version
16
+ //
17
+ #define NUDB_VERSION 200000
18
+
19
+ #define NUDB_VERSION_STRING "2.0.0"
20
+
21
+ #endif
@@ -0,0 +1,63 @@
1
+ //
2
+ // Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
3
+ //
4
+ // Distributed under the Boost Software License, Version 1.0. (See accompanying
5
+ // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6
+ //
7
+
8
+ #ifndef NUDB_VISIT_HPP
9
+ #define NUDB_VISIT_HPP
10
+
11
+ #include <nudb/error.hpp>
12
+ #include <nudb/file.hpp>
13
+
14
+ namespace nudb {
15
+
16
+ /** Visit each key/data pair in a data file.
17
+
18
+ This function will open and iterate the contents of a
19
+ data file, invoking the callback for each key/value
20
+ pair found. Only a data file is necessary; the key
21
+ file may be omitted.
22
+
23
+ @param path The path to the data file.
24
+
25
+ @param callback A function which will be called with
26
+ each item found in the data file. The equivalent signature
27
+ of the callback must be:
28
+ @code
29
+ void callback(
30
+ void const* key, // A pointer to the item key
31
+ std::size_t key_size, // The size of the key (always the same)
32
+ void const* data, // A pointer to the item data
33
+ std::size_t data_size, // The size of the item data
34
+ error_code& ec // Indicates an error (out parameter)
35
+ );
36
+ @endcode
37
+ If the callback sets ec to an error, the visit is terminated.
38
+
39
+ @param progress A function which will be called periodically
40
+ as the algorithm proceeds. The equivalent signature of the
41
+ progress function must be:
42
+ @code
43
+ void progress(
44
+ std::uint64_t amount, // Amount of work done so far
45
+ std::uint64_t total // Total amount of work to do
46
+ );
47
+ @endcode
48
+
49
+ @param ec Set to the error, if any occurred.
50
+ */
51
+ template<class Callback, class Progress>
52
+ void
53
+ visit(
54
+ path_type const& path,
55
+ Callback&& callback,
56
+ Progress&& progress,
57
+ error_code& ec);
58
+
59
+ } // nudb
60
+
61
+ #include <nudb/impl/visit.ipp>
62
+
63
+ #endif