rrudb 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +1 -0
- data/LICENSE.txt +22 -0
- data/README.md +26 -0
- data/examples/example.rb +39 -0
- data/ext/rudb/NuDB/include/nudb/CMakeLists.txt +104 -0
- data/ext/rudb/NuDB/include/nudb/_experimental/basic_seconds_clock.hpp +200 -0
- data/ext/rudb/NuDB/include/nudb/_experimental/chrono_util.hpp +58 -0
- data/ext/rudb/NuDB/include/nudb/_experimental/test/fail_file.hpp +343 -0
- data/ext/rudb/NuDB/include/nudb/_experimental/test/temp_dir.hpp +73 -0
- data/ext/rudb/NuDB/include/nudb/_experimental/test/test_store.hpp +451 -0
- data/ext/rudb/NuDB/include/nudb/_experimental/test/xor_shift_engine.hpp +105 -0
- data/ext/rudb/NuDB/include/nudb/_experimental/util.hpp +288 -0
- data/ext/rudb/NuDB/include/nudb/basic_store.hpp +461 -0
- data/ext/rudb/NuDB/include/nudb/concepts.hpp +205 -0
- data/ext/rudb/NuDB/include/nudb/context.hpp +144 -0
- data/ext/rudb/NuDB/include/nudb/create.hpp +117 -0
- data/ext/rudb/NuDB/include/nudb/detail/arena.hpp +296 -0
- data/ext/rudb/NuDB/include/nudb/detail/bucket.hpp +473 -0
- data/ext/rudb/NuDB/include/nudb/detail/buffer.hpp +86 -0
- data/ext/rudb/NuDB/include/nudb/detail/bulkio.hpp +196 -0
- data/ext/rudb/NuDB/include/nudb/detail/cache.hpp +236 -0
- data/ext/rudb/NuDB/include/nudb/detail/endian.hpp +93 -0
- data/ext/rudb/NuDB/include/nudb/detail/field.hpp +265 -0
- data/ext/rudb/NuDB/include/nudb/detail/format.hpp +630 -0
- data/ext/rudb/NuDB/include/nudb/detail/gentex.hpp +259 -0
- data/ext/rudb/NuDB/include/nudb/detail/mutex.hpp +26 -0
- data/ext/rudb/NuDB/include/nudb/detail/pool.hpp +243 -0
- data/ext/rudb/NuDB/include/nudb/detail/store_base.hpp +45 -0
- data/ext/rudb/NuDB/include/nudb/detail/stream.hpp +149 -0
- data/ext/rudb/NuDB/include/nudb/detail/xxhash.hpp +328 -0
- data/ext/rudb/NuDB/include/nudb/error.hpp +257 -0
- data/ext/rudb/NuDB/include/nudb/file.hpp +55 -0
- data/ext/rudb/NuDB/include/nudb/impl/basic_store.ipp +785 -0
- data/ext/rudb/NuDB/include/nudb/impl/context.ipp +241 -0
- data/ext/rudb/NuDB/include/nudb/impl/create.ipp +163 -0
- data/ext/rudb/NuDB/include/nudb/impl/error.ipp +175 -0
- data/ext/rudb/NuDB/include/nudb/impl/posix_file.ipp +248 -0
- data/ext/rudb/NuDB/include/nudb/impl/recover.ipp +209 -0
- data/ext/rudb/NuDB/include/nudb/impl/rekey.ipp +248 -0
- data/ext/rudb/NuDB/include/nudb/impl/verify.ipp +634 -0
- data/ext/rudb/NuDB/include/nudb/impl/visit.ipp +96 -0
- data/ext/rudb/NuDB/include/nudb/impl/win32_file.ipp +264 -0
- data/ext/rudb/NuDB/include/nudb/native_file.hpp +76 -0
- data/ext/rudb/NuDB/include/nudb/nudb.hpp +27 -0
- data/ext/rudb/NuDB/include/nudb/posix_file.hpp +228 -0
- data/ext/rudb/NuDB/include/nudb/progress.hpp +32 -0
- data/ext/rudb/NuDB/include/nudb/recover.hpp +73 -0
- data/ext/rudb/NuDB/include/nudb/rekey.hpp +110 -0
- data/ext/rudb/NuDB/include/nudb/store.hpp +27 -0
- data/ext/rudb/NuDB/include/nudb/type_traits.hpp +63 -0
- data/ext/rudb/NuDB/include/nudb/verify.hpp +200 -0
- data/ext/rudb/NuDB/include/nudb/version.hpp +21 -0
- data/ext/rudb/NuDB/include/nudb/visit.hpp +63 -0
- data/ext/rudb/NuDB/include/nudb/win32_file.hpp +246 -0
- data/ext/rudb/NuDB/include/nudb/xxhasher.hpp +45 -0
- data/ext/rudb/extconf.rb +12 -0
- data/ext/rudb/rudb.cpp +234 -0
- data/lib/rudb/version.rb +3 -0
- data/lib/rudb.rb +1 -0
- metadata +104 -0
@@ -0,0 +1,634 @@
|
|
1
|
+
//
|
2
|
+
// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com)
|
3
|
+
//
|
4
|
+
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
5
|
+
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
6
|
+
//
|
7
|
+
|
8
|
+
#ifndef NUDB_IMPL_VERIFY_IPP
|
9
|
+
#define NUDB_IMPL_VERIFY_IPP
|
10
|
+
|
11
|
+
#include <nudb/concepts.hpp>
|
12
|
+
#include <nudb/native_file.hpp>
|
13
|
+
#include <nudb/type_traits.hpp>
|
14
|
+
#include <nudb/detail/bucket.hpp>
|
15
|
+
#include <nudb/detail/bulkio.hpp>
|
16
|
+
#include <nudb/detail/format.hpp>
|
17
|
+
#include <boost/core/ignore_unused.hpp>
|
18
|
+
#include <algorithm>
|
19
|
+
#include <cstddef>
|
20
|
+
#include <limits>
|
21
|
+
#include <string>
|
22
|
+
|
23
|
+
namespace nudb {
|
24
|
+
|
25
|
+
namespace detail {
|
26
|
+
|
27
|
+
// Normal verify that does not require a buffer
|
28
|
+
//
|
29
|
+
template<
|
30
|
+
class Hasher,
|
31
|
+
class File,
|
32
|
+
class Progress>
|
33
|
+
void
|
34
|
+
verify_normal(
|
35
|
+
verify_info& info,
|
36
|
+
File& df,
|
37
|
+
File& kf,
|
38
|
+
dat_file_header& dh,
|
39
|
+
key_file_header& kh,
|
40
|
+
Progress&& progress,
|
41
|
+
error_code& ec)
|
42
|
+
{
|
43
|
+
static_assert(is_File<File>::value,
|
44
|
+
"File requirements not met");
|
45
|
+
static_assert(is_Hasher<Hasher>::value,
|
46
|
+
"Hasher requirements not met");
|
47
|
+
static_assert(is_Progress<Progress>::value,
|
48
|
+
"Progress requirements not met");
|
49
|
+
boost::ignore_unused(dh);
|
50
|
+
info.algorithm = 0;
|
51
|
+
auto const readSize = 1024 * kh.block_size;
|
52
|
+
|
53
|
+
// This ratio balances the 2 work phases.
|
54
|
+
// The number is determined empirically.
|
55
|
+
auto const adjust = 1.75;
|
56
|
+
|
57
|
+
// Calculate the work required
|
58
|
+
auto const keys = static_cast<std::uint64_t>(
|
59
|
+
double(kh.load_factor) / 65536.0 * kh.buckets * kh.capacity);
|
60
|
+
std::uint64_t const nwork = static_cast<std::uint64_t>(
|
61
|
+
info.dat_file_size + keys * kh.block_size +
|
62
|
+
adjust * (info.key_file_size + keys * kh.block_size));
|
63
|
+
std::uint64_t work = 0;
|
64
|
+
progress(0, nwork);
|
65
|
+
|
66
|
+
// Iterate Data File
|
67
|
+
// Data Record
|
68
|
+
auto const dh_len =
|
69
|
+
field<uint48_t>::size + // Size
|
70
|
+
kh.key_size; // Key
|
71
|
+
std::uint64_t fetches = 0;
|
72
|
+
buffer buf{kh.block_size + dh_len};
|
73
|
+
bucket b{kh.block_size, buf.get()};
|
74
|
+
std::uint8_t* pd = buf.get() + kh.block_size;
|
75
|
+
{
|
76
|
+
bulk_reader<File> r{df, dat_file_header::size,
|
77
|
+
info.dat_file_size, readSize};
|
78
|
+
while(! r.eof())
|
79
|
+
{
|
80
|
+
auto const offset = r.offset();
|
81
|
+
// Data Record or Spill Record
|
82
|
+
auto is = r.prepare(
|
83
|
+
field<uint48_t>::size, ec); // Size
|
84
|
+
if(ec)
|
85
|
+
return;
|
86
|
+
nsize_t size;
|
87
|
+
read_size48(is, size);
|
88
|
+
if(size > 0)
|
89
|
+
{
|
90
|
+
// Data Record
|
91
|
+
is = r.prepare(
|
92
|
+
kh.key_size + // Key
|
93
|
+
size, ec); // Data
|
94
|
+
if(ec)
|
95
|
+
return;
|
96
|
+
std::uint8_t const* const key =
|
97
|
+
is.data(kh.key_size);
|
98
|
+
std::uint8_t const* const data =
|
99
|
+
is.data(size);
|
100
|
+
(void)data;
|
101
|
+
auto const h = hash<Hasher>(
|
102
|
+
key, kh.key_size, kh.salt);
|
103
|
+
// Check bucket and spills
|
104
|
+
auto const n = bucket_index(
|
105
|
+
h, kh.buckets, kh.modulus);
|
106
|
+
b.read(kf,
|
107
|
+
static_cast<noff_t>(n + 1) * kh.block_size, ec);
|
108
|
+
if(ec)
|
109
|
+
return;
|
110
|
+
work += kh.block_size;
|
111
|
+
++fetches;
|
112
|
+
for(;;)
|
113
|
+
{
|
114
|
+
for(auto i = b.lower_bound(h);
|
115
|
+
i < b.size(); ++i)
|
116
|
+
{
|
117
|
+
auto const item = b[i];
|
118
|
+
if(item.hash != h)
|
119
|
+
break;
|
120
|
+
if(item.offset == offset)
|
121
|
+
goto found;
|
122
|
+
++fetches;
|
123
|
+
}
|
124
|
+
auto const spill = b.spill();
|
125
|
+
if(! spill)
|
126
|
+
{
|
127
|
+
ec = error::orphaned_value;
|
128
|
+
return;
|
129
|
+
}
|
130
|
+
b.read(df, spill, ec);
|
131
|
+
if(ec == error::short_read)
|
132
|
+
{
|
133
|
+
ec = error::short_spill;
|
134
|
+
return;
|
135
|
+
}
|
136
|
+
if(ec)
|
137
|
+
return;
|
138
|
+
++fetches;
|
139
|
+
}
|
140
|
+
found:
|
141
|
+
// Update
|
142
|
+
++info.value_count;
|
143
|
+
info.value_bytes += size;
|
144
|
+
}
|
145
|
+
else
|
146
|
+
{
|
147
|
+
// Spill Record
|
148
|
+
is = r.prepare(
|
149
|
+
field<std::uint16_t>::size, ec);
|
150
|
+
if(ec == error::short_read)
|
151
|
+
{
|
152
|
+
ec = error::short_spill;
|
153
|
+
return;
|
154
|
+
}
|
155
|
+
if(ec)
|
156
|
+
return;
|
157
|
+
read<std::uint16_t>(is, size); // Size
|
158
|
+
if(size != info.bucket_size)
|
159
|
+
{
|
160
|
+
ec = error::invalid_spill_size;
|
161
|
+
return;
|
162
|
+
}
|
163
|
+
if(ec)
|
164
|
+
return;
|
165
|
+
b.read(r, ec); // Bucket
|
166
|
+
if(ec == error::short_read)
|
167
|
+
{
|
168
|
+
ec = error::short_spill;
|
169
|
+
return;
|
170
|
+
}
|
171
|
+
if(ec)
|
172
|
+
return;
|
173
|
+
++info.spill_count_tot;
|
174
|
+
info.spill_bytes_tot +=
|
175
|
+
field<uint48_t>::size + // Zero
|
176
|
+
field<uint16_t>::size + // Size
|
177
|
+
b.actual_size(); // Bucket
|
178
|
+
}
|
179
|
+
progress(work + offset, nwork);
|
180
|
+
}
|
181
|
+
work += info.dat_file_size;
|
182
|
+
}
|
183
|
+
|
184
|
+
// Iterate Key File
|
185
|
+
{
|
186
|
+
for(std::size_t n = 0; n < kh.buckets; ++n)
|
187
|
+
{
|
188
|
+
std::size_t nspill = 0;
|
189
|
+
b.read(kf, static_cast<noff_t>(
|
190
|
+
n + 1) * kh.block_size, ec);
|
191
|
+
if(ec)
|
192
|
+
return;
|
193
|
+
work += static_cast<std::uint64_t>(
|
194
|
+
adjust * kh.block_size);
|
195
|
+
bool spill = false;
|
196
|
+
for(;;)
|
197
|
+
{
|
198
|
+
info.key_count += b.size();
|
199
|
+
for(nkey_t i = 0; i < b.size(); ++i)
|
200
|
+
{
|
201
|
+
auto const e = b[i];
|
202
|
+
df.read(e.offset, pd, dh_len, ec);
|
203
|
+
if(ec == error::short_read)
|
204
|
+
{
|
205
|
+
ec = error::missing_value;
|
206
|
+
return;
|
207
|
+
}
|
208
|
+
if(ec)
|
209
|
+
return;
|
210
|
+
if(! spill)
|
211
|
+
work += static_cast<std::uint64_t>(
|
212
|
+
adjust * kh.block_size);
|
213
|
+
// Data Record
|
214
|
+
istream is{pd, dh_len};
|
215
|
+
std::uint64_t size;
|
216
|
+
// VFALCO This should really be a 32-bit field
|
217
|
+
read<uint48_t>(is, size); // Size
|
218
|
+
void const* key =
|
219
|
+
is.data(kh.key_size); // Key
|
220
|
+
if(size != e.size)
|
221
|
+
{
|
222
|
+
ec = error::size_mismatch;
|
223
|
+
return;
|
224
|
+
}
|
225
|
+
auto const h = hash<Hasher>(key,
|
226
|
+
kh.key_size, kh.salt);
|
227
|
+
if(h != e.hash)
|
228
|
+
{
|
229
|
+
ec = error::hash_mismatch;
|
230
|
+
return;
|
231
|
+
}
|
232
|
+
}
|
233
|
+
if(! b.spill())
|
234
|
+
break;
|
235
|
+
b.read(df, b.spill(), ec);
|
236
|
+
if(ec)
|
237
|
+
return;
|
238
|
+
spill = true;
|
239
|
+
++nspill;
|
240
|
+
++info.spill_count;
|
241
|
+
info.spill_bytes +=
|
242
|
+
field<uint48_t>::size + // Zero
|
243
|
+
field<uint16_t>::size + // Size
|
244
|
+
b.actual_size(); // SpillBucket
|
245
|
+
}
|
246
|
+
if(nspill >= info.hist.size())
|
247
|
+
nspill = info.hist.size() - 1;
|
248
|
+
++info.hist[nspill];
|
249
|
+
progress(work, nwork);
|
250
|
+
}
|
251
|
+
}
|
252
|
+
float sum = 0;
|
253
|
+
for(size_t i = 0; i < info.hist.size(); ++i)
|
254
|
+
sum += info.hist[i] * (i + 1);
|
255
|
+
if(info.value_count)
|
256
|
+
info.avg_fetch =
|
257
|
+
float(fetches) / info.value_count;
|
258
|
+
else
|
259
|
+
info.avg_fetch = 0;
|
260
|
+
info.waste = (info.spill_bytes_tot - info.spill_bytes) /
|
261
|
+
float(info.dat_file_size);
|
262
|
+
if(info.value_count)
|
263
|
+
info.overhead =
|
264
|
+
float(info.key_file_size + info.dat_file_size) /
|
265
|
+
(
|
266
|
+
info.value_bytes +
|
267
|
+
info.key_count *
|
268
|
+
(info.key_size +
|
269
|
+
// Data Record
|
270
|
+
field<uint48_t>::size) // Size
|
271
|
+
) - 1;
|
272
|
+
else
|
273
|
+
info.overhead = 0;
|
274
|
+
info.actual_load = info.key_count / float(
|
275
|
+
info.capacity * info.buckets);
|
276
|
+
}
|
277
|
+
|
278
|
+
// Fast version of verify that uses a buffer
|
279
|
+
//
|
280
|
+
template<class Hasher, class File, class Progress>
|
281
|
+
void
|
282
|
+
verify_fast(
|
283
|
+
verify_info& info,
|
284
|
+
File& df,
|
285
|
+
File& kf,
|
286
|
+
dat_file_header& dh,
|
287
|
+
key_file_header& kh,
|
288
|
+
std::size_t bufferSize,
|
289
|
+
Progress&& progress,
|
290
|
+
error_code& ec)
|
291
|
+
{
|
292
|
+
boost::ignore_unused(dh);
|
293
|
+
|
294
|
+
info.algorithm = 1;
|
295
|
+
auto const readSize = 1024 * kh.block_size;
|
296
|
+
|
297
|
+
// Counts unverified keys per bucket
|
298
|
+
if(kh.buckets > std::numeric_limits<nbuck_t>::max())
|
299
|
+
{
|
300
|
+
ec = error::too_many_buckets;
|
301
|
+
return;
|
302
|
+
}
|
303
|
+
std::unique_ptr<nkey_t[]> nkeys(
|
304
|
+
new nkey_t[kh.buckets]);
|
305
|
+
|
306
|
+
// Verify contiguous sequential sections of the
|
307
|
+
// key file using multiple passes over the data.
|
308
|
+
//
|
309
|
+
if(bufferSize < 2 * kh.block_size + sizeof(nkey_t))
|
310
|
+
throw std::logic_error("invalid buffer size");
|
311
|
+
auto chunkSize = std::min(kh.buckets,
|
312
|
+
(bufferSize - kh.block_size) /
|
313
|
+
(kh.block_size + sizeof(nkey_t)));
|
314
|
+
auto const passes =
|
315
|
+
(kh.buckets + chunkSize - 1) / chunkSize;
|
316
|
+
|
317
|
+
// Calculate the work required
|
318
|
+
std::uint64_t work = 0;
|
319
|
+
std::uint64_t const nwork =
|
320
|
+
passes * info.dat_file_size + info.key_file_size;
|
321
|
+
progress(0, nwork);
|
322
|
+
|
323
|
+
std::uint64_t fetches = 0;
|
324
|
+
buffer buf{(chunkSize + 1) * kh.block_size};
|
325
|
+
bucket tmp{kh.block_size,
|
326
|
+
buf.get() + chunkSize * kh.block_size};
|
327
|
+
for(nsize_t b0 = 0; b0 < kh.buckets; b0 += chunkSize)
|
328
|
+
{
|
329
|
+
// Load key file chunk to buffer
|
330
|
+
auto const b1 = std::min(b0 + chunkSize, kh.buckets);
|
331
|
+
// Buffered range is [b0, b1)
|
332
|
+
auto const bn = b1 - b0;
|
333
|
+
kf.read(
|
334
|
+
static_cast<noff_t>(b0 + 1) * kh.block_size,
|
335
|
+
buf.get(),
|
336
|
+
static_cast<noff_t>(bn * kh.block_size),
|
337
|
+
ec);
|
338
|
+
if(ec)
|
339
|
+
return;
|
340
|
+
work += bn * kh.block_size;
|
341
|
+
progress(work, nwork);
|
342
|
+
// Count keys in buckets, including spills
|
343
|
+
for(nbuck_t i = 0 ; i < bn; ++i)
|
344
|
+
{
|
345
|
+
bucket b{kh.block_size,
|
346
|
+
buf.get() + i * kh.block_size};
|
347
|
+
nkeys[i] = b.size();
|
348
|
+
std::size_t nspill = 0;
|
349
|
+
auto spill = b.spill();
|
350
|
+
while(spill != 0)
|
351
|
+
{
|
352
|
+
tmp.read(df, spill, ec);
|
353
|
+
if(ec == error::short_read)
|
354
|
+
{
|
355
|
+
ec = error::short_spill;
|
356
|
+
return;
|
357
|
+
}
|
358
|
+
if(ec)
|
359
|
+
return;
|
360
|
+
nkeys[i] += tmp.size();
|
361
|
+
spill = tmp.spill();
|
362
|
+
++nspill;
|
363
|
+
++info.spill_count;
|
364
|
+
info.spill_bytes +=
|
365
|
+
field<uint48_t>::size + // Zero
|
366
|
+
field<uint16_t>::size + // Size
|
367
|
+
tmp.actual_size(); // SpillBucket
|
368
|
+
}
|
369
|
+
if(nspill >= info.hist.size())
|
370
|
+
nspill = info.hist.size() - 1;
|
371
|
+
++info.hist[nspill];
|
372
|
+
info.key_count += nkeys[i];
|
373
|
+
}
|
374
|
+
// Iterate Data File
|
375
|
+
bulk_reader<File> r(df, dat_file_header::size,
|
376
|
+
info.dat_file_size, readSize);
|
377
|
+
while(! r.eof())
|
378
|
+
{
|
379
|
+
auto const offset = r.offset();
|
380
|
+
// Data Record or Spill Record
|
381
|
+
auto is = r.prepare(
|
382
|
+
field<uint48_t>::size, ec); // Size
|
383
|
+
if(ec == error::short_read)
|
384
|
+
{
|
385
|
+
ec = error::short_data_record;
|
386
|
+
return;
|
387
|
+
}
|
388
|
+
if(ec)
|
389
|
+
return;
|
390
|
+
nsize_t size;
|
391
|
+
detail::read_size48(is, size);
|
392
|
+
if(size > 0)
|
393
|
+
{
|
394
|
+
// Data Record
|
395
|
+
is = r.prepare(
|
396
|
+
kh.key_size + // Key
|
397
|
+
size, ec); // Data
|
398
|
+
if(ec == error::short_read)
|
399
|
+
{
|
400
|
+
ec = error::short_value;
|
401
|
+
return;
|
402
|
+
}
|
403
|
+
if(ec)
|
404
|
+
return;
|
405
|
+
std::uint8_t const* const key =
|
406
|
+
is.data(kh.key_size);
|
407
|
+
std::uint8_t const* const data =
|
408
|
+
is.data(size);
|
409
|
+
(void)data;
|
410
|
+
auto const h = hash<Hasher>(
|
411
|
+
key, kh.key_size, kh.salt);
|
412
|
+
auto const n = bucket_index(
|
413
|
+
h, kh.buckets, kh.modulus);
|
414
|
+
if(n < b0 || n >= b1)
|
415
|
+
continue;
|
416
|
+
// Check bucket and spills
|
417
|
+
bucket b{kh.block_size, buf.get() +
|
418
|
+
(n - b0) * kh.block_size};
|
419
|
+
++fetches;
|
420
|
+
for(;;)
|
421
|
+
{
|
422
|
+
for(auto i = b.lower_bound(h);
|
423
|
+
i < b.size(); ++i)
|
424
|
+
{
|
425
|
+
auto const item = b[i];
|
426
|
+
if(item.hash != h)
|
427
|
+
break;
|
428
|
+
if(item.offset == offset)
|
429
|
+
goto found;
|
430
|
+
++fetches;
|
431
|
+
}
|
432
|
+
auto const spill = b.spill();
|
433
|
+
if(! spill)
|
434
|
+
{
|
435
|
+
ec = error::orphaned_value;
|
436
|
+
return;
|
437
|
+
}
|
438
|
+
b = tmp;
|
439
|
+
b.read(df, spill, ec);
|
440
|
+
if(ec == error::short_read)
|
441
|
+
{
|
442
|
+
ec = error::short_spill;
|
443
|
+
return;
|
444
|
+
}
|
445
|
+
if(ec)
|
446
|
+
return;
|
447
|
+
++fetches;
|
448
|
+
}
|
449
|
+
found:
|
450
|
+
// Update
|
451
|
+
++info.value_count;
|
452
|
+
info.value_bytes += size;
|
453
|
+
if(nkeys[n - b0]-- == 0)
|
454
|
+
{
|
455
|
+
ec = error::orphaned_value;
|
456
|
+
return;
|
457
|
+
}
|
458
|
+
}
|
459
|
+
else
|
460
|
+
{
|
461
|
+
// Spill Record
|
462
|
+
is = r.prepare(
|
463
|
+
field<std::uint16_t>::size, ec);
|
464
|
+
if(ec == error::short_read)
|
465
|
+
{
|
466
|
+
ec = error::short_spill;
|
467
|
+
return;
|
468
|
+
}
|
469
|
+
if(ec)
|
470
|
+
return;
|
471
|
+
read<std::uint16_t>(is, size); // Size
|
472
|
+
if(bucket_size(
|
473
|
+
bucket_capacity(size)) != size)
|
474
|
+
{
|
475
|
+
ec = error::invalid_spill_size;
|
476
|
+
return;
|
477
|
+
}
|
478
|
+
r.prepare(size, ec); // Bucket
|
479
|
+
if(ec == error::short_read)
|
480
|
+
{
|
481
|
+
ec = error::short_spill;
|
482
|
+
return;
|
483
|
+
}
|
484
|
+
if(ec)
|
485
|
+
return;
|
486
|
+
if(b0 == 0)
|
487
|
+
{
|
488
|
+
++info.spill_count_tot;
|
489
|
+
info.spill_bytes_tot +=
|
490
|
+
field<uint48_t>::size + // Zero
|
491
|
+
field<uint16_t>::size + // Size
|
492
|
+
tmp.actual_size(); // Bucket
|
493
|
+
}
|
494
|
+
}
|
495
|
+
progress(work + offset, nwork);
|
496
|
+
}
|
497
|
+
// Make sure every key in every bucket was visited
|
498
|
+
for(std::size_t i = 0; i < bn; ++i)
|
499
|
+
{
|
500
|
+
if(nkeys[i] != 0)
|
501
|
+
{
|
502
|
+
ec = error::missing_value;
|
503
|
+
return;
|
504
|
+
}
|
505
|
+
}
|
506
|
+
work += info.dat_file_size;
|
507
|
+
}
|
508
|
+
|
509
|
+
float sum = 0;
|
510
|
+
for(std::size_t i = 0; i < info.hist.size(); ++i)
|
511
|
+
sum += info.hist[i] * (i + 1);
|
512
|
+
if(info.value_count)
|
513
|
+
info.avg_fetch =
|
514
|
+
float(fetches) / info.value_count;
|
515
|
+
else
|
516
|
+
info.avg_fetch = 0;
|
517
|
+
info.waste = (info.spill_bytes_tot - info.spill_bytes) /
|
518
|
+
float(info.dat_file_size);
|
519
|
+
if(info.value_count)
|
520
|
+
info.overhead =
|
521
|
+
float(info.key_file_size + info.dat_file_size) /
|
522
|
+
(
|
523
|
+
info.value_bytes +
|
524
|
+
info.key_count *
|
525
|
+
(info.key_size +
|
526
|
+
// Data Record
|
527
|
+
field<uint48_t>::size) // Size
|
528
|
+
) - 1;
|
529
|
+
else
|
530
|
+
info.overhead = 0;
|
531
|
+
info.actual_load = info.key_count / float(
|
532
|
+
info.capacity * info.buckets);
|
533
|
+
}
|
534
|
+
|
535
|
+
} // detail
|
536
|
+
|
537
|
+
template<class Hasher, class Progress>
|
538
|
+
void
|
539
|
+
verify(
|
540
|
+
verify_info& info,
|
541
|
+
path_type const& dat_path,
|
542
|
+
path_type const& key_path,
|
543
|
+
std::size_t bufferSize,
|
544
|
+
Progress&& progress,
|
545
|
+
error_code& ec)
|
546
|
+
{
|
547
|
+
static_assert(is_Hasher<Hasher>::value,
|
548
|
+
"Hasher requirements not met");
|
549
|
+
static_assert(is_Progress<Progress>::value,
|
550
|
+
"Progress requirements not met");
|
551
|
+
info = {};
|
552
|
+
using namespace detail;
|
553
|
+
using File = native_file;
|
554
|
+
File df;
|
555
|
+
df.open(file_mode::scan, dat_path, ec);
|
556
|
+
if(ec)
|
557
|
+
return;
|
558
|
+
File kf;
|
559
|
+
kf.open (file_mode::read, key_path, ec);
|
560
|
+
if(ec)
|
561
|
+
return;
|
562
|
+
dat_file_header dh;
|
563
|
+
read(df, dh, ec);
|
564
|
+
if(ec)
|
565
|
+
return;
|
566
|
+
verify(dh, ec);
|
567
|
+
if(ec)
|
568
|
+
return;
|
569
|
+
key_file_header kh;
|
570
|
+
read(kf, kh, ec);
|
571
|
+
if(ec)
|
572
|
+
return;
|
573
|
+
verify<Hasher>(kh, ec);
|
574
|
+
if(ec)
|
575
|
+
return;
|
576
|
+
verify<Hasher>(dh, kh, ec);
|
577
|
+
if(ec)
|
578
|
+
return;
|
579
|
+
info.dat_path = dat_path;
|
580
|
+
info.key_path = key_path;
|
581
|
+
info.version = dh.version;
|
582
|
+
info.uid = dh.uid;
|
583
|
+
info.appnum = dh.appnum;
|
584
|
+
info.key_size = dh.key_size;
|
585
|
+
info.salt = kh.salt;
|
586
|
+
info.pepper = kh.pepper;
|
587
|
+
info.block_size = kh.block_size;
|
588
|
+
info.load_factor = kh.load_factor / 65536.f;
|
589
|
+
info.capacity = kh.capacity;
|
590
|
+
info.buckets = kh.buckets;
|
591
|
+
info.bucket_size = bucket_size(kh.capacity);
|
592
|
+
info.key_file_size = kf.size(ec);
|
593
|
+
if(ec)
|
594
|
+
return;
|
595
|
+
info.dat_file_size = df.size(ec);
|
596
|
+
if(ec)
|
597
|
+
return;
|
598
|
+
|
599
|
+
// Determine which algorithm requires the least amount
|
600
|
+
// of file I/O given the available buffer size
|
601
|
+
std::size_t chunkSize;
|
602
|
+
if(bufferSize >= 2 * kh.block_size + sizeof(nkey_t))
|
603
|
+
chunkSize = std::min(kh.buckets,
|
604
|
+
(bufferSize - kh.block_size) /
|
605
|
+
(kh.block_size + sizeof(nkey_t)));
|
606
|
+
else
|
607
|
+
chunkSize = 0;
|
608
|
+
std::size_t passes;
|
609
|
+
if(chunkSize > 0)
|
610
|
+
passes = (kh.buckets + chunkSize - 1) / chunkSize;
|
611
|
+
else
|
612
|
+
passes = 0;
|
613
|
+
if(! chunkSize ||
|
614
|
+
((
|
615
|
+
info.dat_file_size +
|
616
|
+
(kh.buckets * kh.load_factor * kh.capacity * kh.block_size) +
|
617
|
+
info.key_file_size
|
618
|
+
) < (
|
619
|
+
passes * info.dat_file_size + info.key_file_size
|
620
|
+
)))
|
621
|
+
{
|
622
|
+
detail::verify_normal<Hasher>(info,
|
623
|
+
df, kf, dh, kh, progress, ec);
|
624
|
+
}
|
625
|
+
else
|
626
|
+
{
|
627
|
+
detail::verify_fast<Hasher>(info,
|
628
|
+
df, kf, dh, kh, bufferSize, progress, ec);
|
629
|
+
}
|
630
|
+
}
|
631
|
+
|
632
|
+
} // nudb
|
633
|
+
|
634
|
+
#endif
|