leveldb-ruby 0.7 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +1 -1
- data/leveldb/Makefile +70 -29
- data/leveldb/build_detect_platform +74 -0
- data/leveldb/db/builder.cc +2 -4
- data/leveldb/db/builder.h +4 -6
- data/leveldb/db/c.cc +471 -0
- data/leveldb/db/corruption_test.cc +21 -16
- data/leveldb/db/db_bench.cc +400 -200
- data/leveldb/db/db_impl.cc +276 -131
- data/leveldb/db/db_impl.h +22 -10
- data/leveldb/db/db_iter.cc +2 -1
- data/leveldb/db/db_test.cc +391 -43
- data/leveldb/db/dbformat.cc +31 -0
- data/leveldb/db/dbformat.h +51 -1
- data/leveldb/db/filename.h +1 -1
- data/leveldb/db/log_format.h +1 -1
- data/leveldb/db/log_reader.cc +16 -11
- data/leveldb/db/memtable.cc +37 -0
- data/leveldb/db/memtable.h +6 -0
- data/leveldb/db/repair.cc +17 -14
- data/leveldb/db/skiplist_test.cc +2 -2
- data/leveldb/db/version_edit.cc +7 -9
- data/leveldb/db/version_edit.h +2 -1
- data/leveldb/db/version_set.cc +416 -104
- data/leveldb/db/version_set.h +78 -14
- data/leveldb/db/version_set_test.cc +179 -0
- data/leveldb/db/write_batch_internal.h +2 -0
- data/leveldb/include/leveldb/c.h +246 -0
- data/leveldb/include/leveldb/db.h +14 -2
- data/leveldb/include/leveldb/env.h +31 -10
- data/leveldb/include/leveldb/options.h +7 -18
- data/leveldb/include/leveldb/slice.h +2 -2
- data/leveldb/include/leveldb/status.h +1 -1
- data/leveldb/port/atomic_pointer.h +144 -0
- data/leveldb/port/port.h +0 -2
- data/leveldb/port/port_android.h +7 -1
- data/leveldb/port/port_example.h +11 -1
- data/leveldb/port/port_posix.h +56 -38
- data/leveldb/table/format.cc +12 -8
- data/leveldb/table/table_test.cc +16 -7
- data/leveldb/util/cache.cc +173 -100
- data/leveldb/util/cache_test.cc +28 -11
- data/leveldb/util/coding.h +4 -4
- data/leveldb/util/comparator.cc +1 -0
- data/leveldb/util/env.cc +10 -5
- data/leveldb/util/env_posix.cc +48 -87
- data/leveldb/util/histogram.cc +11 -0
- data/leveldb/util/histogram.h +1 -0
- data/leveldb/util/posix_logger.h +98 -0
- data/leveldb/util/testharness.cc +12 -0
- data/leveldb/util/testharness.h +10 -1
- data/lib/leveldb.rb +11 -3
- metadata +41 -22
data/leveldb/db/version_set.cc
CHANGED
@@ -41,6 +41,14 @@ static uint64_t MaxFileSizeForLevel(int level) {
|
|
41
41
|
return kTargetFileSize; // We could vary per level to reduce number of files?
|
42
42
|
}
|
43
43
|
|
44
|
+
// Returns the sum of the file_size fields of every entry in "files".
// An empty list yields 0.
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
  int64_t total = 0;
  for (std::vector<FileMetaData*>::const_iterator it = files.begin();
       it != files.end(); ++it) {
    total += (*it)->file_size;
  }
  return total;
}
|
51
|
+
|
44
52
|
namespace {
|
45
53
|
std::string IntSetToString(const std::set<uint64_t>& s) {
|
46
54
|
std::string result = "{";
|
@@ -75,6 +83,78 @@ Version::~Version() {
|
|
75
83
|
}
|
76
84
|
}
|
77
85
|
|
86
|
+
int FindFile(const InternalKeyComparator& icmp,
|
87
|
+
const std::vector<FileMetaData*>& files,
|
88
|
+
const Slice& key) {
|
89
|
+
uint32_t left = 0;
|
90
|
+
uint32_t right = files.size();
|
91
|
+
while (left < right) {
|
92
|
+
uint32_t mid = (left + right) / 2;
|
93
|
+
const FileMetaData* f = files[mid];
|
94
|
+
if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) {
|
95
|
+
// Key at "mid.largest" is < "target". Therefore all
|
96
|
+
// files at or before "mid" are uninteresting.
|
97
|
+
left = mid + 1;
|
98
|
+
} else {
|
99
|
+
// Key at "mid.largest" is >= "target". Therefore all files
|
100
|
+
// after "mid" are uninteresting.
|
101
|
+
right = mid;
|
102
|
+
}
|
103
|
+
}
|
104
|
+
return right;
|
105
|
+
}
|
106
|
+
|
107
|
+
static bool AfterFile(const Comparator* ucmp,
|
108
|
+
const Slice* user_key, const FileMetaData* f) {
|
109
|
+
// NULL user_key occurs before all keys and is therefore never after *f
|
110
|
+
return (user_key != NULL &&
|
111
|
+
ucmp->Compare(*user_key, f->largest.user_key()) > 0);
|
112
|
+
}
|
113
|
+
|
114
|
+
static bool BeforeFile(const Comparator* ucmp,
|
115
|
+
const Slice* user_key, const FileMetaData* f) {
|
116
|
+
// NULL user_key occurs after all keys and is therefore never before *f
|
117
|
+
return (user_key != NULL &&
|
118
|
+
ucmp->Compare(*user_key, f->smallest.user_key()) < 0);
|
119
|
+
}
|
120
|
+
|
121
|
+
bool SomeFileOverlapsRange(
|
122
|
+
const InternalKeyComparator& icmp,
|
123
|
+
bool disjoint_sorted_files,
|
124
|
+
const std::vector<FileMetaData*>& files,
|
125
|
+
const Slice* smallest_user_key,
|
126
|
+
const Slice* largest_user_key) {
|
127
|
+
const Comparator* ucmp = icmp.user_comparator();
|
128
|
+
if (!disjoint_sorted_files) {
|
129
|
+
// Need to check against all files
|
130
|
+
for (int i = 0; i < files.size(); i++) {
|
131
|
+
const FileMetaData* f = files[i];
|
132
|
+
if (AfterFile(ucmp, smallest_user_key, f) ||
|
133
|
+
BeforeFile(ucmp, largest_user_key, f)) {
|
134
|
+
// No overlap
|
135
|
+
} else {
|
136
|
+
return true; // Overlap
|
137
|
+
}
|
138
|
+
}
|
139
|
+
return false;
|
140
|
+
}
|
141
|
+
|
142
|
+
// Binary search over file list
|
143
|
+
uint32_t index = 0;
|
144
|
+
if (smallest_user_key != NULL) {
|
145
|
+
// Find the earliest possible internal key for smallest_user_key
|
146
|
+
InternalKey small(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek);
|
147
|
+
index = FindFile(icmp, files, small.Encode());
|
148
|
+
}
|
149
|
+
|
150
|
+
if (index >= files.size()) {
|
151
|
+
// beginning of range is after all files, so no overlap.
|
152
|
+
return false;
|
153
|
+
}
|
154
|
+
|
155
|
+
return !BeforeFile(ucmp, largest_user_key, files[index]);
|
156
|
+
}
|
157
|
+
|
78
158
|
// An internal iterator. For a given version/level pair, yields
|
79
159
|
// information about the files in the level. For a given entry, key()
|
80
160
|
// is the largest key that occurs in the file, and value() is an
|
@@ -92,22 +172,7 @@ class Version::LevelFileNumIterator : public Iterator {
|
|
92
172
|
return index_ < flist_->size();
|
93
173
|
}
|
94
174
|
virtual void Seek(const Slice& target) {
|
95
|
-
|
96
|
-
uint32_t right = flist_->size() - 1;
|
97
|
-
while (left < right) {
|
98
|
-
uint32_t mid = (left + right) / 2;
|
99
|
-
int cmp = icmp_.Compare((*flist_)[mid]->largest.Encode(), target);
|
100
|
-
if (cmp < 0) {
|
101
|
-
// Key at "mid.largest" is < than "target". Therefore all
|
102
|
-
// files at or before "mid" are uninteresting.
|
103
|
-
left = mid + 1;
|
104
|
-
} else {
|
105
|
-
// Key at "mid.largest" is >= "target". Therefore all files
|
106
|
-
// after "mid" are uninteresting.
|
107
|
-
right = mid;
|
108
|
-
}
|
109
|
-
}
|
110
|
-
index_ = left;
|
175
|
+
index_ = FindFile(icmp_, *flist_, target);
|
111
176
|
}
|
112
177
|
virtual void SeekToFirst() { index_ = 0; }
|
113
178
|
virtual void SeekToLast() {
|
@@ -185,6 +250,146 @@ void Version::AddIterators(const ReadOptions& options,
|
|
185
250
|
}
|
186
251
|
}
|
187
252
|
|
253
|
+
// If "*iter" points at a value or deletion for user_key, store
|
254
|
+
// either the value, or a NotFound error and return true.
|
255
|
+
// Else return false.
|
256
|
+
static bool GetValue(Iterator* iter, const Slice& user_key,
|
257
|
+
std::string* value,
|
258
|
+
Status* s) {
|
259
|
+
if (!iter->Valid()) {
|
260
|
+
return false;
|
261
|
+
}
|
262
|
+
ParsedInternalKey parsed_key;
|
263
|
+
if (!ParseInternalKey(iter->key(), &parsed_key)) {
|
264
|
+
*s = Status::Corruption("corrupted key for ", user_key);
|
265
|
+
return true;
|
266
|
+
}
|
267
|
+
if (parsed_key.user_key != user_key) {
|
268
|
+
return false;
|
269
|
+
}
|
270
|
+
switch (parsed_key.type) {
|
271
|
+
case kTypeDeletion:
|
272
|
+
*s = Status::NotFound(Slice()); // Use an empty error message for speed
|
273
|
+
break;
|
274
|
+
case kTypeValue: {
|
275
|
+
Slice v = iter->value();
|
276
|
+
value->assign(v.data(), v.size());
|
277
|
+
break;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
return true;
|
281
|
+
}
|
282
|
+
|
283
|
+
// Ordering predicate that sorts files by descending file number, i.e.
// "a" goes before "b" when a's number is the larger of the two.
static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
  return b->number < a->number;
}
|
286
|
+
|
287
|
+
// Looks up the key described by "k" in the table files of this version.
//
// Levels are searched in increasing order and the first file that holds a
// value or deletion marker for the user key decides the result:
//   - a value is copied into "*value" and an OK status is returned;
//   - a deletion marker, or no entry in any level, yields NotFound;
//   - an iterator error or an unparseable key is returned as-is.
//
// "*stats" is always reset first.  If more than one file had to be read,
// stats->seek_file / stats->seek_file_level are set to the first file that
// was read and its level; otherwise they stay NULL / -1.
Status Version::Get(const ReadOptions& options,
                    const LookupKey& k,
                    std::string* value,
                    GetStats* stats) {
  const Slice ikey = k.internal_key();
  const Slice user_key = k.user_key();
  const Comparator* ucmp = vset_->icmp_.user_comparator();
  Status s;

  stats->seek_file = NULL;
  stats->seek_file_level = -1;
  FileMetaData* previous_file = NULL;
  int previous_file_level = -1;

  // We can search level-by-level since entries never hop across
  // levels.  Therefore we are guaranteed that if we find data
  // in a smaller level, later levels are irrelevant.
  std::vector<FileMetaData*> candidates;
  for (int level = 0; level < config::kNumLevels; level++) {
    const std::vector<FileMetaData*>& level_files = files_[level];
    if (level_files.empty()) continue;

    // Collect the files of this level that may contain user_key.
    candidates.clear();
    if (level == 0) {
      // Level-0 files may overlap each other.  Gather every file whose
      // range covers user_key and visit them from newest to oldest.
      for (size_t i = 0; i < level_files.size(); i++) {
        FileMetaData* f = level_files[i];
        if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
            ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
          candidates.push_back(f);
        }
      }
      std::sort(candidates.begin(), candidates.end(), NewestFirst);
    } else {
      // Binary search to find earliest index whose largest key >= ikey.
      const uint32_t index = FindFile(vset_->icmp_, level_files, ikey);
      if (index < level_files.size()) {
        FileMetaData* f = level_files[index];
        // Skip the file when all of it lies past any data for user_key.
        if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0) {
          candidates.push_back(f);
        }
      }
    }

    for (size_t i = 0; i < candidates.size(); ++i) {
      if (previous_file != NULL && stats->seek_file == NULL) {
        // We have had more than one seek for this read.  Charge the 1st file.
        stats->seek_file = previous_file;
        stats->seek_file_level = previous_file_level;
      }

      FileMetaData* f = candidates[i];
      previous_file = f;
      previous_file_level = level;

      Iterator* iter = vset_->table_cache_->NewIterator(
          options,
          f->number,
          f->file_size);
      iter->Seek(ikey);
      const bool done = GetValue(iter, user_key, value, &s);
      // Capture the iterator status before the iterator is destroyed; an
      // iterator error takes precedence over whatever GetValue() reported.
      const Status iter_status = iter->status();
      delete iter;
      if (!iter_status.ok()) {
        return iter_status;
      }
      if (done) {
        return s;
      }
    }
  }

  return Status::NotFound(Slice());  // Use an empty error message for speed
}
|
379
|
+
|
380
|
+
bool Version::UpdateStats(const GetStats& stats) {
|
381
|
+
FileMetaData* f = stats.seek_file;
|
382
|
+
if (f != NULL) {
|
383
|
+
f->allowed_seeks--;
|
384
|
+
if (f->allowed_seeks <= 0 && file_to_compact_ == NULL) {
|
385
|
+
file_to_compact_ = f;
|
386
|
+
file_to_compact_level_ = stats.seek_file_level;
|
387
|
+
return true;
|
388
|
+
}
|
389
|
+
}
|
390
|
+
return false;
|
391
|
+
}
|
392
|
+
|
188
393
|
void Version::Ref() {
|
189
394
|
++refs_;
|
190
395
|
}
|
@@ -198,26 +403,89 @@ void Version::Unref() {
|
|
198
403
|
}
|
199
404
|
}
|
200
405
|
|
406
|
+
bool Version::OverlapInLevel(int level,
|
407
|
+
const Slice* smallest_user_key,
|
408
|
+
const Slice* largest_user_key) {
|
409
|
+
return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level],
|
410
|
+
smallest_user_key, largest_user_key);
|
411
|
+
}
|
412
|
+
|
413
|
+
int Version::PickLevelForMemTableOutput(
|
414
|
+
const Slice& smallest_user_key,
|
415
|
+
const Slice& largest_user_key) {
|
416
|
+
int level = 0;
|
417
|
+
if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) {
|
418
|
+
// Push to next level if there is no overlap in next level,
|
419
|
+
// and the #bytes overlapping in the level after that are limited.
|
420
|
+
InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek);
|
421
|
+
InternalKey limit(largest_user_key, 0, static_cast<ValueType>(0));
|
422
|
+
std::vector<FileMetaData*> overlaps;
|
423
|
+
while (level < config::kMaxMemCompactLevel) {
|
424
|
+
if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
|
425
|
+
break;
|
426
|
+
}
|
427
|
+
GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
|
428
|
+
const int64_t sum = TotalFileSize(overlaps);
|
429
|
+
if (sum > kMaxGrandParentOverlapBytes) {
|
430
|
+
break;
|
431
|
+
}
|
432
|
+
level++;
|
433
|
+
}
|
434
|
+
}
|
435
|
+
return level;
|
436
|
+
}
|
437
|
+
|
438
|
+
// Store in "*inputs" all files in "level" that overlap [begin,end]
|
439
|
+
void Version::GetOverlappingInputs(
|
440
|
+
int level,
|
441
|
+
const InternalKey* begin,
|
442
|
+
const InternalKey* end,
|
443
|
+
std::vector<FileMetaData*>* inputs) {
|
444
|
+
inputs->clear();
|
445
|
+
Slice user_begin, user_end;
|
446
|
+
if (begin != NULL) {
|
447
|
+
user_begin = begin->user_key();
|
448
|
+
}
|
449
|
+
if (end != NULL) {
|
450
|
+
user_end = end->user_key();
|
451
|
+
}
|
452
|
+
const Comparator* user_cmp = vset_->icmp_.user_comparator();
|
453
|
+
for (size_t i = 0; i < files_[level].size(); i++) {
|
454
|
+
FileMetaData* f = files_[level][i];
|
455
|
+
if (begin != NULL &&
|
456
|
+
user_cmp->Compare(f->largest.user_key(), user_begin) < 0) {
|
457
|
+
// "f" is completely before specified range; skip it
|
458
|
+
} else if (end != NULL &&
|
459
|
+
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
460
|
+
// "f" is completely after specified range; skip it
|
461
|
+
} else {
|
462
|
+
inputs->push_back(f);
|
463
|
+
}
|
464
|
+
}
|
465
|
+
}
|
466
|
+
|
201
467
|
std::string Version::DebugString() const {
|
202
468
|
std::string r;
|
203
469
|
for (int level = 0; level < config::kNumLevels; level++) {
|
204
|
-
// E.g.,
|
205
|
-
|
470
|
+
// E.g.,
|
471
|
+
// --- level 1 ---
|
472
|
+
// 17:123['a' .. 'd']
|
473
|
+
// 20:43['e' .. 'g']
|
474
|
+
r.append("--- level ");
|
206
475
|
AppendNumberTo(&r, level);
|
207
|
-
r.
|
476
|
+
r.append(" ---\n");
|
208
477
|
const std::vector<FileMetaData*>& files = files_[level];
|
209
478
|
for (size_t i = 0; i < files.size(); i++) {
|
210
479
|
r.push_back(' ');
|
211
480
|
AppendNumberTo(&r, files[i]->number);
|
212
481
|
r.push_back(':');
|
213
482
|
AppendNumberTo(&r, files[i]->file_size);
|
214
|
-
r.append("[
|
215
|
-
|
216
|
-
r.append("
|
217
|
-
|
218
|
-
r.append("
|
483
|
+
r.append("[");
|
484
|
+
r.append(files[i]->smallest.DebugString());
|
485
|
+
r.append(" .. ");
|
486
|
+
r.append(files[i]->largest.DebugString());
|
487
|
+
r.append("]\n");
|
219
488
|
}
|
220
|
-
r.push_back('\n');
|
221
489
|
}
|
222
490
|
return r;
|
223
491
|
}
|
@@ -267,10 +535,15 @@ class VersionSet::Builder {
|
|
267
535
|
|
268
536
|
~Builder() {
|
269
537
|
for (int level = 0; level < config::kNumLevels; level++) {
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
for (
|
538
|
+
const FileSet* added = levels_[level].added_files;
|
539
|
+
std::vector<FileMetaData*> to_unref;
|
540
|
+
to_unref.reserve(added->size());
|
541
|
+
for (FileSet::const_iterator it = added->begin();
|
542
|
+
it != added->end(); ++it) {
|
543
|
+
to_unref.push_back(*it);
|
544
|
+
}
|
545
|
+
delete added;
|
546
|
+
for (uint32_t i = 0; i < to_unref.size(); i++) {
|
274
547
|
FileMetaData* f = to_unref[i];
|
275
548
|
f->refs--;
|
276
549
|
if (f->refs <= 0) {
|
@@ -305,6 +578,23 @@ class VersionSet::Builder {
|
|
305
578
|
const int level = edit->new_files_[i].first;
|
306
579
|
FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
|
307
580
|
f->refs = 1;
|
581
|
+
|
582
|
+
// We arrange to automatically compact this file after
|
583
|
+
// a certain number of seeks. Let's assume:
|
584
|
+
// (1) One seek costs 10ms
|
585
|
+
// (2) Writing or reading 1MB costs 10ms (100MB/s)
|
586
|
+
// (3) A compaction of 1MB does 25MB of IO:
|
587
|
+
// 1MB read from this level
|
588
|
+
// 10-12MB read from next level (boundaries may be misaligned)
|
589
|
+
// 10-12MB written to next level
|
590
|
+
// This implies that 25 seeks cost the same as the compaction
|
591
|
+
// of 1MB of data. I.e., one seek costs approximately the
|
592
|
+
// same as the compaction of 40KB of data. We are a little
|
593
|
+
// conservative and allow approximately one seek for every 16KB
|
594
|
+
// of data before triggering a compaction.
|
595
|
+
f->allowed_seeks = (f->file_size / 16384);
|
596
|
+
if (f->allowed_seeks < 100) f->allowed_seeks = 100;
|
597
|
+
|
308
598
|
levels_[level].deleted_files.erase(f->number);
|
309
599
|
levels_[level].added_files->insert(f);
|
310
600
|
}
|
@@ -344,13 +634,13 @@ class VersionSet::Builder {
|
|
344
634
|
#ifndef NDEBUG
|
345
635
|
// Make sure there is no overlap in levels > 0
|
346
636
|
if (level > 0) {
|
347
|
-
for (
|
637
|
+
for (uint32_t i = 1; i < v->files_[level].size(); i++) {
|
348
638
|
const InternalKey& prev_end = v->files_[level][i-1]->largest;
|
349
639
|
const InternalKey& this_begin = v->files_[level][i]->smallest;
|
350
640
|
if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) {
|
351
641
|
fprintf(stderr, "overlapping ranges in same level %s vs. %s\n",
|
352
|
-
|
353
|
-
|
642
|
+
prev_end.DebugString().c_str(),
|
643
|
+
this_begin.DebugString().c_str());
|
354
644
|
abort();
|
355
645
|
}
|
356
646
|
}
|
@@ -363,8 +653,14 @@ class VersionSet::Builder {
|
|
363
653
|
if (levels_[level].deleted_files.count(f->number) > 0) {
|
364
654
|
// File is deleted: do nothing
|
365
655
|
} else {
|
656
|
+
std::vector<FileMetaData*>* files = &v->files_[level];
|
657
|
+
if (level > 0 && !files->empty()) {
|
658
|
+
// Must not overlap
|
659
|
+
assert(vset_->icmp_.Compare((*files)[files->size()-1]->largest,
|
660
|
+
f->smallest) < 0);
|
661
|
+
}
|
366
662
|
f->refs++;
|
367
|
-
|
663
|
+
files->push_back(f);
|
368
664
|
}
|
369
665
|
}
|
370
666
|
};
|
@@ -414,7 +710,7 @@ void VersionSet::AppendVersion(Version* v) {
|
|
414
710
|
v->next_->prev_ = v;
|
415
711
|
}
|
416
712
|
|
417
|
-
Status VersionSet::LogAndApply(VersionEdit* edit) {
|
713
|
+
Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
|
418
714
|
if (edit->has_log_number_) {
|
419
715
|
assert(edit->log_number_ >= log_number_);
|
420
716
|
assert(edit->log_number_ < next_file_number_);
|
@@ -442,6 +738,8 @@ Status VersionSet::LogAndApply(VersionEdit* edit) {
|
|
442
738
|
std::string new_manifest_file;
|
443
739
|
Status s;
|
444
740
|
if (descriptor_log_ == NULL) {
|
741
|
+
// No reason to unlock *mu here since we only hit this path in the
|
742
|
+
// first call to LogAndApply (when opening the database).
|
445
743
|
assert(descriptor_file_ == NULL);
|
446
744
|
new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_);
|
447
745
|
edit->SetNextFile(next_file_number_);
|
@@ -452,20 +750,27 @@ Status VersionSet::LogAndApply(VersionEdit* edit) {
|
|
452
750
|
}
|
453
751
|
}
|
454
752
|
|
455
|
-
//
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
753
|
+
// Unlock during expensive MANIFEST log write
|
754
|
+
{
|
755
|
+
mu->Unlock();
|
756
|
+
|
757
|
+
// Write new record to MANIFEST log
|
460
758
|
if (s.ok()) {
|
461
|
-
|
759
|
+
std::string record;
|
760
|
+
edit->EncodeTo(&record);
|
761
|
+
s = descriptor_log_->AddRecord(record);
|
762
|
+
if (s.ok()) {
|
763
|
+
s = descriptor_file_->Sync();
|
764
|
+
}
|
462
765
|
}
|
463
|
-
}
|
464
766
|
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
767
|
+
// If we just created a new descriptor file, install it by writing a
|
768
|
+
// new CURRENT file that points to it.
|
769
|
+
if (s.ok() && !new_manifest_file.empty()) {
|
770
|
+
s = SetCurrentFile(env_, dbname_, manifest_file_number_);
|
771
|
+
}
|
772
|
+
|
773
|
+
mu->Lock();
|
469
774
|
}
|
470
775
|
|
471
776
|
// Install the new version
|
@@ -581,6 +886,9 @@ Status VersionSet::Recover() {
|
|
581
886
|
if (!have_prev_log_number) {
|
582
887
|
prev_log_number = 0;
|
583
888
|
}
|
889
|
+
|
890
|
+
MarkFileNumberUsed(prev_log_number);
|
891
|
+
MarkFileNumberUsed(log_number);
|
584
892
|
}
|
585
893
|
|
586
894
|
if (s.ok()) {
|
@@ -599,12 +907,10 @@ Status VersionSet::Recover() {
|
|
599
907
|
return s;
|
600
908
|
}
|
601
909
|
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
sum += files[i]->file_size;
|
910
|
+
void VersionSet::MarkFileNumberUsed(uint64_t number) {
|
911
|
+
if (next_file_number_ <= number) {
|
912
|
+
next_file_number_ = number + 1;
|
606
913
|
}
|
607
|
-
return sum;
|
608
914
|
}
|
609
915
|
|
610
916
|
void VersionSet::Finalize(Version* v) {
|
@@ -749,10 +1055,11 @@ int64_t VersionSet::NumLevelBytes(int level) const {
|
|
749
1055
|
int64_t VersionSet::MaxNextLevelOverlappingBytes() {
|
750
1056
|
int64_t result = 0;
|
751
1057
|
std::vector<FileMetaData*> overlaps;
|
752
|
-
for (int level =
|
1058
|
+
for (int level = 1; level < config::kNumLevels - 1; level++) {
|
753
1059
|
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
754
1060
|
const FileMetaData* f = current_->files_[level][i];
|
755
|
-
GetOverlappingInputs(level+1, f->smallest, f->largest,
|
1061
|
+
current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest,
|
1062
|
+
&overlaps);
|
756
1063
|
const int64_t sum = TotalFileSize(overlaps);
|
757
1064
|
if (sum > result) {
|
758
1065
|
result = sum;
|
@@ -762,27 +1069,6 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
|
|
762
1069
|
return result;
|
763
1070
|
}
|
764
1071
|
|
765
|
-
// Store in "*inputs" all files in "level" that overlap [begin,end]
|
766
|
-
void VersionSet::GetOverlappingInputs(
|
767
|
-
int level,
|
768
|
-
const InternalKey& begin,
|
769
|
-
const InternalKey& end,
|
770
|
-
std::vector<FileMetaData*>* inputs) {
|
771
|
-
inputs->clear();
|
772
|
-
Slice user_begin = begin.user_key();
|
773
|
-
Slice user_end = end.user_key();
|
774
|
-
const Comparator* user_cmp = icmp_.user_comparator();
|
775
|
-
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
776
|
-
FileMetaData* f = current_->files_[level][i];
|
777
|
-
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
|
778
|
-
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
779
|
-
// Either completely before or after range; skip it
|
780
|
-
} else {
|
781
|
-
inputs->push_back(f);
|
782
|
-
}
|
783
|
-
}
|
784
|
-
}
|
785
|
-
|
786
1072
|
// Stores the minimal range that covers all entries in inputs in
|
787
1073
|
// *smallest, *largest.
|
788
1074
|
// REQUIRES: inputs is not empty
|
@@ -854,31 +1140,43 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
|
854
1140
|
}
|
855
1141
|
|
856
1142
|
Compaction* VersionSet::PickCompaction() {
|
857
|
-
|
1143
|
+
Compaction* c;
|
1144
|
+
int level;
|
1145
|
+
|
1146
|
+
// We prefer compactions triggered by too much data in a level over
|
1147
|
+
// the compactions triggered by seeks.
|
1148
|
+
const bool size_compaction = (current_->compaction_score_ >= 1);
|
1149
|
+
const bool seek_compaction = (current_->file_to_compact_ != NULL);
|
1150
|
+
if (size_compaction) {
|
1151
|
+
level = current_->compaction_level_;
|
1152
|
+
assert(level >= 0);
|
1153
|
+
assert(level+1 < config::kNumLevels);
|
1154
|
+
c = new Compaction(level);
|
1155
|
+
|
1156
|
+
// Pick the first file that comes after compact_pointer_[level]
|
1157
|
+
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
1158
|
+
FileMetaData* f = current_->files_[level][i];
|
1159
|
+
if (compact_pointer_[level].empty() ||
|
1160
|
+
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
1161
|
+
c->inputs_[0].push_back(f);
|
1162
|
+
break;
|
1163
|
+
}
|
1164
|
+
}
|
1165
|
+
if (c->inputs_[0].empty()) {
|
1166
|
+
// Wrap-around to the beginning of the key space
|
1167
|
+
c->inputs_[0].push_back(current_->files_[level][0]);
|
1168
|
+
}
|
1169
|
+
} else if (seek_compaction) {
|
1170
|
+
level = current_->file_to_compact_level_;
|
1171
|
+
c = new Compaction(level);
|
1172
|
+
c->inputs_[0].push_back(current_->file_to_compact_);
|
1173
|
+
} else {
|
858
1174
|
return NULL;
|
859
1175
|
}
|
860
|
-
const int level = current_->compaction_level_;
|
861
|
-
assert(level >= 0);
|
862
|
-
assert(level+1 < config::kNumLevels);
|
863
1176
|
|
864
|
-
Compaction* c = new Compaction(level);
|
865
1177
|
c->input_version_ = current_;
|
866
1178
|
c->input_version_->Ref();
|
867
1179
|
|
868
|
-
// Pick the first file that comes after compact_pointer_[level]
|
869
|
-
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
870
|
-
FileMetaData* f = current_->files_[level][i];
|
871
|
-
if (compact_pointer_[level].empty() ||
|
872
|
-
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
873
|
-
c->inputs_[0].push_back(f);
|
874
|
-
break;
|
875
|
-
}
|
876
|
-
}
|
877
|
-
if (c->inputs_[0].empty()) {
|
878
|
-
// Wrap-around to the beginning of the key space
|
879
|
-
c->inputs_[0].push_back(current_->files_[level][0]);
|
880
|
-
}
|
881
|
-
|
882
1180
|
// Files in level 0 may overlap each other, so pick up all overlapping ones
|
883
1181
|
if (level == 0) {
|
884
1182
|
InternalKey smallest, largest;
|
@@ -886,7 +1184,7 @@ Compaction* VersionSet::PickCompaction() {
|
|
886
1184
|
// Note that the next call will discard the file we placed in
|
887
1185
|
// c->inputs_[0] earlier and replace it with an overlapping set
|
888
1186
|
// which will include the picked file.
|
889
|
-
GetOverlappingInputs(0, smallest, largest, &c->inputs_[0]);
|
1187
|
+
current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]);
|
890
1188
|
assert(!c->inputs_[0].empty());
|
891
1189
|
}
|
892
1190
|
|
@@ -900,7 +1198,7 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
900
1198
|
InternalKey smallest, largest;
|
901
1199
|
GetRange(c->inputs_[0], &smallest, &largest);
|
902
1200
|
|
903
|
-
GetOverlappingInputs(level+1, smallest, largest, &c->inputs_[1]);
|
1201
|
+
current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]);
|
904
1202
|
|
905
1203
|
// Get entire range covered by compaction
|
906
1204
|
InternalKey all_start, all_limit;
|
@@ -910,14 +1208,15 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
910
1208
|
// changing the number of "level+1" files we pick up.
|
911
1209
|
if (!c->inputs_[1].empty()) {
|
912
1210
|
std::vector<FileMetaData*> expanded0;
|
913
|
-
GetOverlappingInputs(level, all_start, all_limit, &expanded0);
|
1211
|
+
current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0);
|
914
1212
|
if (expanded0.size() > c->inputs_[0].size()) {
|
915
1213
|
InternalKey new_start, new_limit;
|
916
1214
|
GetRange(expanded0, &new_start, &new_limit);
|
917
1215
|
std::vector<FileMetaData*> expanded1;
|
918
|
-
GetOverlappingInputs(level+1, new_start, new_limit,
|
1216
|
+
current_->GetOverlappingInputs(level+1, &new_start, &new_limit,
|
1217
|
+
&expanded1);
|
919
1218
|
if (expanded1.size() == c->inputs_[1].size()) {
|
920
|
-
Log(
|
1219
|
+
Log(options_->info_log,
|
921
1220
|
"Expanding@%d %d+%d to %d+%d\n",
|
922
1221
|
level,
|
923
1222
|
int(c->inputs_[0].size()),
|
@@ -936,14 +1235,15 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
936
1235
|
// Compute the set of grandparent files that overlap this compaction
|
937
1236
|
// (parent == level+1; grandparent == level+2)
|
938
1237
|
if (level + 2 < config::kNumLevels) {
|
939
|
-
GetOverlappingInputs(level + 2, all_start, all_limit,
|
1238
|
+
current_->GetOverlappingInputs(level + 2, &all_start, &all_limit,
|
1239
|
+
&c->grandparents_);
|
940
1240
|
}
|
941
1241
|
|
942
1242
|
if (false) {
|
943
|
-
Log(
|
1243
|
+
Log(options_->info_log, "Compacting %d '%s' .. '%s'",
|
944
1244
|
level,
|
945
|
-
|
946
|
-
|
1245
|
+
smallest.DebugString().c_str(),
|
1246
|
+
largest.DebugString().c_str());
|
947
1247
|
}
|
948
1248
|
|
949
1249
|
// Update the place where we will do the next compaction for this level.
|
@@ -956,14 +1256,26 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
956
1256
|
|
957
1257
|
Compaction* VersionSet::CompactRange(
|
958
1258
|
int level,
|
959
|
-
const InternalKey
|
960
|
-
const InternalKey
|
1259
|
+
const InternalKey* begin,
|
1260
|
+
const InternalKey* end) {
|
961
1261
|
std::vector<FileMetaData*> inputs;
|
962
|
-
GetOverlappingInputs(level, begin, end, &inputs);
|
1262
|
+
current_->GetOverlappingInputs(level, begin, end, &inputs);
|
963
1263
|
if (inputs.empty()) {
|
964
1264
|
return NULL;
|
965
1265
|
}
|
966
1266
|
|
1267
|
+
// Avoid compacting too much in one shot in case the range is large.
|
1268
|
+
const uint64_t limit = MaxFileSizeForLevel(level);
|
1269
|
+
uint64_t total = 0;
|
1270
|
+
for (int i = 0; i < inputs.size(); i++) {
|
1271
|
+
uint64_t s = inputs[i]->file_size;
|
1272
|
+
total += s;
|
1273
|
+
if (total >= limit) {
|
1274
|
+
inputs.resize(i + 1);
|
1275
|
+
break;
|
1276
|
+
}
|
1277
|
+
}
|
1278
|
+
|
967
1279
|
Compaction* c = new Compaction(level);
|
968
1280
|
c->input_version_ = current_;
|
969
1281
|
c->input_version_->Ref();
|