leveldb-ruby 0.7 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/leveldb/Makefile +70 -29
- data/leveldb/build_detect_platform +74 -0
- data/leveldb/db/builder.cc +2 -4
- data/leveldb/db/builder.h +4 -6
- data/leveldb/db/c.cc +471 -0
- data/leveldb/db/corruption_test.cc +21 -16
- data/leveldb/db/db_bench.cc +400 -200
- data/leveldb/db/db_impl.cc +276 -131
- data/leveldb/db/db_impl.h +22 -10
- data/leveldb/db/db_iter.cc +2 -1
- data/leveldb/db/db_test.cc +391 -43
- data/leveldb/db/dbformat.cc +31 -0
- data/leveldb/db/dbformat.h +51 -1
- data/leveldb/db/filename.h +1 -1
- data/leveldb/db/log_format.h +1 -1
- data/leveldb/db/log_reader.cc +16 -11
- data/leveldb/db/memtable.cc +37 -0
- data/leveldb/db/memtable.h +6 -0
- data/leveldb/db/repair.cc +17 -14
- data/leveldb/db/skiplist_test.cc +2 -2
- data/leveldb/db/version_edit.cc +7 -9
- data/leveldb/db/version_edit.h +2 -1
- data/leveldb/db/version_set.cc +416 -104
- data/leveldb/db/version_set.h +78 -14
- data/leveldb/db/version_set_test.cc +179 -0
- data/leveldb/db/write_batch_internal.h +2 -0
- data/leveldb/include/leveldb/c.h +246 -0
- data/leveldb/include/leveldb/db.h +14 -2
- data/leveldb/include/leveldb/env.h +31 -10
- data/leveldb/include/leveldb/options.h +7 -18
- data/leveldb/include/leveldb/slice.h +2 -2
- data/leveldb/include/leveldb/status.h +1 -1
- data/leveldb/port/atomic_pointer.h +144 -0
- data/leveldb/port/port.h +0 -2
- data/leveldb/port/port_android.h +7 -1
- data/leveldb/port/port_example.h +11 -1
- data/leveldb/port/port_posix.h +56 -38
- data/leveldb/table/format.cc +12 -8
- data/leveldb/table/table_test.cc +16 -7
- data/leveldb/util/cache.cc +173 -100
- data/leveldb/util/cache_test.cc +28 -11
- data/leveldb/util/coding.h +4 -4
- data/leveldb/util/comparator.cc +1 -0
- data/leveldb/util/env.cc +10 -5
- data/leveldb/util/env_posix.cc +48 -87
- data/leveldb/util/histogram.cc +11 -0
- data/leveldb/util/histogram.h +1 -0
- data/leveldb/util/posix_logger.h +98 -0
- data/leveldb/util/testharness.cc +12 -0
- data/leveldb/util/testharness.h +10 -1
- data/lib/leveldb.rb +11 -3
- metadata +41 -22
data/leveldb/db/version_set.cc
CHANGED
@@ -41,6 +41,14 @@ static uint64_t MaxFileSizeForLevel(int level) {
|
|
41
41
|
return kTargetFileSize; // We could vary per level to reduce number of files?
|
42
42
|
}
|
43
43
|
|
44
|
+
static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
|
45
|
+
int64_t sum = 0;
|
46
|
+
for (size_t i = 0; i < files.size(); i++) {
|
47
|
+
sum += files[i]->file_size;
|
48
|
+
}
|
49
|
+
return sum;
|
50
|
+
}
|
51
|
+
|
44
52
|
namespace {
|
45
53
|
std::string IntSetToString(const std::set<uint64_t>& s) {
|
46
54
|
std::string result = "{";
|
@@ -75,6 +83,78 @@ Version::~Version() {
|
|
75
83
|
}
|
76
84
|
}
|
77
85
|
|
86
|
+
int FindFile(const InternalKeyComparator& icmp,
|
87
|
+
const std::vector<FileMetaData*>& files,
|
88
|
+
const Slice& key) {
|
89
|
+
uint32_t left = 0;
|
90
|
+
uint32_t right = files.size();
|
91
|
+
while (left < right) {
|
92
|
+
uint32_t mid = (left + right) / 2;
|
93
|
+
const FileMetaData* f = files[mid];
|
94
|
+
if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) {
|
95
|
+
// Key at "mid.largest" is < "target". Therefore all
|
96
|
+
// files at or before "mid" are uninteresting.
|
97
|
+
left = mid + 1;
|
98
|
+
} else {
|
99
|
+
// Key at "mid.largest" is >= "target". Therefore all files
|
100
|
+
// after "mid" are uninteresting.
|
101
|
+
right = mid;
|
102
|
+
}
|
103
|
+
}
|
104
|
+
return right;
|
105
|
+
}
|
106
|
+
|
107
|
+
static bool AfterFile(const Comparator* ucmp,
|
108
|
+
const Slice* user_key, const FileMetaData* f) {
|
109
|
+
// NULL user_key occurs before all keys and is therefore never after *f
|
110
|
+
return (user_key != NULL &&
|
111
|
+
ucmp->Compare(*user_key, f->largest.user_key()) > 0);
|
112
|
+
}
|
113
|
+
|
114
|
+
static bool BeforeFile(const Comparator* ucmp,
|
115
|
+
const Slice* user_key, const FileMetaData* f) {
|
116
|
+
// NULL user_key occurs after all keys and is therefore never before *f
|
117
|
+
return (user_key != NULL &&
|
118
|
+
ucmp->Compare(*user_key, f->smallest.user_key()) < 0);
|
119
|
+
}
|
120
|
+
|
121
|
+
bool SomeFileOverlapsRange(
|
122
|
+
const InternalKeyComparator& icmp,
|
123
|
+
bool disjoint_sorted_files,
|
124
|
+
const std::vector<FileMetaData*>& files,
|
125
|
+
const Slice* smallest_user_key,
|
126
|
+
const Slice* largest_user_key) {
|
127
|
+
const Comparator* ucmp = icmp.user_comparator();
|
128
|
+
if (!disjoint_sorted_files) {
|
129
|
+
// Need to check against all files
|
130
|
+
for (int i = 0; i < files.size(); i++) {
|
131
|
+
const FileMetaData* f = files[i];
|
132
|
+
if (AfterFile(ucmp, smallest_user_key, f) ||
|
133
|
+
BeforeFile(ucmp, largest_user_key, f)) {
|
134
|
+
// No overlap
|
135
|
+
} else {
|
136
|
+
return true; // Overlap
|
137
|
+
}
|
138
|
+
}
|
139
|
+
return false;
|
140
|
+
}
|
141
|
+
|
142
|
+
// Binary search over file list
|
143
|
+
uint32_t index = 0;
|
144
|
+
if (smallest_user_key != NULL) {
|
145
|
+
// Find the earliest possible internal key for smallest_user_key
|
146
|
+
InternalKey small(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek);
|
147
|
+
index = FindFile(icmp, files, small.Encode());
|
148
|
+
}
|
149
|
+
|
150
|
+
if (index >= files.size()) {
|
151
|
+
// beginning of range is after all files, so no overlap.
|
152
|
+
return false;
|
153
|
+
}
|
154
|
+
|
155
|
+
return !BeforeFile(ucmp, largest_user_key, files[index]);
|
156
|
+
}
|
157
|
+
|
78
158
|
// An internal iterator. For a given version/level pair, yields
|
79
159
|
// information about the files in the level. For a given entry, key()
|
80
160
|
// is the largest key that occurs in the file, and value() is an
|
@@ -92,22 +172,7 @@ class Version::LevelFileNumIterator : public Iterator {
|
|
92
172
|
return index_ < flist_->size();
|
93
173
|
}
|
94
174
|
virtual void Seek(const Slice& target) {
|
95
|
-
|
96
|
-
uint32_t right = flist_->size() - 1;
|
97
|
-
while (left < right) {
|
98
|
-
uint32_t mid = (left + right) / 2;
|
99
|
-
int cmp = icmp_.Compare((*flist_)[mid]->largest.Encode(), target);
|
100
|
-
if (cmp < 0) {
|
101
|
-
// Key at "mid.largest" is < than "target". Therefore all
|
102
|
-
// files at or before "mid" are uninteresting.
|
103
|
-
left = mid + 1;
|
104
|
-
} else {
|
105
|
-
// Key at "mid.largest" is >= "target". Therefore all files
|
106
|
-
// after "mid" are uninteresting.
|
107
|
-
right = mid;
|
108
|
-
}
|
109
|
-
}
|
110
|
-
index_ = left;
|
175
|
+
index_ = FindFile(icmp_, *flist_, target);
|
111
176
|
}
|
112
177
|
virtual void SeekToFirst() { index_ = 0; }
|
113
178
|
virtual void SeekToLast() {
|
@@ -185,6 +250,146 @@ void Version::AddIterators(const ReadOptions& options,
|
|
185
250
|
}
|
186
251
|
}
|
187
252
|
|
253
|
+
// If "*iter" points at a value or deletion for user_key, store
|
254
|
+
// either the value, or a NotFound error and return true.
|
255
|
+
// Else return false.
|
256
|
+
static bool GetValue(Iterator* iter, const Slice& user_key,
|
257
|
+
std::string* value,
|
258
|
+
Status* s) {
|
259
|
+
if (!iter->Valid()) {
|
260
|
+
return false;
|
261
|
+
}
|
262
|
+
ParsedInternalKey parsed_key;
|
263
|
+
if (!ParseInternalKey(iter->key(), &parsed_key)) {
|
264
|
+
*s = Status::Corruption("corrupted key for ", user_key);
|
265
|
+
return true;
|
266
|
+
}
|
267
|
+
if (parsed_key.user_key != user_key) {
|
268
|
+
return false;
|
269
|
+
}
|
270
|
+
switch (parsed_key.type) {
|
271
|
+
case kTypeDeletion:
|
272
|
+
*s = Status::NotFound(Slice()); // Use an empty error message for speed
|
273
|
+
break;
|
274
|
+
case kTypeValue: {
|
275
|
+
Slice v = iter->value();
|
276
|
+
value->assign(v.data(), v.size());
|
277
|
+
break;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
return true;
|
281
|
+
}
|
282
|
+
|
283
|
+
static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
|
284
|
+
return a->number > b->number;
|
285
|
+
}
|
286
|
+
|
287
|
+
Status Version::Get(const ReadOptions& options,
|
288
|
+
const LookupKey& k,
|
289
|
+
std::string* value,
|
290
|
+
GetStats* stats) {
|
291
|
+
Slice ikey = k.internal_key();
|
292
|
+
Slice user_key = k.user_key();
|
293
|
+
const Comparator* ucmp = vset_->icmp_.user_comparator();
|
294
|
+
Status s;
|
295
|
+
|
296
|
+
stats->seek_file = NULL;
|
297
|
+
stats->seek_file_level = -1;
|
298
|
+
FileMetaData* last_file_read = NULL;
|
299
|
+
int last_file_read_level = -1;
|
300
|
+
|
301
|
+
// We can search level-by-level since entries never hop across
|
302
|
+
// levels. Therefore we are guaranteed that if we find data
|
303
|
+
// in a smaller level, later levels are irrelevant.
|
304
|
+
std::vector<FileMetaData*> tmp;
|
305
|
+
FileMetaData* tmp2;
|
306
|
+
for (int level = 0; level < config::kNumLevels; level++) {
|
307
|
+
size_t num_files = files_[level].size();
|
308
|
+
if (num_files == 0) continue;
|
309
|
+
|
310
|
+
// Get the list of files to search in this level
|
311
|
+
FileMetaData* const* files = &files_[level][0];
|
312
|
+
if (level == 0) {
|
313
|
+
// Level-0 files may overlap each other. Find all files that
|
314
|
+
// overlap user_key and process them in order from newest to oldest.
|
315
|
+
tmp.reserve(num_files);
|
316
|
+
for (uint32_t i = 0; i < num_files; i++) {
|
317
|
+
FileMetaData* f = files[i];
|
318
|
+
if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
|
319
|
+
ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
|
320
|
+
tmp.push_back(f);
|
321
|
+
}
|
322
|
+
}
|
323
|
+
if (tmp.empty()) continue;
|
324
|
+
|
325
|
+
std::sort(tmp.begin(), tmp.end(), NewestFirst);
|
326
|
+
files = &tmp[0];
|
327
|
+
num_files = tmp.size();
|
328
|
+
} else {
|
329
|
+
// Binary search to find earliest index whose largest key >= ikey.
|
330
|
+
uint32_t index = FindFile(vset_->icmp_, files_[level], ikey);
|
331
|
+
if (index >= num_files) {
|
332
|
+
files = NULL;
|
333
|
+
num_files = 0;
|
334
|
+
} else {
|
335
|
+
tmp2 = files[index];
|
336
|
+
if (ucmp->Compare(user_key, tmp2->smallest.user_key()) < 0) {
|
337
|
+
// All of "tmp2" is past any data for user_key
|
338
|
+
files = NULL;
|
339
|
+
num_files = 0;
|
340
|
+
} else {
|
341
|
+
files = &tmp2;
|
342
|
+
num_files = 1;
|
343
|
+
}
|
344
|
+
}
|
345
|
+
}
|
346
|
+
|
347
|
+
for (uint32_t i = 0; i < num_files; ++i) {
|
348
|
+
if (last_file_read != NULL && stats->seek_file == NULL) {
|
349
|
+
// We have had more than one seek for this read. Charge the 1st file.
|
350
|
+
stats->seek_file = last_file_read;
|
351
|
+
stats->seek_file_level = last_file_read_level;
|
352
|
+
}
|
353
|
+
|
354
|
+
FileMetaData* f = files[i];
|
355
|
+
last_file_read = f;
|
356
|
+
last_file_read_level = level;
|
357
|
+
|
358
|
+
Iterator* iter = vset_->table_cache_->NewIterator(
|
359
|
+
options,
|
360
|
+
f->number,
|
361
|
+
f->file_size);
|
362
|
+
iter->Seek(ikey);
|
363
|
+
const bool done = GetValue(iter, user_key, value, &s);
|
364
|
+
if (!iter->status().ok()) {
|
365
|
+
s = iter->status();
|
366
|
+
delete iter;
|
367
|
+
return s;
|
368
|
+
} else {
|
369
|
+
delete iter;
|
370
|
+
if (done) {
|
371
|
+
return s;
|
372
|
+
}
|
373
|
+
}
|
374
|
+
}
|
375
|
+
}
|
376
|
+
|
377
|
+
return Status::NotFound(Slice()); // Use an empty error message for speed
|
378
|
+
}
|
379
|
+
|
380
|
+
bool Version::UpdateStats(const GetStats& stats) {
|
381
|
+
FileMetaData* f = stats.seek_file;
|
382
|
+
if (f != NULL) {
|
383
|
+
f->allowed_seeks--;
|
384
|
+
if (f->allowed_seeks <= 0 && file_to_compact_ == NULL) {
|
385
|
+
file_to_compact_ = f;
|
386
|
+
file_to_compact_level_ = stats.seek_file_level;
|
387
|
+
return true;
|
388
|
+
}
|
389
|
+
}
|
390
|
+
return false;
|
391
|
+
}
|
392
|
+
|
188
393
|
void Version::Ref() {
|
189
394
|
++refs_;
|
190
395
|
}
|
@@ -198,26 +403,89 @@ void Version::Unref() {
|
|
198
403
|
}
|
199
404
|
}
|
200
405
|
|
406
|
+
bool Version::OverlapInLevel(int level,
|
407
|
+
const Slice* smallest_user_key,
|
408
|
+
const Slice* largest_user_key) {
|
409
|
+
return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level],
|
410
|
+
smallest_user_key, largest_user_key);
|
411
|
+
}
|
412
|
+
|
413
|
+
int Version::PickLevelForMemTableOutput(
|
414
|
+
const Slice& smallest_user_key,
|
415
|
+
const Slice& largest_user_key) {
|
416
|
+
int level = 0;
|
417
|
+
if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) {
|
418
|
+
// Push to next level if there is no overlap in next level,
|
419
|
+
// and the #bytes overlapping in the level after that are limited.
|
420
|
+
InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek);
|
421
|
+
InternalKey limit(largest_user_key, 0, static_cast<ValueType>(0));
|
422
|
+
std::vector<FileMetaData*> overlaps;
|
423
|
+
while (level < config::kMaxMemCompactLevel) {
|
424
|
+
if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
|
425
|
+
break;
|
426
|
+
}
|
427
|
+
GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
|
428
|
+
const int64_t sum = TotalFileSize(overlaps);
|
429
|
+
if (sum > kMaxGrandParentOverlapBytes) {
|
430
|
+
break;
|
431
|
+
}
|
432
|
+
level++;
|
433
|
+
}
|
434
|
+
}
|
435
|
+
return level;
|
436
|
+
}
|
437
|
+
|
438
|
+
// Store in "*inputs" all files in "level" that overlap [begin,end]
|
439
|
+
void Version::GetOverlappingInputs(
|
440
|
+
int level,
|
441
|
+
const InternalKey* begin,
|
442
|
+
const InternalKey* end,
|
443
|
+
std::vector<FileMetaData*>* inputs) {
|
444
|
+
inputs->clear();
|
445
|
+
Slice user_begin, user_end;
|
446
|
+
if (begin != NULL) {
|
447
|
+
user_begin = begin->user_key();
|
448
|
+
}
|
449
|
+
if (end != NULL) {
|
450
|
+
user_end = end->user_key();
|
451
|
+
}
|
452
|
+
const Comparator* user_cmp = vset_->icmp_.user_comparator();
|
453
|
+
for (size_t i = 0; i < files_[level].size(); i++) {
|
454
|
+
FileMetaData* f = files_[level][i];
|
455
|
+
if (begin != NULL &&
|
456
|
+
user_cmp->Compare(f->largest.user_key(), user_begin) < 0) {
|
457
|
+
// "f" is completely before specified range; skip it
|
458
|
+
} else if (end != NULL &&
|
459
|
+
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
460
|
+
// "f" is completely after specified range; skip it
|
461
|
+
} else {
|
462
|
+
inputs->push_back(f);
|
463
|
+
}
|
464
|
+
}
|
465
|
+
}
|
466
|
+
|
201
467
|
std::string Version::DebugString() const {
|
202
468
|
std::string r;
|
203
469
|
for (int level = 0; level < config::kNumLevels; level++) {
|
204
|
-
// E.g.,
|
205
|
-
|
470
|
+
// E.g.,
|
471
|
+
// --- level 1 ---
|
472
|
+
// 17:123['a' .. 'd']
|
473
|
+
// 20:43['e' .. 'g']
|
474
|
+
r.append("--- level ");
|
206
475
|
AppendNumberTo(&r, level);
|
207
|
-
r.
|
476
|
+
r.append(" ---\n");
|
208
477
|
const std::vector<FileMetaData*>& files = files_[level];
|
209
478
|
for (size_t i = 0; i < files.size(); i++) {
|
210
479
|
r.push_back(' ');
|
211
480
|
AppendNumberTo(&r, files[i]->number);
|
212
481
|
r.push_back(':');
|
213
482
|
AppendNumberTo(&r, files[i]->file_size);
|
214
|
-
r.append("[
|
215
|
-
|
216
|
-
r.append("
|
217
|
-
|
218
|
-
r.append("
|
483
|
+
r.append("[");
|
484
|
+
r.append(files[i]->smallest.DebugString());
|
485
|
+
r.append(" .. ");
|
486
|
+
r.append(files[i]->largest.DebugString());
|
487
|
+
r.append("]\n");
|
219
488
|
}
|
220
|
-
r.push_back('\n');
|
221
489
|
}
|
222
490
|
return r;
|
223
491
|
}
|
@@ -267,10 +535,15 @@ class VersionSet::Builder {
|
|
267
535
|
|
268
536
|
~Builder() {
|
269
537
|
for (int level = 0; level < config::kNumLevels; level++) {
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
for (
|
538
|
+
const FileSet* added = levels_[level].added_files;
|
539
|
+
std::vector<FileMetaData*> to_unref;
|
540
|
+
to_unref.reserve(added->size());
|
541
|
+
for (FileSet::const_iterator it = added->begin();
|
542
|
+
it != added->end(); ++it) {
|
543
|
+
to_unref.push_back(*it);
|
544
|
+
}
|
545
|
+
delete added;
|
546
|
+
for (uint32_t i = 0; i < to_unref.size(); i++) {
|
274
547
|
FileMetaData* f = to_unref[i];
|
275
548
|
f->refs--;
|
276
549
|
if (f->refs <= 0) {
|
@@ -305,6 +578,23 @@ class VersionSet::Builder {
|
|
305
578
|
const int level = edit->new_files_[i].first;
|
306
579
|
FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
|
307
580
|
f->refs = 1;
|
581
|
+
|
582
|
+
// We arrange to automatically compact this file after
|
583
|
+
// a certain number of seeks. Let's assume:
|
584
|
+
// (1) One seek costs 10ms
|
585
|
+
// (2) Writing or reading 1MB costs 10ms (100MB/s)
|
586
|
+
// (3) A compaction of 1MB does 25MB of IO:
|
587
|
+
// 1MB read from this level
|
588
|
+
// 10-12MB read from next level (boundaries may be misaligned)
|
589
|
+
// 10-12MB written to next level
|
590
|
+
// This implies that 25 seeks cost the same as the compaction
|
591
|
+
// of 1MB of data. I.e., one seek costs approximately the
|
592
|
+
// same as the compaction of 40KB of data. We are a little
|
593
|
+
// conservative and allow approximately one seek for every 16KB
|
594
|
+
// of data before triggering a compaction.
|
595
|
+
f->allowed_seeks = (f->file_size / 16384);
|
596
|
+
if (f->allowed_seeks < 100) f->allowed_seeks = 100;
|
597
|
+
|
308
598
|
levels_[level].deleted_files.erase(f->number);
|
309
599
|
levels_[level].added_files->insert(f);
|
310
600
|
}
|
@@ -344,13 +634,13 @@ class VersionSet::Builder {
|
|
344
634
|
#ifndef NDEBUG
|
345
635
|
// Make sure there is no overlap in levels > 0
|
346
636
|
if (level > 0) {
|
347
|
-
for (
|
637
|
+
for (uint32_t i = 1; i < v->files_[level].size(); i++) {
|
348
638
|
const InternalKey& prev_end = v->files_[level][i-1]->largest;
|
349
639
|
const InternalKey& this_begin = v->files_[level][i]->smallest;
|
350
640
|
if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) {
|
351
641
|
fprintf(stderr, "overlapping ranges in same level %s vs. %s\n",
|
352
|
-
|
353
|
-
|
642
|
+
prev_end.DebugString().c_str(),
|
643
|
+
this_begin.DebugString().c_str());
|
354
644
|
abort();
|
355
645
|
}
|
356
646
|
}
|
@@ -363,8 +653,14 @@ class VersionSet::Builder {
|
|
363
653
|
if (levels_[level].deleted_files.count(f->number) > 0) {
|
364
654
|
// File is deleted: do nothing
|
365
655
|
} else {
|
656
|
+
std::vector<FileMetaData*>* files = &v->files_[level];
|
657
|
+
if (level > 0 && !files->empty()) {
|
658
|
+
// Must not overlap
|
659
|
+
assert(vset_->icmp_.Compare((*files)[files->size()-1]->largest,
|
660
|
+
f->smallest) < 0);
|
661
|
+
}
|
366
662
|
f->refs++;
|
367
|
-
|
663
|
+
files->push_back(f);
|
368
664
|
}
|
369
665
|
}
|
370
666
|
};
|
@@ -414,7 +710,7 @@ void VersionSet::AppendVersion(Version* v) {
|
|
414
710
|
v->next_->prev_ = v;
|
415
711
|
}
|
416
712
|
|
417
|
-
Status VersionSet::LogAndApply(VersionEdit* edit) {
|
713
|
+
Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
|
418
714
|
if (edit->has_log_number_) {
|
419
715
|
assert(edit->log_number_ >= log_number_);
|
420
716
|
assert(edit->log_number_ < next_file_number_);
|
@@ -442,6 +738,8 @@ Status VersionSet::LogAndApply(VersionEdit* edit) {
|
|
442
738
|
std::string new_manifest_file;
|
443
739
|
Status s;
|
444
740
|
if (descriptor_log_ == NULL) {
|
741
|
+
// No reason to unlock *mu here since we only hit this path in the
|
742
|
+
// first call to LogAndApply (when opening the database).
|
445
743
|
assert(descriptor_file_ == NULL);
|
446
744
|
new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_);
|
447
745
|
edit->SetNextFile(next_file_number_);
|
@@ -452,20 +750,27 @@ Status VersionSet::LogAndApply(VersionEdit* edit) {
|
|
452
750
|
}
|
453
751
|
}
|
454
752
|
|
455
|
-
//
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
753
|
+
// Unlock during expensive MANIFEST log write
|
754
|
+
{
|
755
|
+
mu->Unlock();
|
756
|
+
|
757
|
+
// Write new record to MANIFEST log
|
460
758
|
if (s.ok()) {
|
461
|
-
|
759
|
+
std::string record;
|
760
|
+
edit->EncodeTo(&record);
|
761
|
+
s = descriptor_log_->AddRecord(record);
|
762
|
+
if (s.ok()) {
|
763
|
+
s = descriptor_file_->Sync();
|
764
|
+
}
|
462
765
|
}
|
463
|
-
}
|
464
766
|
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
767
|
+
// If we just created a new descriptor file, install it by writing a
|
768
|
+
// new CURRENT file that points to it.
|
769
|
+
if (s.ok() && !new_manifest_file.empty()) {
|
770
|
+
s = SetCurrentFile(env_, dbname_, manifest_file_number_);
|
771
|
+
}
|
772
|
+
|
773
|
+
mu->Lock();
|
469
774
|
}
|
470
775
|
|
471
776
|
// Install the new version
|
@@ -581,6 +886,9 @@ Status VersionSet::Recover() {
|
|
581
886
|
if (!have_prev_log_number) {
|
582
887
|
prev_log_number = 0;
|
583
888
|
}
|
889
|
+
|
890
|
+
MarkFileNumberUsed(prev_log_number);
|
891
|
+
MarkFileNumberUsed(log_number);
|
584
892
|
}
|
585
893
|
|
586
894
|
if (s.ok()) {
|
@@ -599,12 +907,10 @@ Status VersionSet::Recover() {
|
|
599
907
|
return s;
|
600
908
|
}
|
601
909
|
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
sum += files[i]->file_size;
|
910
|
+
void VersionSet::MarkFileNumberUsed(uint64_t number) {
|
911
|
+
if (next_file_number_ <= number) {
|
912
|
+
next_file_number_ = number + 1;
|
606
913
|
}
|
607
|
-
return sum;
|
608
914
|
}
|
609
915
|
|
610
916
|
void VersionSet::Finalize(Version* v) {
|
@@ -749,10 +1055,11 @@ int64_t VersionSet::NumLevelBytes(int level) const {
|
|
749
1055
|
int64_t VersionSet::MaxNextLevelOverlappingBytes() {
|
750
1056
|
int64_t result = 0;
|
751
1057
|
std::vector<FileMetaData*> overlaps;
|
752
|
-
for (int level =
|
1058
|
+
for (int level = 1; level < config::kNumLevels - 1; level++) {
|
753
1059
|
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
754
1060
|
const FileMetaData* f = current_->files_[level][i];
|
755
|
-
GetOverlappingInputs(level+1, f->smallest, f->largest,
|
1061
|
+
current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest,
|
1062
|
+
&overlaps);
|
756
1063
|
const int64_t sum = TotalFileSize(overlaps);
|
757
1064
|
if (sum > result) {
|
758
1065
|
result = sum;
|
@@ -762,27 +1069,6 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
|
|
762
1069
|
return result;
|
763
1070
|
}
|
764
1071
|
|
765
|
-
// Store in "*inputs" all files in "level" that overlap [begin,end]
|
766
|
-
void VersionSet::GetOverlappingInputs(
|
767
|
-
int level,
|
768
|
-
const InternalKey& begin,
|
769
|
-
const InternalKey& end,
|
770
|
-
std::vector<FileMetaData*>* inputs) {
|
771
|
-
inputs->clear();
|
772
|
-
Slice user_begin = begin.user_key();
|
773
|
-
Slice user_end = end.user_key();
|
774
|
-
const Comparator* user_cmp = icmp_.user_comparator();
|
775
|
-
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
776
|
-
FileMetaData* f = current_->files_[level][i];
|
777
|
-
if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
|
778
|
-
user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
|
779
|
-
// Either completely before or after range; skip it
|
780
|
-
} else {
|
781
|
-
inputs->push_back(f);
|
782
|
-
}
|
783
|
-
}
|
784
|
-
}
|
785
|
-
|
786
1072
|
// Stores the minimal range that covers all entries in inputs in
|
787
1073
|
// *smallest, *largest.
|
788
1074
|
// REQUIRES: inputs is not empty
|
@@ -854,31 +1140,43 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
|
|
854
1140
|
}
|
855
1141
|
|
856
1142
|
Compaction* VersionSet::PickCompaction() {
|
857
|
-
|
1143
|
+
Compaction* c;
|
1144
|
+
int level;
|
1145
|
+
|
1146
|
+
// We prefer compactions triggered by too much data in a level over
|
1147
|
+
// the compactions triggered by seeks.
|
1148
|
+
const bool size_compaction = (current_->compaction_score_ >= 1);
|
1149
|
+
const bool seek_compaction = (current_->file_to_compact_ != NULL);
|
1150
|
+
if (size_compaction) {
|
1151
|
+
level = current_->compaction_level_;
|
1152
|
+
assert(level >= 0);
|
1153
|
+
assert(level+1 < config::kNumLevels);
|
1154
|
+
c = new Compaction(level);
|
1155
|
+
|
1156
|
+
// Pick the first file that comes after compact_pointer_[level]
|
1157
|
+
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
1158
|
+
FileMetaData* f = current_->files_[level][i];
|
1159
|
+
if (compact_pointer_[level].empty() ||
|
1160
|
+
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
1161
|
+
c->inputs_[0].push_back(f);
|
1162
|
+
break;
|
1163
|
+
}
|
1164
|
+
}
|
1165
|
+
if (c->inputs_[0].empty()) {
|
1166
|
+
// Wrap-around to the beginning of the key space
|
1167
|
+
c->inputs_[0].push_back(current_->files_[level][0]);
|
1168
|
+
}
|
1169
|
+
} else if (seek_compaction) {
|
1170
|
+
level = current_->file_to_compact_level_;
|
1171
|
+
c = new Compaction(level);
|
1172
|
+
c->inputs_[0].push_back(current_->file_to_compact_);
|
1173
|
+
} else {
|
858
1174
|
return NULL;
|
859
1175
|
}
|
860
|
-
const int level = current_->compaction_level_;
|
861
|
-
assert(level >= 0);
|
862
|
-
assert(level+1 < config::kNumLevels);
|
863
1176
|
|
864
|
-
Compaction* c = new Compaction(level);
|
865
1177
|
c->input_version_ = current_;
|
866
1178
|
c->input_version_->Ref();
|
867
1179
|
|
868
|
-
// Pick the first file that comes after compact_pointer_[level]
|
869
|
-
for (size_t i = 0; i < current_->files_[level].size(); i++) {
|
870
|
-
FileMetaData* f = current_->files_[level][i];
|
871
|
-
if (compact_pointer_[level].empty() ||
|
872
|
-
icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
|
873
|
-
c->inputs_[0].push_back(f);
|
874
|
-
break;
|
875
|
-
}
|
876
|
-
}
|
877
|
-
if (c->inputs_[0].empty()) {
|
878
|
-
// Wrap-around to the beginning of the key space
|
879
|
-
c->inputs_[0].push_back(current_->files_[level][0]);
|
880
|
-
}
|
881
|
-
|
882
1180
|
// Files in level 0 may overlap each other, so pick up all overlapping ones
|
883
1181
|
if (level == 0) {
|
884
1182
|
InternalKey smallest, largest;
|
@@ -886,7 +1184,7 @@ Compaction* VersionSet::PickCompaction() {
|
|
886
1184
|
// Note that the next call will discard the file we placed in
|
887
1185
|
// c->inputs_[0] earlier and replace it with an overlapping set
|
888
1186
|
// which will include the picked file.
|
889
|
-
GetOverlappingInputs(0, smallest, largest, &c->inputs_[0]);
|
1187
|
+
current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]);
|
890
1188
|
assert(!c->inputs_[0].empty());
|
891
1189
|
}
|
892
1190
|
|
@@ -900,7 +1198,7 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
900
1198
|
InternalKey smallest, largest;
|
901
1199
|
GetRange(c->inputs_[0], &smallest, &largest);
|
902
1200
|
|
903
|
-
GetOverlappingInputs(level+1, smallest, largest, &c->inputs_[1]);
|
1201
|
+
current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]);
|
904
1202
|
|
905
1203
|
// Get entire range covered by compaction
|
906
1204
|
InternalKey all_start, all_limit;
|
@@ -910,14 +1208,15 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
910
1208
|
// changing the number of "level+1" files we pick up.
|
911
1209
|
if (!c->inputs_[1].empty()) {
|
912
1210
|
std::vector<FileMetaData*> expanded0;
|
913
|
-
GetOverlappingInputs(level, all_start, all_limit, &expanded0);
|
1211
|
+
current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0);
|
914
1212
|
if (expanded0.size() > c->inputs_[0].size()) {
|
915
1213
|
InternalKey new_start, new_limit;
|
916
1214
|
GetRange(expanded0, &new_start, &new_limit);
|
917
1215
|
std::vector<FileMetaData*> expanded1;
|
918
|
-
GetOverlappingInputs(level+1, new_start, new_limit,
|
1216
|
+
current_->GetOverlappingInputs(level+1, &new_start, &new_limit,
|
1217
|
+
&expanded1);
|
919
1218
|
if (expanded1.size() == c->inputs_[1].size()) {
|
920
|
-
Log(
|
1219
|
+
Log(options_->info_log,
|
921
1220
|
"Expanding@%d %d+%d to %d+%d\n",
|
922
1221
|
level,
|
923
1222
|
int(c->inputs_[0].size()),
|
@@ -936,14 +1235,15 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
936
1235
|
// Compute the set of grandparent files that overlap this compaction
|
937
1236
|
// (parent == level+1; grandparent == level+2)
|
938
1237
|
if (level + 2 < config::kNumLevels) {
|
939
|
-
GetOverlappingInputs(level + 2, all_start, all_limit,
|
1238
|
+
current_->GetOverlappingInputs(level + 2, &all_start, &all_limit,
|
1239
|
+
&c->grandparents_);
|
940
1240
|
}
|
941
1241
|
|
942
1242
|
if (false) {
|
943
|
-
Log(
|
1243
|
+
Log(options_->info_log, "Compacting %d '%s' .. '%s'",
|
944
1244
|
level,
|
945
|
-
|
946
|
-
|
1245
|
+
smallest.DebugString().c_str(),
|
1246
|
+
largest.DebugString().c_str());
|
947
1247
|
}
|
948
1248
|
|
949
1249
|
// Update the place where we will do the next compaction for this level.
|
@@ -956,14 +1256,26 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
|
|
956
1256
|
|
957
1257
|
Compaction* VersionSet::CompactRange(
|
958
1258
|
int level,
|
959
|
-
const InternalKey
|
960
|
-
const InternalKey
|
1259
|
+
const InternalKey* begin,
|
1260
|
+
const InternalKey* end) {
|
961
1261
|
std::vector<FileMetaData*> inputs;
|
962
|
-
GetOverlappingInputs(level, begin, end, &inputs);
|
1262
|
+
current_->GetOverlappingInputs(level, begin, end, &inputs);
|
963
1263
|
if (inputs.empty()) {
|
964
1264
|
return NULL;
|
965
1265
|
}
|
966
1266
|
|
1267
|
+
// Avoid compacting too much in one shot in case the range is large.
|
1268
|
+
const uint64_t limit = MaxFileSizeForLevel(level);
|
1269
|
+
uint64_t total = 0;
|
1270
|
+
for (int i = 0; i < inputs.size(); i++) {
|
1271
|
+
uint64_t s = inputs[i]->file_size;
|
1272
|
+
total += s;
|
1273
|
+
if (total >= limit) {
|
1274
|
+
inputs.resize(i + 1);
|
1275
|
+
break;
|
1276
|
+
}
|
1277
|
+
}
|
1278
|
+
|
967
1279
|
Compaction* c = new Compaction(level);
|
968
1280
|
c->input_version_ = current_;
|
969
1281
|
c->input_version_->Ref();
|