leveldb-ruby 0.7 → 0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README +1 -1
- data/leveldb/Makefile +70 -29
- data/leveldb/build_detect_platform +74 -0
- data/leveldb/db/builder.cc +2 -4
- data/leveldb/db/builder.h +4 -6
- data/leveldb/db/c.cc +471 -0
- data/leveldb/db/corruption_test.cc +21 -16
- data/leveldb/db/db_bench.cc +400 -200
- data/leveldb/db/db_impl.cc +276 -131
- data/leveldb/db/db_impl.h +22 -10
- data/leveldb/db/db_iter.cc +2 -1
- data/leveldb/db/db_test.cc +391 -43
- data/leveldb/db/dbformat.cc +31 -0
- data/leveldb/db/dbformat.h +51 -1
- data/leveldb/db/filename.h +1 -1
- data/leveldb/db/log_format.h +1 -1
- data/leveldb/db/log_reader.cc +16 -11
- data/leveldb/db/memtable.cc +37 -0
- data/leveldb/db/memtable.h +6 -0
- data/leveldb/db/repair.cc +17 -14
- data/leveldb/db/skiplist_test.cc +2 -2
- data/leveldb/db/version_edit.cc +7 -9
- data/leveldb/db/version_edit.h +2 -1
- data/leveldb/db/version_set.cc +416 -104
- data/leveldb/db/version_set.h +78 -14
- data/leveldb/db/version_set_test.cc +179 -0
- data/leveldb/db/write_batch_internal.h +2 -0
- data/leveldb/include/leveldb/c.h +246 -0
- data/leveldb/include/leveldb/db.h +14 -2
- data/leveldb/include/leveldb/env.h +31 -10
- data/leveldb/include/leveldb/options.h +7 -18
- data/leveldb/include/leveldb/slice.h +2 -2
- data/leveldb/include/leveldb/status.h +1 -1
- data/leveldb/port/atomic_pointer.h +144 -0
- data/leveldb/port/port.h +0 -2
- data/leveldb/port/port_android.h +7 -1
- data/leveldb/port/port_example.h +11 -1
- data/leveldb/port/port_posix.h +56 -38
- data/leveldb/table/format.cc +12 -8
- data/leveldb/table/table_test.cc +16 -7
- data/leveldb/util/cache.cc +173 -100
- data/leveldb/util/cache_test.cc +28 -11
- data/leveldb/util/coding.h +4 -4
- data/leveldb/util/comparator.cc +1 -0
- data/leveldb/util/env.cc +10 -5
- data/leveldb/util/env_posix.cc +48 -87
- data/leveldb/util/histogram.cc +11 -0
- data/leveldb/util/histogram.h +1 -0
- data/leveldb/util/posix_logger.h +98 -0
- data/leveldb/util/testharness.cc +12 -0
- data/leveldb/util/testharness.h +10 -1
- data/lib/leveldb.rb +11 -3
- metadata +41 -22
data/leveldb/db/db_impl.cc
CHANGED
@@ -68,16 +68,6 @@ struct DBImpl::CompactionState {
|
|
68
68
|
}
|
69
69
|
};
|
70
70
|
|
71
|
-
namespace {
|
72
|
-
class NullWritableFile : public WritableFile {
|
73
|
-
public:
|
74
|
-
virtual Status Append(const Slice& data) { return Status::OK(); }
|
75
|
-
virtual Status Close() { return Status::OK(); }
|
76
|
-
virtual Status Flush() { return Status::OK(); }
|
77
|
-
virtual Status Sync() { return Status::OK(); }
|
78
|
-
};
|
79
|
-
}
|
80
|
-
|
81
71
|
// Fix user-supplied options to be reasonable
|
82
72
|
template <class T,class V>
|
83
73
|
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
|
@@ -96,11 +86,10 @@ Options SanitizeOptions(const std::string& dbname,
|
|
96
86
|
// Open a log file in the same directory as the db
|
97
87
|
src.env->CreateDir(dbname); // In case it does not exist
|
98
88
|
src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname));
|
99
|
-
Status s = src.env->
|
100
|
-
&result.info_log);
|
89
|
+
Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log);
|
101
90
|
if (!s.ok()) {
|
102
91
|
// No place suitable for logging
|
103
|
-
result.info_log =
|
92
|
+
result.info_log = NULL;
|
104
93
|
}
|
105
94
|
}
|
106
95
|
if (result.block_cache == NULL) {
|
@@ -119,13 +108,15 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
|
|
119
108
|
db_lock_(NULL),
|
120
109
|
shutting_down_(NULL),
|
121
110
|
bg_cv_(&mutex_),
|
122
|
-
compacting_cv_(&mutex_),
|
123
111
|
mem_(new MemTable(internal_comparator_)),
|
124
112
|
imm_(NULL),
|
125
113
|
logfile_(NULL),
|
114
|
+
logfile_number_(0),
|
126
115
|
log_(NULL),
|
116
|
+
logger_(NULL),
|
117
|
+
logger_cv_(&mutex_),
|
127
118
|
bg_compaction_scheduled_(false),
|
128
|
-
|
119
|
+
manual_compaction_(NULL) {
|
129
120
|
mem_->Ref();
|
130
121
|
has_imm_.Release_Store(NULL);
|
131
122
|
|
@@ -141,10 +132,8 @@ DBImpl::~DBImpl() {
|
|
141
132
|
// Wait for background work to finish
|
142
133
|
mutex_.Lock();
|
143
134
|
shutting_down_.Release_Store(this); // Any non-NULL value is ok
|
144
|
-
|
145
|
-
|
146
|
-
bg_cv_.Wait();
|
147
|
-
}
|
135
|
+
while (bg_compaction_scheduled_) {
|
136
|
+
bg_cv_.Wait();
|
148
137
|
}
|
149
138
|
mutex_.Unlock();
|
150
139
|
|
@@ -203,7 +192,7 @@ void DBImpl::MaybeIgnoreError(Status* s) const {
|
|
203
192
|
if (s->ok() || options_.paranoid_checks) {
|
204
193
|
// No change needed
|
205
194
|
} else {
|
206
|
-
Log(
|
195
|
+
Log(options_.info_log, "Ignoring error %s", s->ToString().c_str());
|
207
196
|
*s = Status::OK();
|
208
197
|
}
|
209
198
|
}
|
@@ -222,7 +211,7 @@ void DBImpl::DeleteObsoleteFiles() {
|
|
222
211
|
bool keep = true;
|
223
212
|
switch (type) {
|
224
213
|
case kLogFile:
|
225
|
-
keep = ((number
|
214
|
+
keep = ((number >= versions_->LogNumber()) ||
|
226
215
|
(number == versions_->PrevLogNumber()));
|
227
216
|
break;
|
228
217
|
case kDescriptorFile:
|
@@ -249,7 +238,7 @@ void DBImpl::DeleteObsoleteFiles() {
|
|
249
238
|
if (type == kTableFile) {
|
250
239
|
table_cache_->Evict(number);
|
251
240
|
}
|
252
|
-
Log(
|
241
|
+
Log(options_.info_log, "Delete type=%d #%lld\n",
|
253
242
|
int(type),
|
254
243
|
static_cast<unsigned long long>(number));
|
255
244
|
env_->DeleteFile(dbname_ + "/" + filenames[i]);
|
@@ -290,14 +279,44 @@ Status DBImpl::Recover(VersionEdit* edit) {
|
|
290
279
|
|
291
280
|
s = versions_->Recover();
|
292
281
|
if (s.ok()) {
|
293
|
-
// Recover from the log files named in the descriptor
|
294
282
|
SequenceNumber max_sequence(0);
|
295
|
-
|
296
|
-
|
283
|
+
|
284
|
+
// Recover from all newer log files than the ones named in the
|
285
|
+
// descriptor (new log files may have been added by the previous
|
286
|
+
// incarnation without registering them in the descriptor).
|
287
|
+
//
|
288
|
+
// Note that PrevLogNumber() is no longer used, but we pay
|
289
|
+
// attention to it in case we are recovering a database
|
290
|
+
// produced by an older version of leveldb.
|
291
|
+
const uint64_t min_log = versions_->LogNumber();
|
292
|
+
const uint64_t prev_log = versions_->PrevLogNumber();
|
293
|
+
std::vector<std::string> filenames;
|
294
|
+
s = env_->GetChildren(dbname_, &filenames);
|
295
|
+
if (!s.ok()) {
|
296
|
+
return s;
|
297
|
+
}
|
298
|
+
uint64_t number;
|
299
|
+
FileType type;
|
300
|
+
std::vector<uint64_t> logs;
|
301
|
+
for (size_t i = 0; i < filenames.size(); i++) {
|
302
|
+
if (ParseFileName(filenames[i], &number, &type)
|
303
|
+
&& type == kLogFile
|
304
|
+
&& ((number >= min_log) || (number == prev_log))) {
|
305
|
+
logs.push_back(number);
|
306
|
+
}
|
297
307
|
}
|
298
|
-
|
299
|
-
|
308
|
+
|
309
|
+
// Recover in the order in which the logs were generated
|
310
|
+
std::sort(logs.begin(), logs.end());
|
311
|
+
for (size_t i = 0; i < logs.size(); i++) {
|
312
|
+
s = RecoverLogFile(logs[i], edit, &max_sequence);
|
313
|
+
|
314
|
+
// The previous incarnation may not have written any MANIFEST
|
315
|
+
// records after allocating this log number. So we manually
|
316
|
+
// update the file number allocation counter in VersionSet.
|
317
|
+
versions_->MarkFileNumberUsed(logs[i]);
|
300
318
|
}
|
319
|
+
|
301
320
|
if (s.ok()) {
|
302
321
|
if (versions_->LastSequence() < max_sequence) {
|
303
322
|
versions_->SetLastSequence(max_sequence);
|
@@ -313,11 +332,11 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
|
|
313
332
|
SequenceNumber* max_sequence) {
|
314
333
|
struct LogReporter : public log::Reader::Reporter {
|
315
334
|
Env* env;
|
316
|
-
|
335
|
+
Logger* info_log;
|
317
336
|
const char* fname;
|
318
337
|
Status* status; // NULL if options_.paranoid_checks==false
|
319
338
|
virtual void Corruption(size_t bytes, const Status& s) {
|
320
|
-
Log(
|
339
|
+
Log(info_log, "%s%s: dropping %d bytes; %s",
|
321
340
|
(this->status == NULL ? "(ignoring error) " : ""),
|
322
341
|
fname, static_cast<int>(bytes), s.ToString().c_str());
|
323
342
|
if (this->status != NULL && this->status->ok()) *this->status = s;
|
@@ -347,7 +366,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
|
|
347
366
|
// large sequence numbers).
|
348
367
|
log::Reader reader(file, &reporter, true/*checksum*/,
|
349
368
|
0/*initial_offset*/);
|
350
|
-
Log(
|
369
|
+
Log(options_.info_log, "Recovering log #%llu",
|
351
370
|
(unsigned long long) log_number);
|
352
371
|
|
353
372
|
// Read all the records and add to a memtable
|
@@ -381,7 +400,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
|
|
381
400
|
}
|
382
401
|
|
383
402
|
if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
|
384
|
-
status = WriteLevel0Table(mem, edit);
|
403
|
+
status = WriteLevel0Table(mem, edit, NULL);
|
385
404
|
if (!status.ok()) {
|
386
405
|
// Reflect errors immediately so that conditions like full
|
387
406
|
// file-systems cause the DB::Open() to fail.
|
@@ -393,7 +412,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
|
|
393
412
|
}
|
394
413
|
|
395
414
|
if (status.ok() && mem != NULL) {
|
396
|
-
status = WriteLevel0Table(mem, edit);
|
415
|
+
status = WriteLevel0Table(mem, edit, NULL);
|
397
416
|
// Reflect errors immediately so that conditions like full
|
398
417
|
// file-systems cause the DB::Open() to fail.
|
399
418
|
}
|
@@ -403,50 +422,72 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
|
|
403
422
|
return status;
|
404
423
|
}
|
405
424
|
|
406
|
-
Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit
|
425
|
+
Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
|
426
|
+
Version* base) {
|
407
427
|
mutex_.AssertHeld();
|
408
428
|
const uint64_t start_micros = env_->NowMicros();
|
409
429
|
FileMetaData meta;
|
410
430
|
meta.number = versions_->NewFileNumber();
|
411
431
|
pending_outputs_.insert(meta.number);
|
412
432
|
Iterator* iter = mem->NewIterator();
|
413
|
-
Log(
|
433
|
+
Log(options_.info_log, "Level-0 table #%llu: started",
|
414
434
|
(unsigned long long) meta.number);
|
415
435
|
|
416
436
|
Status s;
|
417
437
|
{
|
418
438
|
mutex_.Unlock();
|
419
|
-
s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta
|
439
|
+
s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
|
420
440
|
mutex_.Lock();
|
421
441
|
}
|
422
442
|
|
423
|
-
Log(
|
443
|
+
Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s",
|
424
444
|
(unsigned long long) meta.number,
|
425
445
|
(unsigned long long) meta.file_size,
|
426
446
|
s.ToString().c_str());
|
427
447
|
delete iter;
|
428
448
|
pending_outputs_.erase(meta.number);
|
429
449
|
|
450
|
+
|
451
|
+
// Note that if file_size is zero, the file has been deleted and
|
452
|
+
// should not be added to the manifest.
|
453
|
+
int level = 0;
|
454
|
+
if (s.ok() && meta.file_size > 0) {
|
455
|
+
const Slice min_user_key = meta.smallest.user_key();
|
456
|
+
const Slice max_user_key = meta.largest.user_key();
|
457
|
+
if (base != NULL) {
|
458
|
+
level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
|
459
|
+
}
|
460
|
+
edit->AddFile(level, meta.number, meta.file_size,
|
461
|
+
meta.smallest, meta.largest);
|
462
|
+
}
|
463
|
+
|
430
464
|
CompactionStats stats;
|
431
465
|
stats.micros = env_->NowMicros() - start_micros;
|
432
466
|
stats.bytes_written = meta.file_size;
|
433
|
-
stats_[
|
467
|
+
stats_[level].Add(stats);
|
434
468
|
return s;
|
435
469
|
}
|
436
470
|
|
437
471
|
Status DBImpl::CompactMemTable() {
|
438
472
|
mutex_.AssertHeld();
|
439
473
|
assert(imm_ != NULL);
|
440
|
-
assert(compacting_);
|
441
474
|
|
442
475
|
// Save the contents of the memtable as a new Table
|
443
476
|
VersionEdit edit;
|
444
|
-
|
477
|
+
Version* base = versions_->current();
|
478
|
+
base->Ref();
|
479
|
+
Status s = WriteLevel0Table(imm_, &edit, base);
|
480
|
+
base->Unref();
|
481
|
+
|
482
|
+
if (s.ok() && shutting_down_.Acquire_Load()) {
|
483
|
+
s = Status::IOError("Deleting DB during memtable compaction");
|
484
|
+
}
|
445
485
|
|
446
486
|
// Replace immutable memtable with the generated Table
|
447
487
|
if (s.ok()) {
|
448
488
|
edit.SetPrevLogNumber(0);
|
449
|
-
|
489
|
+
edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed
|
490
|
+
s = versions_->LogAndApply(&edit, &mutex_);
|
450
491
|
}
|
451
492
|
|
452
493
|
if (s.ok()) {
|
@@ -457,40 +498,71 @@ Status DBImpl::CompactMemTable() {
|
|
457
498
|
DeleteObsoleteFiles();
|
458
499
|
}
|
459
500
|
|
460
|
-
compacting_cv_.SignalAll(); // Wake up waiter even if there was an error
|
461
501
|
return s;
|
462
502
|
}
|
463
503
|
|
464
|
-
void DBImpl::
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
504
|
+
void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
|
505
|
+
int max_level_with_files = 1;
|
506
|
+
{
|
507
|
+
MutexLock l(&mutex_);
|
508
|
+
Version* base = versions_->current();
|
509
|
+
for (int level = 1; level < config::kNumLevels; level++) {
|
510
|
+
if (base->OverlapInLevel(level, begin, end)) {
|
511
|
+
max_level_with_files = level;
|
512
|
+
}
|
513
|
+
}
|
514
|
+
}
|
515
|
+
TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap
|
516
|
+
for (int level = 0; level < max_level_with_files; level++) {
|
517
|
+
TEST_CompactRange(level, begin, end);
|
471
518
|
}
|
472
|
-
|
473
|
-
level,
|
474
|
-
InternalKey(begin, kMaxSequenceNumber, kValueTypeForSeek),
|
475
|
-
InternalKey(end, 0, static_cast<ValueType>(0)));
|
519
|
+
}
|
476
520
|
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
521
|
+
void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
|
522
|
+
assert(level >= 0);
|
523
|
+
assert(level + 1 < config::kNumLevels);
|
524
|
+
|
525
|
+
InternalKey begin_storage, end_storage;
|
526
|
+
|
527
|
+
ManualCompaction manual;
|
528
|
+
manual.level = level;
|
529
|
+
manual.done = false;
|
530
|
+
if (begin == NULL) {
|
531
|
+
manual.begin = NULL;
|
532
|
+
} else {
|
533
|
+
begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek);
|
534
|
+
manual.begin = &begin_storage;
|
535
|
+
}
|
536
|
+
if (end == NULL) {
|
537
|
+
manual.end = NULL;
|
538
|
+
} else {
|
539
|
+
end_storage = InternalKey(*end, 0, static_cast<ValueType>(0));
|
540
|
+
manual.end = &end_storage;
|
481
541
|
}
|
482
542
|
|
483
|
-
|
484
|
-
|
543
|
+
MutexLock l(&mutex_);
|
544
|
+
while (!manual.done) {
|
545
|
+
while (manual_compaction_ != NULL) {
|
546
|
+
bg_cv_.Wait();
|
547
|
+
}
|
548
|
+
manual_compaction_ = &manual;
|
549
|
+
MaybeScheduleCompaction();
|
550
|
+
while (manual_compaction_ == &manual) {
|
551
|
+
bg_cv_.Wait();
|
552
|
+
}
|
553
|
+
}
|
485
554
|
}
|
486
555
|
|
487
556
|
Status DBImpl::TEST_CompactMemTable() {
|
488
557
|
MutexLock l(&mutex_);
|
558
|
+
LoggerId self;
|
559
|
+
AcquireLoggingResponsibility(&self);
|
489
560
|
Status s = MakeRoomForWrite(true /* force compaction */);
|
561
|
+
ReleaseLoggingResponsibility(&self);
|
490
562
|
if (s.ok()) {
|
491
563
|
// Wait until the compaction completes
|
492
564
|
while (imm_ != NULL && bg_error_.ok()) {
|
493
|
-
|
565
|
+
bg_cv_.Wait();
|
494
566
|
}
|
495
567
|
if (imm_ != NULL) {
|
496
568
|
s = bg_error_;
|
@@ -503,11 +575,11 @@ void DBImpl::MaybeScheduleCompaction() {
|
|
503
575
|
mutex_.AssertHeld();
|
504
576
|
if (bg_compaction_scheduled_) {
|
505
577
|
// Already scheduled
|
506
|
-
} else if (compacting_) {
|
507
|
-
// Some other thread is running a compaction. Do not conflict with it.
|
508
578
|
} else if (shutting_down_.Acquire_Load()) {
|
509
579
|
// DB is being deleted; no more background compactions
|
510
|
-
} else if (imm_ == NULL &&
|
580
|
+
} else if (imm_ == NULL &&
|
581
|
+
manual_compaction_ == NULL &&
|
582
|
+
!versions_->NeedsCompaction()) {
|
511
583
|
// No work to be done
|
512
584
|
} else {
|
513
585
|
bg_compaction_scheduled_ = true;
|
@@ -522,50 +594,63 @@ void DBImpl::BGWork(void* db) {
|
|
522
594
|
void DBImpl::BackgroundCall() {
|
523
595
|
MutexLock l(&mutex_);
|
524
596
|
assert(bg_compaction_scheduled_);
|
525
|
-
if (!shutting_down_.Acquire_Load()
|
526
|
-
!compacting_) {
|
597
|
+
if (!shutting_down_.Acquire_Load()) {
|
527
598
|
BackgroundCompaction();
|
528
599
|
}
|
529
600
|
bg_compaction_scheduled_ = false;
|
530
|
-
bg_cv_.SignalAll();
|
531
601
|
|
532
602
|
// Previous compaction may have produced too many files in a level,
|
533
603
|
// so reschedule another compaction if needed.
|
534
604
|
MaybeScheduleCompaction();
|
605
|
+
bg_cv_.SignalAll();
|
535
606
|
}
|
536
607
|
|
537
608
|
void DBImpl::BackgroundCompaction() {
|
538
609
|
mutex_.AssertHeld();
|
539
|
-
assert(!compacting_);
|
540
610
|
|
541
611
|
if (imm_ != NULL) {
|
542
|
-
compacting_ = true;
|
543
612
|
CompactMemTable();
|
544
|
-
compacting_ = false;
|
545
|
-
compacting_cv_.SignalAll();
|
546
613
|
return;
|
547
614
|
}
|
548
615
|
|
549
|
-
Compaction* c
|
550
|
-
|
551
|
-
|
552
|
-
|
616
|
+
Compaction* c;
|
617
|
+
bool is_manual = (manual_compaction_ != NULL);
|
618
|
+
InternalKey manual_end;
|
619
|
+
if (is_manual) {
|
620
|
+
ManualCompaction* m = manual_compaction_;
|
621
|
+
c = versions_->CompactRange(m->level, m->begin, m->end);
|
622
|
+
m->done = (c == NULL);
|
623
|
+
if (c != NULL) {
|
624
|
+
manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
|
625
|
+
}
|
626
|
+
Log(options_.info_log,
|
627
|
+
"Manual compaction at level-%d from %s .. %s; will stop at %s\n",
|
628
|
+
m->level,
|
629
|
+
(m->begin ? m->begin->DebugString().c_str() : "(begin)"),
|
630
|
+
(m->end ? m->end->DebugString().c_str() : "(end)"),
|
631
|
+
(m->done ? "(end)" : manual_end.DebugString().c_str()));
|
632
|
+
} else {
|
633
|
+
c = versions_->PickCompaction();
|
553
634
|
}
|
554
635
|
|
555
636
|
Status status;
|
556
|
-
if (c
|
637
|
+
if (c == NULL) {
|
638
|
+
// Nothing to do
|
639
|
+
} else if (!is_manual && c->IsTrivialMove()) {
|
557
640
|
// Move file to next level
|
558
641
|
assert(c->num_input_files(0) == 1);
|
559
642
|
FileMetaData* f = c->input(0, 0);
|
560
643
|
c->edit()->DeleteFile(c->level(), f->number);
|
561
644
|
c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
|
562
645
|
f->smallest, f->largest);
|
563
|
-
status = versions_->LogAndApply(c->edit());
|
564
|
-
|
646
|
+
status = versions_->LogAndApply(c->edit(), &mutex_);
|
647
|
+
VersionSet::LevelSummaryStorage tmp;
|
648
|
+
Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
|
565
649
|
static_cast<unsigned long long>(f->number),
|
566
650
|
c->level() + 1,
|
567
651
|
static_cast<unsigned long long>(f->file_size),
|
568
|
-
status.ToString().c_str()
|
652
|
+
status.ToString().c_str(),
|
653
|
+
versions_->LevelSummary(&tmp));
|
569
654
|
} else {
|
570
655
|
CompactionState* compact = new CompactionState(c);
|
571
656
|
status = DoCompactionWork(compact);
|
@@ -578,12 +663,23 @@ void DBImpl::BackgroundCompaction() {
|
|
578
663
|
} else if (shutting_down_.Acquire_Load()) {
|
579
664
|
// Ignore compaction errors found during shutting down
|
580
665
|
} else {
|
581
|
-
Log(
|
666
|
+
Log(options_.info_log,
|
582
667
|
"Compaction error: %s", status.ToString().c_str());
|
583
668
|
if (options_.paranoid_checks && bg_error_.ok()) {
|
584
669
|
bg_error_ = status;
|
585
670
|
}
|
586
671
|
}
|
672
|
+
|
673
|
+
if (is_manual) {
|
674
|
+
ManualCompaction* m = manual_compaction_;
|
675
|
+
if (!m->done) {
|
676
|
+
// We only compacted part of the requested range. Update *m
|
677
|
+
// to the range that is left to be compacted.
|
678
|
+
m->tmp_storage = manual_end;
|
679
|
+
m->begin = &m->tmp_storage;
|
680
|
+
}
|
681
|
+
manual_compaction_ = NULL;
|
682
|
+
}
|
587
683
|
}
|
588
684
|
|
589
685
|
void DBImpl::CleanupCompaction(CompactionState* compact) {
|
@@ -669,7 +765,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
|
|
669
765
|
s = iter->status();
|
670
766
|
delete iter;
|
671
767
|
if (s.ok()) {
|
672
|
-
Log(
|
768
|
+
Log(options_.info_log,
|
673
769
|
"Generated table #%llu: %lld keys, %lld bytes",
|
674
770
|
(unsigned long long) output_number,
|
675
771
|
(unsigned long long) current_entries,
|
@@ -682,7 +778,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
|
|
682
778
|
|
683
779
|
Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
684
780
|
mutex_.AssertHeld();
|
685
|
-
Log(
|
781
|
+
Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes",
|
686
782
|
compact->compaction->num_input_files(0),
|
687
783
|
compact->compaction->level(),
|
688
784
|
compact->compaction->num_input_files(1),
|
@@ -701,7 +797,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
|
|
701
797
|
}
|
702
798
|
compact->outputs.clear();
|
703
799
|
|
704
|
-
Status s = versions_->LogAndApply(compact->compaction->edit());
|
800
|
+
Status s = versions_->LogAndApply(compact->compaction->edit(), &mutex_);
|
705
801
|
if (s.ok()) {
|
706
802
|
compact->compaction->ReleaseInputs();
|
707
803
|
DeleteObsoleteFiles();
|
@@ -718,7 +814,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|
718
814
|
const uint64_t start_micros = env_->NowMicros();
|
719
815
|
int64_t imm_micros = 0; // Micros spent doing imm_ compactions
|
720
816
|
|
721
|
-
Log(
|
817
|
+
Log(options_.info_log, "Compacting %d@%d + %d@%d files",
|
722
818
|
compact->compaction->num_input_files(0),
|
723
819
|
compact->compaction->level(),
|
724
820
|
compact->compaction->num_input_files(1),
|
@@ -734,7 +830,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|
734
830
|
}
|
735
831
|
|
736
832
|
// Release mutex while we're actually doing the compaction work
|
737
|
-
compacting_ = true;
|
738
833
|
mutex_.Unlock();
|
739
834
|
|
740
835
|
Iterator* input = versions_->MakeInputIterator(compact->compaction);
|
@@ -751,7 +846,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|
751
846
|
mutex_.Lock();
|
752
847
|
if (imm_ != NULL) {
|
753
848
|
CompactMemTable();
|
754
|
-
|
849
|
+
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary
|
755
850
|
}
|
756
851
|
mutex_.Unlock();
|
757
852
|
imm_micros += (env_->NowMicros() - imm_start);
|
@@ -802,7 +897,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|
802
897
|
last_sequence_for_key = ikey.sequence;
|
803
898
|
}
|
804
899
|
#if 0
|
805
|
-
Log(
|
900
|
+
Log(options_.info_log,
|
806
901
|
" Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
|
807
902
|
"%d smallest_snapshot: %d",
|
808
903
|
ikey.user_key.ToString().c_str(),
|
@@ -867,10 +962,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
|
|
867
962
|
if (status.ok()) {
|
868
963
|
status = InstallCompactionResults(compact);
|
869
964
|
}
|
870
|
-
compacting_ = false;
|
871
|
-
compacting_cv_.SignalAll();
|
872
965
|
VersionSet::LevelSummaryStorage tmp;
|
873
|
-
Log(
|
966
|
+
Log(options_.info_log,
|
874
967
|
"compacted to: %s", versions_->LevelSummary(&tmp));
|
875
968
|
return status;
|
876
969
|
}
|
@@ -936,22 +1029,48 @@ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
|
|
936
1029
|
Status DBImpl::Get(const ReadOptions& options,
|
937
1030
|
const Slice& key,
|
938
1031
|
std::string* value) {
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
found = true;
|
947
|
-
}
|
948
|
-
// Non-OK iterator status trumps everything else
|
949
|
-
Status result = iter->status();
|
950
|
-
if (result.ok() && !found) {
|
951
|
-
result = Status::NotFound(Slice()); // Use an empty error message for speed
|
1032
|
+
Status s;
|
1033
|
+
MutexLock l(&mutex_);
|
1034
|
+
SequenceNumber snapshot;
|
1035
|
+
if (options.snapshot != NULL) {
|
1036
|
+
snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
|
1037
|
+
} else {
|
1038
|
+
snapshot = versions_->LastSequence();
|
952
1039
|
}
|
953
|
-
|
954
|
-
|
1040
|
+
|
1041
|
+
MemTable* mem = mem_;
|
1042
|
+
MemTable* imm = imm_;
|
1043
|
+
Version* current = versions_->current();
|
1044
|
+
mem->Ref();
|
1045
|
+
if (imm != NULL) imm->Ref();
|
1046
|
+
current->Ref();
|
1047
|
+
|
1048
|
+
bool have_stat_update = false;
|
1049
|
+
Version::GetStats stats;
|
1050
|
+
|
1051
|
+
// Unlock while reading from files and memtables
|
1052
|
+
{
|
1053
|
+
mutex_.Unlock();
|
1054
|
+
// First look in the memtable, then in the immutable memtable (if any).
|
1055
|
+
LookupKey lkey(key, snapshot);
|
1056
|
+
if (mem->Get(lkey, value, &s)) {
|
1057
|
+
// Done
|
1058
|
+
} else if (imm != NULL && imm->Get(lkey, value, &s)) {
|
1059
|
+
// Done
|
1060
|
+
} else {
|
1061
|
+
s = current->Get(options, lkey, value, &stats);
|
1062
|
+
have_stat_update = true;
|
1063
|
+
}
|
1064
|
+
mutex_.Lock();
|
1065
|
+
}
|
1066
|
+
|
1067
|
+
if (have_stat_update && current->UpdateStats(stats)) {
|
1068
|
+
MaybeScheduleCompaction();
|
1069
|
+
}
|
1070
|
+
mem->Unref();
|
1071
|
+
if (imm != NULL) imm->Unref();
|
1072
|
+
current->Unref();
|
1073
|
+
return s;
|
955
1074
|
}
|
956
1075
|
|
957
1076
|
Iterator* DBImpl::NewIterator(const ReadOptions& options) {
|
@@ -983,34 +1102,61 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
|
|
983
1102
|
return DB::Delete(options, key);
|
984
1103
|
}
|
985
1104
|
|
1105
|
+
// There is at most one thread that is the current logger. This call
|
1106
|
+
// waits until preceding logger(s) have finished and becomes the
|
1107
|
+
// current logger.
|
1108
|
+
void DBImpl::AcquireLoggingResponsibility(LoggerId* self) {
|
1109
|
+
while (logger_ != NULL) {
|
1110
|
+
logger_cv_.Wait();
|
1111
|
+
}
|
1112
|
+
logger_ = self;
|
1113
|
+
}
|
1114
|
+
|
1115
|
+
void DBImpl::ReleaseLoggingResponsibility(LoggerId* self) {
|
1116
|
+
assert(logger_ == self);
|
1117
|
+
logger_ = NULL;
|
1118
|
+
logger_cv_.SignalAll();
|
1119
|
+
}
|
1120
|
+
|
986
1121
|
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
|
987
1122
|
Status status;
|
988
1123
|
MutexLock l(&mutex_);
|
1124
|
+
LoggerId self;
|
1125
|
+
AcquireLoggingResponsibility(&self);
|
989
1126
|
status = MakeRoomForWrite(false); // May temporarily release lock and wait
|
990
1127
|
uint64_t last_sequence = versions_->LastSequence();
|
991
1128
|
if (status.ok()) {
|
992
1129
|
WriteBatchInternal::SetSequence(updates, last_sequence + 1);
|
993
1130
|
last_sequence += WriteBatchInternal::Count(updates);
|
994
|
-
versions_->SetLastSequence(last_sequence);
|
995
1131
|
|
996
|
-
// Add to log and apply to memtable
|
997
|
-
|
998
|
-
|
999
|
-
|
1000
|
-
|
1001
|
-
|
1002
|
-
status = WriteBatchInternal::
|
1132
|
+
// Add to log and apply to memtable. We can release the lock during
|
1133
|
+
// this phase since the "logger_" flag protects against concurrent
|
1134
|
+
// loggers and concurrent writes into mem_.
|
1135
|
+
{
|
1136
|
+
assert(logger_ == &self);
|
1137
|
+
mutex_.Unlock();
|
1138
|
+
status = log_->AddRecord(WriteBatchInternal::Contents(updates));
|
1139
|
+
if (status.ok() && options.sync) {
|
1140
|
+
status = logfile_->Sync();
|
1141
|
+
}
|
1142
|
+
if (status.ok()) {
|
1143
|
+
status = WriteBatchInternal::InsertInto(updates, mem_);
|
1144
|
+
}
|
1145
|
+
mutex_.Lock();
|
1146
|
+
assert(logger_ == &self);
|
1003
1147
|
}
|
1148
|
+
|
1149
|
+
versions_->SetLastSequence(last_sequence);
|
1004
1150
|
}
|
1005
|
-
|
1006
|
-
*options.post_write_snapshot =
|
1007
|
-
status.ok() ? snapshots_.New(last_sequence) : NULL;
|
1008
|
-
}
|
1151
|
+
ReleaseLoggingResponsibility(&self);
|
1009
1152
|
return status;
|
1010
1153
|
}
|
1011
1154
|
|
1155
|
+
// REQUIRES: mutex_ is held
|
1156
|
+
// REQUIRES: this thread is the current logger
|
1012
1157
|
Status DBImpl::MakeRoomForWrite(bool force) {
|
1013
1158
|
mutex_.AssertHeld();
|
1159
|
+
assert(logger_ != NULL);
|
1014
1160
|
bool allow_delay = !force;
|
1015
1161
|
Status s;
|
1016
1162
|
while (true) {
|
@@ -1038,10 +1184,11 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
|
1038
1184
|
} else if (imm_ != NULL) {
|
1039
1185
|
// We have filled up the current memtable, but the previous
|
1040
1186
|
// one is still being compacted, so we wait.
|
1041
|
-
|
1187
|
+
bg_cv_.Wait();
|
1042
1188
|
} else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
|
1043
1189
|
// There are too many level-0 files.
|
1044
|
-
|
1190
|
+
Log(options_.info_log, "waiting...\n");
|
1191
|
+
bg_cv_.Wait();
|
1045
1192
|
} else {
|
1046
1193
|
// Attempt to switch to a new memtable and trigger compaction of old
|
1047
1194
|
assert(versions_->PrevLogNumber() == 0);
|
@@ -1051,18 +1198,10 @@ Status DBImpl::MakeRoomForWrite(bool force) {
|
|
1051
1198
|
if (!s.ok()) {
|
1052
1199
|
break;
|
1053
1200
|
}
|
1054
|
-
VersionEdit edit;
|
1055
|
-
edit.SetPrevLogNumber(versions_->LogNumber());
|
1056
|
-
edit.SetLogNumber(new_log_number);
|
1057
|
-
s = versions_->LogAndApply(&edit);
|
1058
|
-
if (!s.ok()) {
|
1059
|
-
delete lfile;
|
1060
|
-
env_->DeleteFile(LogFileName(dbname_, new_log_number));
|
1061
|
-
break;
|
1062
|
-
}
|
1063
1201
|
delete log_;
|
1064
1202
|
delete logfile_;
|
1065
1203
|
logfile_ = lfile;
|
1204
|
+
logfile_number_ = new_log_number;
|
1066
1205
|
log_ = new log::Writer(lfile);
|
1067
1206
|
imm_ = mem_;
|
1068
1207
|
has_imm_.Release_Store(imm_);
|
@@ -1088,7 +1227,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
|
1088
1227
|
in.remove_prefix(strlen("num-files-at-level"));
|
1089
1228
|
uint64_t level;
|
1090
1229
|
bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
|
1091
|
-
if (!ok || level
|
1230
|
+
if (!ok || level >= config::kNumLevels) {
|
1092
1231
|
return false;
|
1093
1232
|
} else {
|
1094
1233
|
char buf[100];
|
@@ -1121,6 +1260,9 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
|
|
1121
1260
|
}
|
1122
1261
|
}
|
1123
1262
|
return true;
|
1263
|
+
} else if (in == "sstables") {
|
1264
|
+
*value = versions_->current()->DebugString();
|
1265
|
+
return true;
|
1124
1266
|
}
|
1125
1267
|
|
1126
1268
|
return false;
|
@@ -1184,8 +1326,9 @@ Status DB::Open(const Options& options, const std::string& dbname,
|
|
1184
1326
|
if (s.ok()) {
|
1185
1327
|
edit.SetLogNumber(new_log_number);
|
1186
1328
|
impl->logfile_ = lfile;
|
1329
|
+
impl->logfile_number_ = new_log_number;
|
1187
1330
|
impl->log_ = new log::Writer(lfile);
|
1188
|
-
s = impl->versions_->LogAndApply(&edit);
|
1331
|
+
s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
|
1189
1332
|
}
|
1190
1333
|
if (s.ok()) {
|
1191
1334
|
impl->DeleteObsoleteFiles();
|
@@ -1214,12 +1357,14 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
|
|
1214
1357
|
}
|
1215
1358
|
|
1216
1359
|
FileLock* lock;
|
1217
|
-
|
1360
|
+
const std::string lockname = LockFileName(dbname);
|
1361
|
+
Status result = env->LockFile(lockname, &lock);
|
1218
1362
|
if (result.ok()) {
|
1219
1363
|
uint64_t number;
|
1220
1364
|
FileType type;
|
1221
1365
|
for (size_t i = 0; i < filenames.size(); i++) {
|
1222
|
-
if (ParseFileName(filenames[i], &number, &type)
|
1366
|
+
if (ParseFileName(filenames[i], &number, &type) &&
|
1367
|
+
filenames[i] != lockname) { // Lock file will be deleted at end
|
1223
1368
|
Status del = env->DeleteFile(dbname + "/" + filenames[i]);
|
1224
1369
|
if (result.ok() && !del.ok()) {
|
1225
1370
|
result = del;
|
@@ -1227,7 +1372,7 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
|
|
1227
1372
|
}
|
1228
1373
|
}
|
1229
1374
|
env->UnlockFile(lock); // Ignore error since state is already gone
|
1230
|
-
env->DeleteFile(
|
1375
|
+
env->DeleteFile(lockname);
|
1231
1376
|
env->DeleteDir(dbname); // Ignore error in case dir contains other files
|
1232
1377
|
}
|
1233
1378
|
return result;
|