leveldb-ruby 0.7 → 0.8
This diff reflects the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
- data/README +1 -1
- data/leveldb/Makefile +70 -29
- data/leveldb/build_detect_platform +74 -0
- data/leveldb/db/builder.cc +2 -4
- data/leveldb/db/builder.h +4 -6
- data/leveldb/db/c.cc +471 -0
- data/leveldb/db/corruption_test.cc +21 -16
- data/leveldb/db/db_bench.cc +400 -200
- data/leveldb/db/db_impl.cc +276 -131
- data/leveldb/db/db_impl.h +22 -10
- data/leveldb/db/db_iter.cc +2 -1
- data/leveldb/db/db_test.cc +391 -43
- data/leveldb/db/dbformat.cc +31 -0
- data/leveldb/db/dbformat.h +51 -1
- data/leveldb/db/filename.h +1 -1
- data/leveldb/db/log_format.h +1 -1
- data/leveldb/db/log_reader.cc +16 -11
- data/leveldb/db/memtable.cc +37 -0
- data/leveldb/db/memtable.h +6 -0
- data/leveldb/db/repair.cc +17 -14
- data/leveldb/db/skiplist_test.cc +2 -2
- data/leveldb/db/version_edit.cc +7 -9
- data/leveldb/db/version_edit.h +2 -1
- data/leveldb/db/version_set.cc +416 -104
- data/leveldb/db/version_set.h +78 -14
- data/leveldb/db/version_set_test.cc +179 -0
- data/leveldb/db/write_batch_internal.h +2 -0
- data/leveldb/include/leveldb/c.h +246 -0
- data/leveldb/include/leveldb/db.h +14 -2
- data/leveldb/include/leveldb/env.h +31 -10
- data/leveldb/include/leveldb/options.h +7 -18
- data/leveldb/include/leveldb/slice.h +2 -2
- data/leveldb/include/leveldb/status.h +1 -1
- data/leveldb/port/atomic_pointer.h +144 -0
- data/leveldb/port/port.h +0 -2
- data/leveldb/port/port_android.h +7 -1
- data/leveldb/port/port_example.h +11 -1
- data/leveldb/port/port_posix.h +56 -38
- data/leveldb/table/format.cc +12 -8
- data/leveldb/table/table_test.cc +16 -7
- data/leveldb/util/cache.cc +173 -100
- data/leveldb/util/cache_test.cc +28 -11
- data/leveldb/util/coding.h +4 -4
- data/leveldb/util/comparator.cc +1 -0
- data/leveldb/util/env.cc +10 -5
- data/leveldb/util/env_posix.cc +48 -87
- data/leveldb/util/histogram.cc +11 -0
- data/leveldb/util/histogram.h +1 -0
- data/leveldb/util/posix_logger.h +98 -0
- data/leveldb/util/testharness.cc +12 -0
- data/leveldb/util/testharness.h +10 -1
- data/lib/leveldb.rb +11 -3
- metadata +41 -22
data/leveldb/db/db_impl.cc
CHANGED
@@ -68,16 +68,6 @@ struct DBImpl::CompactionState {
   }
 };
 
-namespace {
-class NullWritableFile : public WritableFile {
- public:
-  virtual Status Append(const Slice& data) { return Status::OK(); }
-  virtual Status Close() { return Status::OK(); }
-  virtual Status Flush() { return Status::OK(); }
-  virtual Status Sync() { return Status::OK(); }
-};
-}
-
 // Fix user-supplied options to be reasonable
 template <class T,class V>
 static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
@@ -96,11 +86,10 @@ Options SanitizeOptions(const std::string& dbname,
     // Open a log file in the same directory as the db
     src.env->CreateDir(dbname);  // In case it does not exist
     src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname));
-    Status s = src.env->
-        &result.info_log);
+    Status s = src.env->NewLogger(InfoLogFileName(dbname), &result.info_log);
     if (!s.ok()) {
       // No place suitable for logging
-      result.info_log =
+      result.info_log = NULL;
     }
   }
   if (result.block_cache == NULL) {
@@ -119,13 +108,15 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
       db_lock_(NULL),
       shutting_down_(NULL),
       bg_cv_(&mutex_),
-      compacting_cv_(&mutex_),
       mem_(new MemTable(internal_comparator_)),
       imm_(NULL),
       logfile_(NULL),
+      logfile_number_(0),
       log_(NULL),
+      logger_(NULL),
+      logger_cv_(&mutex_),
       bg_compaction_scheduled_(false),
-
+      manual_compaction_(NULL) {
   mem_->Ref();
   has_imm_.Release_Store(NULL);
 
@@ -141,10 +132,8 @@ DBImpl::~DBImpl() {
   // Wait for background work to finish
   mutex_.Lock();
   shutting_down_.Release_Store(this);  // Any non-NULL value is ok
-
-
-    bg_cv_.Wait();
-  }
+  while (bg_compaction_scheduled_) {
+    bg_cv_.Wait();
   }
   mutex_.Unlock();
 
@@ -203,7 +192,7 @@ void DBImpl::MaybeIgnoreError(Status* s) const {
   if (s->ok() || options_.paranoid_checks) {
     // No change needed
   } else {
-    Log(
+    Log(options_.info_log, "Ignoring error %s", s->ToString().c_str());
     *s = Status::OK();
   }
 }
@@ -222,7 +211,7 @@ void DBImpl::DeleteObsoleteFiles() {
     bool keep = true;
     switch (type) {
       case kLogFile:
-        keep = ((number
+        keep = ((number >= versions_->LogNumber()) ||
                 (number == versions_->PrevLogNumber()));
         break;
       case kDescriptorFile:
@@ -249,7 +238,7 @@ void DBImpl::DeleteObsoleteFiles() {
       if (type == kTableFile) {
         table_cache_->Evict(number);
       }
-      Log(
+      Log(options_.info_log, "Delete type=%d #%lld\n",
          int(type),
          static_cast<unsigned long long>(number));
       env_->DeleteFile(dbname_ + "/" + filenames[i]);
@@ -290,14 +279,44 @@ Status DBImpl::Recover(VersionEdit* edit) {
 
   s = versions_->Recover();
   if (s.ok()) {
-    // Recover from the log files named in the descriptor
     SequenceNumber max_sequence(0);
-
-
+
+    // Recover from all newer log files than the ones named in the
+    // descriptor (new log files may have been added by the previous
+    // incarnation without registering them in the descriptor).
+    //
+    // Note that PrevLogNumber() is no longer used, but we pay
+    // attention to it in case we are recovering a database
+    // produced by an older version of leveldb.
+    const uint64_t min_log = versions_->LogNumber();
+    const uint64_t prev_log = versions_->PrevLogNumber();
+    std::vector<std::string> filenames;
+    s = env_->GetChildren(dbname_, &filenames);
+    if (!s.ok()) {
+      return s;
+    }
+    uint64_t number;
+    FileType type;
+    std::vector<uint64_t> logs;
+    for (size_t i = 0; i < filenames.size(); i++) {
+      if (ParseFileName(filenames[i], &number, &type)
+          && type == kLogFile
+          && ((number >= min_log) || (number == prev_log))) {
+        logs.push_back(number);
+      }
     }
-
-
+
+    // Recover in the order in which the logs were generated
+    std::sort(logs.begin(), logs.end());
+    for (size_t i = 0; i < logs.size(); i++) {
+      s = RecoverLogFile(logs[i], edit, &max_sequence);
+
+      // The previous incarnation may not have written any MANIFEST
+      // records after allocating this log number. So we manually
+      // update the file number allocation counter in VersionSet.
+      versions_->MarkFileNumberUsed(logs[i]);
     }
+
     if (s.ok()) {
       if (versions_->LastSequence() < max_sequence) {
         versions_->SetLastSequence(max_sequence);
@@ -313,11 +332,11 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
                               SequenceNumber* max_sequence) {
   struct LogReporter : public log::Reader::Reporter {
     Env* env;
-
+    Logger* info_log;
     const char* fname;
     Status* status;  // NULL if options_.paranoid_checks==false
     virtual void Corruption(size_t bytes, const Status& s) {
-      Log(
+      Log(info_log, "%s%s: dropping %d bytes; %s",
          (this->status == NULL ? "(ignoring error) " : ""),
          fname, static_cast<int>(bytes), s.ToString().c_str());
       if (this->status != NULL && this->status->ok()) *this->status = s;
@@ -347,7 +366,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
   // large sequence numbers).
   log::Reader reader(file, &reporter, true/*checksum*/,
                      0/*initial_offset*/);
-  Log(
+  Log(options_.info_log, "Recovering log #%llu",
      (unsigned long long) log_number);
 
   // Read all the records and add to a memtable
@@ -381,7 +400,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
     }
 
     if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
-      status = WriteLevel0Table(mem, edit);
+      status = WriteLevel0Table(mem, edit, NULL);
       if (!status.ok()) {
         // Reflect errors immediately so that conditions like full
         // file-systems cause the DB::Open() to fail.
@@ -393,7 +412,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
   }
 
   if (status.ok() && mem != NULL) {
-    status = WriteLevel0Table(mem, edit);
+    status = WriteLevel0Table(mem, edit, NULL);
     // Reflect errors immediately so that conditions like full
     // file-systems cause the DB::Open() to fail.
   }
@@ -403,50 +422,72 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
   return status;
 }
 
-Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit
+Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit,
+                                Version* base) {
   mutex_.AssertHeld();
   const uint64_t start_micros = env_->NowMicros();
   FileMetaData meta;
   meta.number = versions_->NewFileNumber();
   pending_outputs_.insert(meta.number);
   Iterator* iter = mem->NewIterator();
-  Log(
+  Log(options_.info_log, "Level-0 table #%llu: started",
      (unsigned long long) meta.number);
 
   Status s;
   {
     mutex_.Unlock();
-    s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta
+    s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
     mutex_.Lock();
   }
 
-  Log(
+  Log(options_.info_log, "Level-0 table #%llu: %lld bytes %s",
      (unsigned long long) meta.number,
      (unsigned long long) meta.file_size,
      s.ToString().c_str());
   delete iter;
   pending_outputs_.erase(meta.number);
 
+
+  // Note that if file_size is zero, the file has been deleted and
+  // should not be added to the manifest.
+  int level = 0;
+  if (s.ok() && meta.file_size > 0) {
+    const Slice min_user_key = meta.smallest.user_key();
+    const Slice max_user_key = meta.largest.user_key();
+    if (base != NULL) {
+      level = base->PickLevelForMemTableOutput(min_user_key, max_user_key);
+    }
+    edit->AddFile(level, meta.number, meta.file_size,
+                  meta.smallest, meta.largest);
+  }
+
   CompactionStats stats;
   stats.micros = env_->NowMicros() - start_micros;
   stats.bytes_written = meta.file_size;
-  stats_[
+  stats_[level].Add(stats);
   return s;
 }
 
 Status DBImpl::CompactMemTable() {
   mutex_.AssertHeld();
   assert(imm_ != NULL);
-  assert(compacting_);
 
   // Save the contents of the memtable as a new Table
   VersionEdit edit;
-
+  Version* base = versions_->current();
+  base->Ref();
+  Status s = WriteLevel0Table(imm_, &edit, base);
+  base->Unref();
+
+  if (s.ok() && shutting_down_.Acquire_Load()) {
+    s = Status::IOError("Deleting DB during memtable compaction");
+  }
 
   // Replace immutable memtable with the generated Table
   if (s.ok()) {
     edit.SetPrevLogNumber(0);
-
+    edit.SetLogNumber(logfile_number_);  // Earlier logs no longer needed
+    s = versions_->LogAndApply(&edit, &mutex_);
   }
 
   if (s.ok()) {
@@ -457,40 +498,71 @@ Status DBImpl::CompactMemTable() {
     DeleteObsoleteFiles();
   }
 
-  compacting_cv_.SignalAll();  // Wake up waiter even if there was an error
   return s;
 }
 
-void DBImpl::
-
-
-
-
-
-
+void DBImpl::CompactRange(const Slice* begin, const Slice* end) {
+  int max_level_with_files = 1;
+  {
+    MutexLock l(&mutex_);
+    Version* base = versions_->current();
+    for (int level = 1; level < config::kNumLevels; level++) {
+      if (base->OverlapInLevel(level, begin, end)) {
+        max_level_with_files = level;
+      }
+    }
+  }
+  TEST_CompactMemTable(); // TODO(sanjay): Skip if memtable does not overlap
+  for (int level = 0; level < max_level_with_files; level++) {
+    TEST_CompactRange(level, begin, end);
   }
-
-      level,
-      InternalKey(begin, kMaxSequenceNumber, kValueTypeForSeek),
-      InternalKey(end, 0, static_cast<ValueType>(0)));
+}
 
-
-
-
-
+void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) {
+  assert(level >= 0);
+  assert(level + 1 < config::kNumLevels);
+
+  InternalKey begin_storage, end_storage;
+
+  ManualCompaction manual;
+  manual.level = level;
+  manual.done = false;
+  if (begin == NULL) {
+    manual.begin = NULL;
+  } else {
+    begin_storage = InternalKey(*begin, kMaxSequenceNumber, kValueTypeForSeek);
+    manual.begin = &begin_storage;
+  }
+  if (end == NULL) {
+    manual.end = NULL;
+  } else {
+    end_storage = InternalKey(*end, 0, static_cast<ValueType>(0));
+    manual.end = &end_storage;
   }
 
-
-
+  MutexLock l(&mutex_);
+  while (!manual.done) {
+    while (manual_compaction_ != NULL) {
+      bg_cv_.Wait();
+    }
+    manual_compaction_ = &manual;
+    MaybeScheduleCompaction();
+    while (manual_compaction_ == &manual) {
+      bg_cv_.Wait();
+    }
+  }
 }
 
 Status DBImpl::TEST_CompactMemTable() {
   MutexLock l(&mutex_);
+  LoggerId self;
+  AcquireLoggingResponsibility(&self);
   Status s = MakeRoomForWrite(true /* force compaction */);
+  ReleaseLoggingResponsibility(&self);
   if (s.ok()) {
     // Wait until the compaction completes
     while (imm_ != NULL && bg_error_.ok()) {
-
+      bg_cv_.Wait();
     }
     if (imm_ != NULL) {
      s = bg_error_;
@@ -503,11 +575,11 @@ void DBImpl::MaybeScheduleCompaction() {
   mutex_.AssertHeld();
   if (bg_compaction_scheduled_) {
     // Already scheduled
-  } else if (compacting_) {
-    // Some other thread is running a compaction. Do not conflict with it.
   } else if (shutting_down_.Acquire_Load()) {
     // DB is being deleted; no more background compactions
-  } else if (imm_ == NULL &&
+  } else if (imm_ == NULL &&
+             manual_compaction_ == NULL &&
+             !versions_->NeedsCompaction()) {
     // No work to be done
   } else {
     bg_compaction_scheduled_ = true;
@@ -522,50 +594,63 @@ void DBImpl::BGWork(void* db) {
 void DBImpl::BackgroundCall() {
   MutexLock l(&mutex_);
   assert(bg_compaction_scheduled_);
-  if (!shutting_down_.Acquire_Load()
-      !compacting_) {
+  if (!shutting_down_.Acquire_Load()) {
    BackgroundCompaction();
   }
   bg_compaction_scheduled_ = false;
-  bg_cv_.SignalAll();
 
   // Previous compaction may have produced too many files in a level,
   // so reschedule another compaction if needed.
   MaybeScheduleCompaction();
+  bg_cv_.SignalAll();
 }
 
 void DBImpl::BackgroundCompaction() {
   mutex_.AssertHeld();
-  assert(!compacting_);
 
   if (imm_ != NULL) {
-    compacting_ = true;
    CompactMemTable();
-    compacting_ = false;
-    compacting_cv_.SignalAll();
    return;
   }
 
-  Compaction* c
-
-
-
+  Compaction* c;
+  bool is_manual = (manual_compaction_ != NULL);
+  InternalKey manual_end;
+  if (is_manual) {
+    ManualCompaction* m = manual_compaction_;
+    c = versions_->CompactRange(m->level, m->begin, m->end);
+    m->done = (c == NULL);
+    if (c != NULL) {
+      manual_end = c->input(0, c->num_input_files(0) - 1)->largest;
+    }
+    Log(options_.info_log,
+        "Manual compaction at level-%d from %s .. %s; will stop at %s\n",
+        m->level,
+        (m->begin ? m->begin->DebugString().c_str() : "(begin)"),
+        (m->end ? m->end->DebugString().c_str() : "(end)"),
+        (m->done ? "(end)" : manual_end.DebugString().c_str()));
+  } else {
+    c = versions_->PickCompaction();
   }
 
   Status status;
-  if (c
+  if (c == NULL) {
+    // Nothing to do
+  } else if (!is_manual && c->IsTrivialMove()) {
    // Move file to next level
    assert(c->num_input_files(0) == 1);
    FileMetaData* f = c->input(0, 0);
    c->edit()->DeleteFile(c->level(), f->number);
    c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
                       f->smallest, f->largest);
-    status = versions_->LogAndApply(c->edit());
-
+    status = versions_->LogAndApply(c->edit(), &mutex_);
+    VersionSet::LevelSummaryStorage tmp;
+    Log(options_.info_log, "Moved #%lld to level-%d %lld bytes %s: %s\n",
        static_cast<unsigned long long>(f->number),
        c->level() + 1,
        static_cast<unsigned long long>(f->file_size),
-        status.ToString().c_str()
+        status.ToString().c_str(),
+        versions_->LevelSummary(&tmp));
   } else {
    CompactionState* compact = new CompactionState(c);
    status = DoCompactionWork(compact);
@@ -578,12 +663,23 @@ void DBImpl::BackgroundCompaction() {
   } else if (shutting_down_.Acquire_Load()) {
     // Ignore compaction errors found during shutting down
   } else {
-    Log(
+    Log(options_.info_log,
        "Compaction error: %s", status.ToString().c_str());
    if (options_.paranoid_checks && bg_error_.ok()) {
      bg_error_ = status;
    }
   }
+
+  if (is_manual) {
+    ManualCompaction* m = manual_compaction_;
+    if (!m->done) {
+      // We only compacted part of the requested range. Update *m
+      // to the range that is left to be compacted.
+      m->tmp_storage = manual_end;
+      m->begin = &m->tmp_storage;
+    }
+    manual_compaction_ = NULL;
+  }
 }
 
 void DBImpl::CleanupCompaction(CompactionState* compact) {
@@ -669,7 +765,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
     s = iter->status();
     delete iter;
     if (s.ok()) {
-      Log(
+      Log(options_.info_log,
          "Generated table #%llu: %lld keys, %lld bytes",
          (unsigned long long) output_number,
          (unsigned long long) current_entries,
@@ -682,7 +778,7 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
 
 Status DBImpl::InstallCompactionResults(CompactionState* compact) {
   mutex_.AssertHeld();
-  Log(
+  Log(options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes",
      compact->compaction->num_input_files(0),
      compact->compaction->level(),
      compact->compaction->num_input_files(1),
@@ -701,7 +797,7 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
   }
   compact->outputs.clear();
 
-  Status s = versions_->LogAndApply(compact->compaction->edit());
+  Status s = versions_->LogAndApply(compact->compaction->edit(), &mutex_);
   if (s.ok()) {
    compact->compaction->ReleaseInputs();
    DeleteObsoleteFiles();
@@ -718,7 +814,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
   const uint64_t start_micros = env_->NowMicros();
   int64_t imm_micros = 0;  // Micros spent doing imm_ compactions
 
-  Log(
+  Log(options_.info_log, "Compacting %d@%d + %d@%d files",
      compact->compaction->num_input_files(0),
      compact->compaction->level(),
      compact->compaction->num_input_files(1),
@@ -734,7 +830,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
   }
 
   // Release mutex while we're actually doing the compaction work
-  compacting_ = true;
   mutex_.Unlock();
 
   Iterator* input = versions_->MakeInputIterator(compact->compaction);
@@ -751,7 +846,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
       mutex_.Lock();
       if (imm_ != NULL) {
        CompactMemTable();
-
+        bg_cv_.SignalAll();  // Wakeup MakeRoomForWrite() if necessary
      }
      mutex_.Unlock();
      imm_micros += (env_->NowMicros() - imm_start);
@@ -802,7 +897,7 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
       last_sequence_for_key = ikey.sequence;
     }
 #if 0
-    Log(
+    Log(options_.info_log,
        "  Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
        "%d smallest_snapshot: %d",
        ikey.user_key.ToString().c_str(),
@@ -867,10 +962,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) {
   if (status.ok()) {
     status = InstallCompactionResults(compact);
   }
-  compacting_ = false;
-  compacting_cv_.SignalAll();
   VersionSet::LevelSummaryStorage tmp;
-  Log(
+  Log(options_.info_log,
      "compacted to: %s", versions_->LevelSummary(&tmp));
   return status;
 }
@@ -936,22 +1029,48 @@ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
 Status DBImpl::Get(const ReadOptions& options,
                    const Slice& key,
                    std::string* value) {
-
-
-
-
-
-
-
-      found = true;
-    }
-  // Non-OK iterator status trumps everything else
-  Status result = iter->status();
-  if (result.ok() && !found) {
-    result = Status::NotFound(Slice());  // Use an empty error message for speed
+  Status s;
+  MutexLock l(&mutex_);
+  SequenceNumber snapshot;
+  if (options.snapshot != NULL) {
+    snapshot = reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_;
+  } else {
+    snapshot = versions_->LastSequence();
   }
-
-
+
+  MemTable* mem = mem_;
+  MemTable* imm = imm_;
+  Version* current = versions_->current();
+  mem->Ref();
+  if (imm != NULL) imm->Ref();
+  current->Ref();
+
+  bool have_stat_update = false;
+  Version::GetStats stats;
+
+  // Unlock while reading from files and memtables
+  {
+    mutex_.Unlock();
+    // First look in the memtable, then in the immutable memtable (if any).
+    LookupKey lkey(key, snapshot);
+    if (mem->Get(lkey, value, &s)) {
+      // Done
+    } else if (imm != NULL && imm->Get(lkey, value, &s)) {
+      // Done
+    } else {
+      s = current->Get(options, lkey, value, &stats);
+      have_stat_update = true;
+    }
+    mutex_.Lock();
+  }
+
+  if (have_stat_update && current->UpdateStats(stats)) {
+    MaybeScheduleCompaction();
+  }
+  mem->Unref();
+  if (imm != NULL) imm->Unref();
+  current->Unref();
+  return s;
 }
 
 Iterator* DBImpl::NewIterator(const ReadOptions& options) {
@@ -983,34 +1102,61 @@ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
   return DB::Delete(options, key);
 }
 
+// There is at most one thread that is the current logger. This call
+// waits until preceding logger(s) have finished and becomes the
+// current logger.
+void DBImpl::AcquireLoggingResponsibility(LoggerId* self) {
+  while (logger_ != NULL) {
+    logger_cv_.Wait();
+  }
+  logger_ = self;
+}
+
+void DBImpl::ReleaseLoggingResponsibility(LoggerId* self) {
+  assert(logger_ == self);
+  logger_ = NULL;
+  logger_cv_.SignalAll();
+}
+
 Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
   Status status;
   MutexLock l(&mutex_);
+  LoggerId self;
+  AcquireLoggingResponsibility(&self);
   status = MakeRoomForWrite(false);  // May temporarily release lock and wait
   uint64_t last_sequence = versions_->LastSequence();
   if (status.ok()) {
    WriteBatchInternal::SetSequence(updates, last_sequence + 1);
    last_sequence += WriteBatchInternal::Count(updates);
-    versions_->SetLastSequence(last_sequence);
 
-    // Add to log and apply to memtable
-
-
-
-
-
-    status = WriteBatchInternal::
+    // Add to log and apply to memtable. We can release the lock during
+    // this phase since the "logger_" flag protects against concurrent
+    // loggers and concurrent writes into mem_.
+    {
+      assert(logger_ == &self);
+      mutex_.Unlock();
+      status = log_->AddRecord(WriteBatchInternal::Contents(updates));
+      if (status.ok() && options.sync) {
+        status = logfile_->Sync();
+      }
+      if (status.ok()) {
+        status = WriteBatchInternal::InsertInto(updates, mem_);
+      }
+      mutex_.Lock();
+      assert(logger_ == &self);
    }
+
+    versions_->SetLastSequence(last_sequence);
   }
-
-    *options.post_write_snapshot =
-        status.ok() ? snapshots_.New(last_sequence) : NULL;
-  }
+  ReleaseLoggingResponsibility(&self);
   return status;
 }
 
+// REQUIRES: mutex_ is held
+// REQUIRES: this thread is the current logger
 Status DBImpl::MakeRoomForWrite(bool force) {
   mutex_.AssertHeld();
+  assert(logger_ != NULL);
   bool allow_delay = !force;
   Status s;
   while (true) {
@@ -1038,10 +1184,11 @@ Status DBImpl::MakeRoomForWrite(bool force) {
     } else if (imm_ != NULL) {
      // We have filled up the current memtable, but the previous
      // one is still being compacted, so we wait.
-
+      bg_cv_.Wait();
    } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
      // There are too many level-0 files.
-
+      Log(options_.info_log, "waiting...\n");
+      bg_cv_.Wait();
    } else {
      // Attempt to switch to a new memtable and trigger compaction of old
      assert(versions_->PrevLogNumber() == 0);
@@ -1051,18 +1198,10 @@ Status DBImpl::MakeRoomForWrite(bool force) {
      if (!s.ok()) {
        break;
      }
-      VersionEdit edit;
-      edit.SetPrevLogNumber(versions_->LogNumber());
-      edit.SetLogNumber(new_log_number);
-      s = versions_->LogAndApply(&edit);
-      if (!s.ok()) {
-        delete lfile;
-        env_->DeleteFile(LogFileName(dbname_, new_log_number));
-        break;
-      }
      delete log_;
      delete logfile_;
      logfile_ = lfile;
+      logfile_number_ = new_log_number;
      log_ = new log::Writer(lfile);
      imm_ = mem_;
      has_imm_.Release_Store(imm_);
@@ -1088,7 +1227,7 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
    in.remove_prefix(strlen("num-files-at-level"));
    uint64_t level;
    bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
-    if (!ok || level
+    if (!ok || level >= config::kNumLevels) {
      return false;
    } else {
      char buf[100];
@@ -1121,6 +1260,9 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
      }
    }
    return true;
+  } else if (in == "sstables") {
+    *value = versions_->current()->DebugString();
+    return true;
   }
 
   return false;
@@ -1184,8 +1326,9 @@ Status DB::Open(const Options& options, const std::string& dbname,
   if (s.ok()) {
    edit.SetLogNumber(new_log_number);
    impl->logfile_ = lfile;
+    impl->logfile_number_ = new_log_number;
    impl->log_ = new log::Writer(lfile);
-    s = impl->versions_->LogAndApply(&edit);
+    s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
   }
   if (s.ok()) {
    impl->DeleteObsoleteFiles();
@@ -1214,12 +1357,14 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
   }
 
   FileLock* lock;
-
+  const std::string lockname = LockFileName(dbname);
+  Status result = env->LockFile(lockname, &lock);
   if (result.ok()) {
    uint64_t number;
    FileType type;
    for (size_t i = 0; i < filenames.size(); i++) {
-      if (ParseFileName(filenames[i], &number, &type)
+      if (ParseFileName(filenames[i], &number, &type) &&
+          filenames[i] != lockname) {  // Lock file will be deleted at end
        Status del = env->DeleteFile(dbname + "/" + filenames[i]);
        if (result.ok() && !del.ok()) {
          result = del;
@@ -1227,7 +1372,7 @@ Status DestroyDB(const std::string& dbname, const Options& options) {
      }
    }
    env->UnlockFile(lock);  // Ignore error since state is already gone
-    env->DeleteFile(
+    env->DeleteFile(lockname);
    env->DeleteDir(dbname);  // Ignore error in case dir contains other files
   }
   return result;