leveldb-ruby 0.7 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/README +1 -1
  2. data/leveldb/Makefile +70 -29
  3. data/leveldb/build_detect_platform +74 -0
  4. data/leveldb/db/builder.cc +2 -4
  5. data/leveldb/db/builder.h +4 -6
  6. data/leveldb/db/c.cc +471 -0
  7. data/leveldb/db/corruption_test.cc +21 -16
  8. data/leveldb/db/db_bench.cc +400 -200
  9. data/leveldb/db/db_impl.cc +276 -131
  10. data/leveldb/db/db_impl.h +22 -10
  11. data/leveldb/db/db_iter.cc +2 -1
  12. data/leveldb/db/db_test.cc +391 -43
  13. data/leveldb/db/dbformat.cc +31 -0
  14. data/leveldb/db/dbformat.h +51 -1
  15. data/leveldb/db/filename.h +1 -1
  16. data/leveldb/db/log_format.h +1 -1
  17. data/leveldb/db/log_reader.cc +16 -11
  18. data/leveldb/db/memtable.cc +37 -0
  19. data/leveldb/db/memtable.h +6 -0
  20. data/leveldb/db/repair.cc +17 -14
  21. data/leveldb/db/skiplist_test.cc +2 -2
  22. data/leveldb/db/version_edit.cc +7 -9
  23. data/leveldb/db/version_edit.h +2 -1
  24. data/leveldb/db/version_set.cc +416 -104
  25. data/leveldb/db/version_set.h +78 -14
  26. data/leveldb/db/version_set_test.cc +179 -0
  27. data/leveldb/db/write_batch_internal.h +2 -0
  28. data/leveldb/include/leveldb/c.h +246 -0
  29. data/leveldb/include/leveldb/db.h +14 -2
  30. data/leveldb/include/leveldb/env.h +31 -10
  31. data/leveldb/include/leveldb/options.h +7 -18
  32. data/leveldb/include/leveldb/slice.h +2 -2
  33. data/leveldb/include/leveldb/status.h +1 -1
  34. data/leveldb/port/atomic_pointer.h +144 -0
  35. data/leveldb/port/port.h +0 -2
  36. data/leveldb/port/port_android.h +7 -1
  37. data/leveldb/port/port_example.h +11 -1
  38. data/leveldb/port/port_posix.h +56 -38
  39. data/leveldb/table/format.cc +12 -8
  40. data/leveldb/table/table_test.cc +16 -7
  41. data/leveldb/util/cache.cc +173 -100
  42. data/leveldb/util/cache_test.cc +28 -11
  43. data/leveldb/util/coding.h +4 -4
  44. data/leveldb/util/comparator.cc +1 -0
  45. data/leveldb/util/env.cc +10 -5
  46. data/leveldb/util/env_posix.cc +48 -87
  47. data/leveldb/util/histogram.cc +11 -0
  48. data/leveldb/util/histogram.h +1 -0
  49. data/leveldb/util/posix_logger.h +98 -0
  50. data/leveldb/util/testharness.cc +12 -0
  51. data/leveldb/util/testharness.h +10 -1
  52. data/lib/leveldb.rb +11 -3
  53. metadata +41 -22
@@ -41,6 +41,14 @@ static uint64_t MaxFileSizeForLevel(int level) {
41
41
  return kTargetFileSize; // We could vary per level to reduce number of files?
42
42
  }
43
43
 
44
+ static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
45
+ int64_t sum = 0;
46
+ for (size_t i = 0; i < files.size(); i++) {
47
+ sum += files[i]->file_size;
48
+ }
49
+ return sum;
50
+ }
51
+
44
52
  namespace {
45
53
  std::string IntSetToString(const std::set<uint64_t>& s) {
46
54
  std::string result = "{";
@@ -75,6 +83,78 @@ Version::~Version() {
75
83
  }
76
84
  }
77
85
 
86
+ int FindFile(const InternalKeyComparator& icmp,
87
+ const std::vector<FileMetaData*>& files,
88
+ const Slice& key) {
89
+ uint32_t left = 0;
90
+ uint32_t right = files.size();
91
+ while (left < right) {
92
+ uint32_t mid = (left + right) / 2;
93
+ const FileMetaData* f = files[mid];
94
+ if (icmp.InternalKeyComparator::Compare(f->largest.Encode(), key) < 0) {
95
+ // Key at "mid.largest" is < "target". Therefore all
96
+ // files at or before "mid" are uninteresting.
97
+ left = mid + 1;
98
+ } else {
99
+ // Key at "mid.largest" is >= "target". Therefore all files
100
+ // after "mid" are uninteresting.
101
+ right = mid;
102
+ }
103
+ }
104
+ return right;
105
+ }
106
+
107
+ static bool AfterFile(const Comparator* ucmp,
108
+ const Slice* user_key, const FileMetaData* f) {
109
+ // NULL user_key occurs before all keys and is therefore never after *f
110
+ return (user_key != NULL &&
111
+ ucmp->Compare(*user_key, f->largest.user_key()) > 0);
112
+ }
113
+
114
+ static bool BeforeFile(const Comparator* ucmp,
115
+ const Slice* user_key, const FileMetaData* f) {
116
+ // NULL user_key occurs after all keys and is therefore never before *f
117
+ return (user_key != NULL &&
118
+ ucmp->Compare(*user_key, f->smallest.user_key()) < 0);
119
+ }
120
+
121
+ bool SomeFileOverlapsRange(
122
+ const InternalKeyComparator& icmp,
123
+ bool disjoint_sorted_files,
124
+ const std::vector<FileMetaData*>& files,
125
+ const Slice* smallest_user_key,
126
+ const Slice* largest_user_key) {
127
+ const Comparator* ucmp = icmp.user_comparator();
128
+ if (!disjoint_sorted_files) {
129
+ // Need to check against all files
130
+ for (int i = 0; i < files.size(); i++) {
131
+ const FileMetaData* f = files[i];
132
+ if (AfterFile(ucmp, smallest_user_key, f) ||
133
+ BeforeFile(ucmp, largest_user_key, f)) {
134
+ // No overlap
135
+ } else {
136
+ return true; // Overlap
137
+ }
138
+ }
139
+ return false;
140
+ }
141
+
142
+ // Binary search over file list
143
+ uint32_t index = 0;
144
+ if (smallest_user_key != NULL) {
145
+ // Find the earliest possible internal key for smallest_user_key
146
+ InternalKey small(*smallest_user_key, kMaxSequenceNumber,kValueTypeForSeek);
147
+ index = FindFile(icmp, files, small.Encode());
148
+ }
149
+
150
+ if (index >= files.size()) {
151
+ // beginning of range is after all files, so no overlap.
152
+ return false;
153
+ }
154
+
155
+ return !BeforeFile(ucmp, largest_user_key, files[index]);
156
+ }
157
+
78
158
  // An internal iterator. For a given version/level pair, yields
79
159
  // information about the files in the level. For a given entry, key()
80
160
  // is the largest key that occurs in the file, and value() is an
@@ -92,22 +172,7 @@ class Version::LevelFileNumIterator : public Iterator {
92
172
  return index_ < flist_->size();
93
173
  }
94
174
  virtual void Seek(const Slice& target) {
95
- uint32_t left = 0;
96
- uint32_t right = flist_->size() - 1;
97
- while (left < right) {
98
- uint32_t mid = (left + right) / 2;
99
- int cmp = icmp_.Compare((*flist_)[mid]->largest.Encode(), target);
100
- if (cmp < 0) {
101
- // Key at "mid.largest" is < than "target". Therefore all
102
- // files at or before "mid" are uninteresting.
103
- left = mid + 1;
104
- } else {
105
- // Key at "mid.largest" is >= "target". Therefore all files
106
- // after "mid" are uninteresting.
107
- right = mid;
108
- }
109
- }
110
- index_ = left;
175
+ index_ = FindFile(icmp_, *flist_, target);
111
176
  }
112
177
  virtual void SeekToFirst() { index_ = 0; }
113
178
  virtual void SeekToLast() {
@@ -185,6 +250,146 @@ void Version::AddIterators(const ReadOptions& options,
185
250
  }
186
251
  }
187
252
 
253
+ // If "*iter" points at a value or deletion for user_key, store
254
+ // either the value, or a NotFound error and return true.
255
+ // Else return false.
256
+ static bool GetValue(Iterator* iter, const Slice& user_key,
257
+ std::string* value,
258
+ Status* s) {
259
+ if (!iter->Valid()) {
260
+ return false;
261
+ }
262
+ ParsedInternalKey parsed_key;
263
+ if (!ParseInternalKey(iter->key(), &parsed_key)) {
264
+ *s = Status::Corruption("corrupted key for ", user_key);
265
+ return true;
266
+ }
267
+ if (parsed_key.user_key != user_key) {
268
+ return false;
269
+ }
270
+ switch (parsed_key.type) {
271
+ case kTypeDeletion:
272
+ *s = Status::NotFound(Slice()); // Use an empty error message for speed
273
+ break;
274
+ case kTypeValue: {
275
+ Slice v = iter->value();
276
+ value->assign(v.data(), v.size());
277
+ break;
278
+ }
279
+ }
280
+ return true;
281
+ }
282
+
283
+ static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
284
+ return a->number > b->number;
285
+ }
286
+
287
+ Status Version::Get(const ReadOptions& options,
288
+ const LookupKey& k,
289
+ std::string* value,
290
+ GetStats* stats) {
291
+ Slice ikey = k.internal_key();
292
+ Slice user_key = k.user_key();
293
+ const Comparator* ucmp = vset_->icmp_.user_comparator();
294
+ Status s;
295
+
296
+ stats->seek_file = NULL;
297
+ stats->seek_file_level = -1;
298
+ FileMetaData* last_file_read = NULL;
299
+ int last_file_read_level = -1;
300
+
301
+ // We can search level-by-level since entries never hop across
302
+ // levels. Therefore we are guaranteed that if we find data
303
+ // in a smaller level, later levels are irrelevant.
304
+ std::vector<FileMetaData*> tmp;
305
+ FileMetaData* tmp2;
306
+ for (int level = 0; level < config::kNumLevels; level++) {
307
+ size_t num_files = files_[level].size();
308
+ if (num_files == 0) continue;
309
+
310
+ // Get the list of files to search in this level
311
+ FileMetaData* const* files = &files_[level][0];
312
+ if (level == 0) {
313
+ // Level-0 files may overlap each other. Find all files that
314
+ // overlap user_key and process them in order from newest to oldest.
315
+ tmp.reserve(num_files);
316
+ for (uint32_t i = 0; i < num_files; i++) {
317
+ FileMetaData* f = files[i];
318
+ if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
319
+ ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
320
+ tmp.push_back(f);
321
+ }
322
+ }
323
+ if (tmp.empty()) continue;
324
+
325
+ std::sort(tmp.begin(), tmp.end(), NewestFirst);
326
+ files = &tmp[0];
327
+ num_files = tmp.size();
328
+ } else {
329
+ // Binary search to find earliest index whose largest key >= ikey.
330
+ uint32_t index = FindFile(vset_->icmp_, files_[level], ikey);
331
+ if (index >= num_files) {
332
+ files = NULL;
333
+ num_files = 0;
334
+ } else {
335
+ tmp2 = files[index];
336
+ if (ucmp->Compare(user_key, tmp2->smallest.user_key()) < 0) {
337
+ // All of "tmp2" is past any data for user_key
338
+ files = NULL;
339
+ num_files = 0;
340
+ } else {
341
+ files = &tmp2;
342
+ num_files = 1;
343
+ }
344
+ }
345
+ }
346
+
347
+ for (uint32_t i = 0; i < num_files; ++i) {
348
+ if (last_file_read != NULL && stats->seek_file == NULL) {
349
+ // We have had more than one seek for this read. Charge the 1st file.
350
+ stats->seek_file = last_file_read;
351
+ stats->seek_file_level = last_file_read_level;
352
+ }
353
+
354
+ FileMetaData* f = files[i];
355
+ last_file_read = f;
356
+ last_file_read_level = level;
357
+
358
+ Iterator* iter = vset_->table_cache_->NewIterator(
359
+ options,
360
+ f->number,
361
+ f->file_size);
362
+ iter->Seek(ikey);
363
+ const bool done = GetValue(iter, user_key, value, &s);
364
+ if (!iter->status().ok()) {
365
+ s = iter->status();
366
+ delete iter;
367
+ return s;
368
+ } else {
369
+ delete iter;
370
+ if (done) {
371
+ return s;
372
+ }
373
+ }
374
+ }
375
+ }
376
+
377
+ return Status::NotFound(Slice()); // Use an empty error message for speed
378
+ }
379
+
380
+ bool Version::UpdateStats(const GetStats& stats) {
381
+ FileMetaData* f = stats.seek_file;
382
+ if (f != NULL) {
383
+ f->allowed_seeks--;
384
+ if (f->allowed_seeks <= 0 && file_to_compact_ == NULL) {
385
+ file_to_compact_ = f;
386
+ file_to_compact_level_ = stats.seek_file_level;
387
+ return true;
388
+ }
389
+ }
390
+ return false;
391
+ }
392
+
188
393
  void Version::Ref() {
189
394
  ++refs_;
190
395
  }
@@ -198,26 +403,89 @@ void Version::Unref() {
198
403
  }
199
404
  }
200
405
 
406
+ bool Version::OverlapInLevel(int level,
407
+ const Slice* smallest_user_key,
408
+ const Slice* largest_user_key) {
409
+ return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level],
410
+ smallest_user_key, largest_user_key);
411
+ }
412
+
413
+ int Version::PickLevelForMemTableOutput(
414
+ const Slice& smallest_user_key,
415
+ const Slice& largest_user_key) {
416
+ int level = 0;
417
+ if (!OverlapInLevel(0, &smallest_user_key, &largest_user_key)) {
418
+ // Push to next level if there is no overlap in next level,
419
+ // and the #bytes overlapping in the level after that are limited.
420
+ InternalKey start(smallest_user_key, kMaxSequenceNumber, kValueTypeForSeek);
421
+ InternalKey limit(largest_user_key, 0, static_cast<ValueType>(0));
422
+ std::vector<FileMetaData*> overlaps;
423
+ while (level < config::kMaxMemCompactLevel) {
424
+ if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
425
+ break;
426
+ }
427
+ GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
428
+ const int64_t sum = TotalFileSize(overlaps);
429
+ if (sum > kMaxGrandParentOverlapBytes) {
430
+ break;
431
+ }
432
+ level++;
433
+ }
434
+ }
435
+ return level;
436
+ }
437
+
438
+ // Store in "*inputs" all files in "level" that overlap [begin,end]
439
+ void Version::GetOverlappingInputs(
440
+ int level,
441
+ const InternalKey* begin,
442
+ const InternalKey* end,
443
+ std::vector<FileMetaData*>* inputs) {
444
+ inputs->clear();
445
+ Slice user_begin, user_end;
446
+ if (begin != NULL) {
447
+ user_begin = begin->user_key();
448
+ }
449
+ if (end != NULL) {
450
+ user_end = end->user_key();
451
+ }
452
+ const Comparator* user_cmp = vset_->icmp_.user_comparator();
453
+ for (size_t i = 0; i < files_[level].size(); i++) {
454
+ FileMetaData* f = files_[level][i];
455
+ if (begin != NULL &&
456
+ user_cmp->Compare(f->largest.user_key(), user_begin) < 0) {
457
+ // "f" is completely before specified range; skip it
458
+ } else if (end != NULL &&
459
+ user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
460
+ // "f" is completely after specified range; skip it
461
+ } else {
462
+ inputs->push_back(f);
463
+ }
464
+ }
465
+ }
466
+
201
467
  std::string Version::DebugString() const {
202
468
  std::string r;
203
469
  for (int level = 0; level < config::kNumLevels; level++) {
204
- // E.g., level 1: 17:123['a' .. 'd'] 20:43['e' .. 'g']
205
- r.append("level ");
470
+ // E.g.,
471
+ // --- level 1 ---
472
+ // 17:123['a' .. 'd']
473
+ // 20:43['e' .. 'g']
474
+ r.append("--- level ");
206
475
  AppendNumberTo(&r, level);
207
- r.push_back(':');
476
+ r.append(" ---\n");
208
477
  const std::vector<FileMetaData*>& files = files_[level];
209
478
  for (size_t i = 0; i < files.size(); i++) {
210
479
  r.push_back(' ');
211
480
  AppendNumberTo(&r, files[i]->number);
212
481
  r.push_back(':');
213
482
  AppendNumberTo(&r, files[i]->file_size);
214
- r.append("['");
215
- AppendEscapedStringTo(&r, files[i]->smallest.Encode());
216
- r.append("' .. '");
217
- AppendEscapedStringTo(&r, files[i]->largest.Encode());
218
- r.append("']");
483
+ r.append("[");
484
+ r.append(files[i]->smallest.DebugString());
485
+ r.append(" .. ");
486
+ r.append(files[i]->largest.DebugString());
487
+ r.append("]\n");
219
488
  }
220
- r.push_back('\n');
221
489
  }
222
490
  return r;
223
491
  }
@@ -267,10 +535,15 @@ class VersionSet::Builder {
267
535
 
268
536
  ~Builder() {
269
537
  for (int level = 0; level < config::kNumLevels; level++) {
270
- std::vector<FileMetaData*> to_unref(levels_[level].added_files->begin(),
271
- levels_[level].added_files->end());
272
- delete levels_[level].added_files;
273
- for (int i = 0; i < to_unref.size(); i++) {
538
+ const FileSet* added = levels_[level].added_files;
539
+ std::vector<FileMetaData*> to_unref;
540
+ to_unref.reserve(added->size());
541
+ for (FileSet::const_iterator it = added->begin();
542
+ it != added->end(); ++it) {
543
+ to_unref.push_back(*it);
544
+ }
545
+ delete added;
546
+ for (uint32_t i = 0; i < to_unref.size(); i++) {
274
547
  FileMetaData* f = to_unref[i];
275
548
  f->refs--;
276
549
  if (f->refs <= 0) {
@@ -305,6 +578,23 @@ class VersionSet::Builder {
305
578
  const int level = edit->new_files_[i].first;
306
579
  FileMetaData* f = new FileMetaData(edit->new_files_[i].second);
307
580
  f->refs = 1;
581
+
582
+ // We arrange to automatically compact this file after
583
+ // a certain number of seeks. Let's assume:
584
+ // (1) One seek costs 10ms
585
+ // (2) Writing or reading 1MB costs 10ms (100MB/s)
586
+ // (3) A compaction of 1MB does 25MB of IO:
587
+ // 1MB read from this level
588
+ // 10-12MB read from next level (boundaries may be misaligned)
589
+ // 10-12MB written to next level
590
+ // This implies that 25 seeks cost the same as the compaction
591
+ // of 1MB of data. I.e., one seek costs approximately the
592
+ // same as the compaction of 40KB of data. We are a little
593
+ // conservative and allow approximately one seek for every 16KB
594
+ // of data before triggering a compaction.
595
+ f->allowed_seeks = (f->file_size / 16384);
596
+ if (f->allowed_seeks < 100) f->allowed_seeks = 100;
597
+
308
598
  levels_[level].deleted_files.erase(f->number);
309
599
  levels_[level].added_files->insert(f);
310
600
  }
@@ -344,13 +634,13 @@ class VersionSet::Builder {
344
634
  #ifndef NDEBUG
345
635
  // Make sure there is no overlap in levels > 0
346
636
  if (level > 0) {
347
- for (int i = 1; i < v->files_[level].size(); i++) {
637
+ for (uint32_t i = 1; i < v->files_[level].size(); i++) {
348
638
  const InternalKey& prev_end = v->files_[level][i-1]->largest;
349
639
  const InternalKey& this_begin = v->files_[level][i]->smallest;
350
640
  if (vset_->icmp_.Compare(prev_end, this_begin) >= 0) {
351
641
  fprintf(stderr, "overlapping ranges in same level %s vs. %s\n",
352
- EscapeString(prev_end.Encode()).c_str(),
353
- EscapeString(this_begin.Encode()).c_str());
642
+ prev_end.DebugString().c_str(),
643
+ this_begin.DebugString().c_str());
354
644
  abort();
355
645
  }
356
646
  }
@@ -363,8 +653,14 @@ class VersionSet::Builder {
363
653
  if (levels_[level].deleted_files.count(f->number) > 0) {
364
654
  // File is deleted: do nothing
365
655
  } else {
656
+ std::vector<FileMetaData*>* files = &v->files_[level];
657
+ if (level > 0 && !files->empty()) {
658
+ // Must not overlap
659
+ assert(vset_->icmp_.Compare((*files)[files->size()-1]->largest,
660
+ f->smallest) < 0);
661
+ }
366
662
  f->refs++;
367
- v->files_[level].push_back(f);
663
+ files->push_back(f);
368
664
  }
369
665
  }
370
666
  };
@@ -414,7 +710,7 @@ void VersionSet::AppendVersion(Version* v) {
414
710
  v->next_->prev_ = v;
415
711
  }
416
712
 
417
- Status VersionSet::LogAndApply(VersionEdit* edit) {
713
+ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
418
714
  if (edit->has_log_number_) {
419
715
  assert(edit->log_number_ >= log_number_);
420
716
  assert(edit->log_number_ < next_file_number_);
@@ -442,6 +738,8 @@ Status VersionSet::LogAndApply(VersionEdit* edit) {
442
738
  std::string new_manifest_file;
443
739
  Status s;
444
740
  if (descriptor_log_ == NULL) {
741
+ // No reason to unlock *mu here since we only hit this path in the
742
+ // first call to LogAndApply (when opening the database).
445
743
  assert(descriptor_file_ == NULL);
446
744
  new_manifest_file = DescriptorFileName(dbname_, manifest_file_number_);
447
745
  edit->SetNextFile(next_file_number_);
@@ -452,20 +750,27 @@ Status VersionSet::LogAndApply(VersionEdit* edit) {
452
750
  }
453
751
  }
454
752
 
455
- // Write new record to MANIFEST log
456
- if (s.ok()) {
457
- std::string record;
458
- edit->EncodeTo(&record);
459
- s = descriptor_log_->AddRecord(record);
753
+ // Unlock during expensive MANIFEST log write
754
+ {
755
+ mu->Unlock();
756
+
757
+ // Write new record to MANIFEST log
460
758
  if (s.ok()) {
461
- s = descriptor_file_->Sync();
759
+ std::string record;
760
+ edit->EncodeTo(&record);
761
+ s = descriptor_log_->AddRecord(record);
762
+ if (s.ok()) {
763
+ s = descriptor_file_->Sync();
764
+ }
462
765
  }
463
- }
464
766
 
465
- // If we just created a new descriptor file, install it by writing a
466
- // new CURRENT file that points to it.
467
- if (s.ok() && !new_manifest_file.empty()) {
468
- s = SetCurrentFile(env_, dbname_, manifest_file_number_);
767
+ // If we just created a new descriptor file, install it by writing a
768
+ // new CURRENT file that points to it.
769
+ if (s.ok() && !new_manifest_file.empty()) {
770
+ s = SetCurrentFile(env_, dbname_, manifest_file_number_);
771
+ }
772
+
773
+ mu->Lock();
469
774
  }
470
775
 
471
776
  // Install the new version
@@ -581,6 +886,9 @@ Status VersionSet::Recover() {
581
886
  if (!have_prev_log_number) {
582
887
  prev_log_number = 0;
583
888
  }
889
+
890
+ MarkFileNumberUsed(prev_log_number);
891
+ MarkFileNumberUsed(log_number);
584
892
  }
585
893
 
586
894
  if (s.ok()) {
@@ -599,12 +907,10 @@ Status VersionSet::Recover() {
599
907
  return s;
600
908
  }
601
909
 
602
- static int64_t TotalFileSize(const std::vector<FileMetaData*>& files) {
603
- int64_t sum = 0;
604
- for (size_t i = 0; i < files.size(); i++) {
605
- sum += files[i]->file_size;
910
+ void VersionSet::MarkFileNumberUsed(uint64_t number) {
911
+ if (next_file_number_ <= number) {
912
+ next_file_number_ = number + 1;
606
913
  }
607
- return sum;
608
914
  }
609
915
 
610
916
  void VersionSet::Finalize(Version* v) {
@@ -749,10 +1055,11 @@ int64_t VersionSet::NumLevelBytes(int level) const {
749
1055
  int64_t VersionSet::MaxNextLevelOverlappingBytes() {
750
1056
  int64_t result = 0;
751
1057
  std::vector<FileMetaData*> overlaps;
752
- for (int level = 0; level < config::kNumLevels - 1; level++) {
1058
+ for (int level = 1; level < config::kNumLevels - 1; level++) {
753
1059
  for (size_t i = 0; i < current_->files_[level].size(); i++) {
754
1060
  const FileMetaData* f = current_->files_[level][i];
755
- GetOverlappingInputs(level+1, f->smallest, f->largest, &overlaps);
1061
+ current_->GetOverlappingInputs(level+1, &f->smallest, &f->largest,
1062
+ &overlaps);
756
1063
  const int64_t sum = TotalFileSize(overlaps);
757
1064
  if (sum > result) {
758
1065
  result = sum;
@@ -762,27 +1069,6 @@ int64_t VersionSet::MaxNextLevelOverlappingBytes() {
762
1069
  return result;
763
1070
  }
764
1071
 
765
- // Store in "*inputs" all files in "level" that overlap [begin,end]
766
- void VersionSet::GetOverlappingInputs(
767
- int level,
768
- const InternalKey& begin,
769
- const InternalKey& end,
770
- std::vector<FileMetaData*>* inputs) {
771
- inputs->clear();
772
- Slice user_begin = begin.user_key();
773
- Slice user_end = end.user_key();
774
- const Comparator* user_cmp = icmp_.user_comparator();
775
- for (size_t i = 0; i < current_->files_[level].size(); i++) {
776
- FileMetaData* f = current_->files_[level][i];
777
- if (user_cmp->Compare(f->largest.user_key(), user_begin) < 0 ||
778
- user_cmp->Compare(f->smallest.user_key(), user_end) > 0) {
779
- // Either completely before or after range; skip it
780
- } else {
781
- inputs->push_back(f);
782
- }
783
- }
784
- }
785
-
786
1072
  // Stores the minimal range that covers all entries in inputs in
787
1073
  // *smallest, *largest.
788
1074
  // REQUIRES: inputs is not empty
@@ -854,31 +1140,43 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) {
854
1140
  }
855
1141
 
856
1142
  Compaction* VersionSet::PickCompaction() {
857
- if (!NeedsCompaction()) {
1143
+ Compaction* c;
1144
+ int level;
1145
+
1146
+ // We prefer compactions triggered by too much data in a level over
1147
+ // the compactions triggered by seeks.
1148
+ const bool size_compaction = (current_->compaction_score_ >= 1);
1149
+ const bool seek_compaction = (current_->file_to_compact_ != NULL);
1150
+ if (size_compaction) {
1151
+ level = current_->compaction_level_;
1152
+ assert(level >= 0);
1153
+ assert(level+1 < config::kNumLevels);
1154
+ c = new Compaction(level);
1155
+
1156
+ // Pick the first file that comes after compact_pointer_[level]
1157
+ for (size_t i = 0; i < current_->files_[level].size(); i++) {
1158
+ FileMetaData* f = current_->files_[level][i];
1159
+ if (compact_pointer_[level].empty() ||
1160
+ icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
1161
+ c->inputs_[0].push_back(f);
1162
+ break;
1163
+ }
1164
+ }
1165
+ if (c->inputs_[0].empty()) {
1166
+ // Wrap-around to the beginning of the key space
1167
+ c->inputs_[0].push_back(current_->files_[level][0]);
1168
+ }
1169
+ } else if (seek_compaction) {
1170
+ level = current_->file_to_compact_level_;
1171
+ c = new Compaction(level);
1172
+ c->inputs_[0].push_back(current_->file_to_compact_);
1173
+ } else {
858
1174
  return NULL;
859
1175
  }
860
- const int level = current_->compaction_level_;
861
- assert(level >= 0);
862
- assert(level+1 < config::kNumLevels);
863
1176
 
864
- Compaction* c = new Compaction(level);
865
1177
  c->input_version_ = current_;
866
1178
  c->input_version_->Ref();
867
1179
 
868
- // Pick the first file that comes after compact_pointer_[level]
869
- for (size_t i = 0; i < current_->files_[level].size(); i++) {
870
- FileMetaData* f = current_->files_[level][i];
871
- if (compact_pointer_[level].empty() ||
872
- icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) {
873
- c->inputs_[0].push_back(f);
874
- break;
875
- }
876
- }
877
- if (c->inputs_[0].empty()) {
878
- // Wrap-around to the beginning of the key space
879
- c->inputs_[0].push_back(current_->files_[level][0]);
880
- }
881
-
882
1180
  // Files in level 0 may overlap each other, so pick up all overlapping ones
883
1181
  if (level == 0) {
884
1182
  InternalKey smallest, largest;
@@ -886,7 +1184,7 @@ Compaction* VersionSet::PickCompaction() {
886
1184
  // Note that the next call will discard the file we placed in
887
1185
  // c->inputs_[0] earlier and replace it with an overlapping set
888
1186
  // which will include the picked file.
889
- GetOverlappingInputs(0, smallest, largest, &c->inputs_[0]);
1187
+ current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]);
890
1188
  assert(!c->inputs_[0].empty());
891
1189
  }
892
1190
 
@@ -900,7 +1198,7 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
900
1198
  InternalKey smallest, largest;
901
1199
  GetRange(c->inputs_[0], &smallest, &largest);
902
1200
 
903
- GetOverlappingInputs(level+1, smallest, largest, &c->inputs_[1]);
1201
+ current_->GetOverlappingInputs(level+1, &smallest, &largest, &c->inputs_[1]);
904
1202
 
905
1203
  // Get entire range covered by compaction
906
1204
  InternalKey all_start, all_limit;
@@ -910,14 +1208,15 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
910
1208
  // changing the number of "level+1" files we pick up.
911
1209
  if (!c->inputs_[1].empty()) {
912
1210
  std::vector<FileMetaData*> expanded0;
913
- GetOverlappingInputs(level, all_start, all_limit, &expanded0);
1211
+ current_->GetOverlappingInputs(level, &all_start, &all_limit, &expanded0);
914
1212
  if (expanded0.size() > c->inputs_[0].size()) {
915
1213
  InternalKey new_start, new_limit;
916
1214
  GetRange(expanded0, &new_start, &new_limit);
917
1215
  std::vector<FileMetaData*> expanded1;
918
- GetOverlappingInputs(level+1, new_start, new_limit, &expanded1);
1216
+ current_->GetOverlappingInputs(level+1, &new_start, &new_limit,
1217
+ &expanded1);
919
1218
  if (expanded1.size() == c->inputs_[1].size()) {
920
- Log(env_, options_->info_log,
1219
+ Log(options_->info_log,
921
1220
  "Expanding@%d %d+%d to %d+%d\n",
922
1221
  level,
923
1222
  int(c->inputs_[0].size()),
@@ -936,14 +1235,15 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
936
1235
  // Compute the set of grandparent files that overlap this compaction
937
1236
  // (parent == level+1; grandparent == level+2)
938
1237
  if (level + 2 < config::kNumLevels) {
939
- GetOverlappingInputs(level + 2, all_start, all_limit, &c->grandparents_);
1238
+ current_->GetOverlappingInputs(level + 2, &all_start, &all_limit,
1239
+ &c->grandparents_);
940
1240
  }
941
1241
 
942
1242
  if (false) {
943
- Log(env_, options_->info_log, "Compacting %d '%s' .. '%s'",
1243
+ Log(options_->info_log, "Compacting %d '%s' .. '%s'",
944
1244
  level,
945
- EscapeString(smallest.Encode()).c_str(),
946
- EscapeString(largest.Encode()).c_str());
1245
+ smallest.DebugString().c_str(),
1246
+ largest.DebugString().c_str());
947
1247
  }
948
1248
 
949
1249
  // Update the place where we will do the next compaction for this level.
@@ -956,14 +1256,26 @@ void VersionSet::SetupOtherInputs(Compaction* c) {
956
1256
 
957
1257
  Compaction* VersionSet::CompactRange(
958
1258
  int level,
959
- const InternalKey& begin,
960
- const InternalKey& end) {
1259
+ const InternalKey* begin,
1260
+ const InternalKey* end) {
961
1261
  std::vector<FileMetaData*> inputs;
962
- GetOverlappingInputs(level, begin, end, &inputs);
1262
+ current_->GetOverlappingInputs(level, begin, end, &inputs);
963
1263
  if (inputs.empty()) {
964
1264
  return NULL;
965
1265
  }
966
1266
 
1267
+ // Avoid compacting too much in one shot in case the range is large.
1268
+ const uint64_t limit = MaxFileSizeForLevel(level);
1269
+ uint64_t total = 0;
1270
+ for (int i = 0; i < inputs.size(); i++) {
1271
+ uint64_t s = inputs[i]->file_size;
1272
+ total += s;
1273
+ if (total >= limit) {
1274
+ inputs.resize(i + 1);
1275
+ break;
1276
+ }
1277
+ }
1278
+
967
1279
  Compaction* c = new Compaction(level);
968
1280
  c->input_version_ = current_;
969
1281
  c->input_version_->Ref();