leveldb-ruby 0.7 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/README +1 -1
  2. data/leveldb/Makefile +70 -29
  3. data/leveldb/build_detect_platform +74 -0
  4. data/leveldb/db/builder.cc +2 -4
  5. data/leveldb/db/builder.h +4 -6
  6. data/leveldb/db/c.cc +471 -0
  7. data/leveldb/db/corruption_test.cc +21 -16
  8. data/leveldb/db/db_bench.cc +400 -200
  9. data/leveldb/db/db_impl.cc +276 -131
  10. data/leveldb/db/db_impl.h +22 -10
  11. data/leveldb/db/db_iter.cc +2 -1
  12. data/leveldb/db/db_test.cc +391 -43
  13. data/leveldb/db/dbformat.cc +31 -0
  14. data/leveldb/db/dbformat.h +51 -1
  15. data/leveldb/db/filename.h +1 -1
  16. data/leveldb/db/log_format.h +1 -1
  17. data/leveldb/db/log_reader.cc +16 -11
  18. data/leveldb/db/memtable.cc +37 -0
  19. data/leveldb/db/memtable.h +6 -0
  20. data/leveldb/db/repair.cc +17 -14
  21. data/leveldb/db/skiplist_test.cc +2 -2
  22. data/leveldb/db/version_edit.cc +7 -9
  23. data/leveldb/db/version_edit.h +2 -1
  24. data/leveldb/db/version_set.cc +416 -104
  25. data/leveldb/db/version_set.h +78 -14
  26. data/leveldb/db/version_set_test.cc +179 -0
  27. data/leveldb/db/write_batch_internal.h +2 -0
  28. data/leveldb/include/leveldb/c.h +246 -0
  29. data/leveldb/include/leveldb/db.h +14 -2
  30. data/leveldb/include/leveldb/env.h +31 -10
  31. data/leveldb/include/leveldb/options.h +7 -18
  32. data/leveldb/include/leveldb/slice.h +2 -2
  33. data/leveldb/include/leveldb/status.h +1 -1
  34. data/leveldb/port/atomic_pointer.h +144 -0
  35. data/leveldb/port/port.h +0 -2
  36. data/leveldb/port/port_android.h +7 -1
  37. data/leveldb/port/port_example.h +11 -1
  38. data/leveldb/port/port_posix.h +56 -38
  39. data/leveldb/table/format.cc +12 -8
  40. data/leveldb/table/table_test.cc +16 -7
  41. data/leveldb/util/cache.cc +173 -100
  42. data/leveldb/util/cache_test.cc +28 -11
  43. data/leveldb/util/coding.h +4 -4
  44. data/leveldb/util/comparator.cc +1 -0
  45. data/leveldb/util/env.cc +10 -5
  46. data/leveldb/util/env_posix.cc +48 -87
  47. data/leveldb/util/histogram.cc +11 -0
  48. data/leveldb/util/histogram.h +1 -0
  49. data/leveldb/util/posix_logger.h +98 -0
  50. data/leveldb/util/testharness.cc +12 -0
  51. data/leveldb/util/testharness.h +10 -1
  52. data/lib/leveldb.rb +11 -3
  53. metadata +41 -22
data/leveldb/db/db_impl.h CHANGED
@@ -38,14 +38,12 @@ class DBImpl : public DB {
38
38
  virtual void ReleaseSnapshot(const Snapshot* snapshot);
39
39
  virtual bool GetProperty(const Slice& property, std::string* value);
40
40
  virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
41
+ virtual void CompactRange(const Slice* begin, const Slice* end);
41
42
 
42
43
  // Extra methods (for testing) that are not in the public DB interface
43
44
 
44
- // Compact any files in the named level that overlap [begin,end]
45
- void TEST_CompactRange(
46
- int level,
47
- const std::string& begin,
48
- const std::string& end);
45
+ // Compact any files in the named level that overlap [*begin,*end]
46
+ void TEST_CompactRange(int level, const Slice* begin, const Slice* end);
49
47
 
50
48
  // Force current memtable contents to be compacted.
51
49
  Status TEST_CompactMemTable();
@@ -85,7 +83,12 @@ class DBImpl : public DB {
85
83
  VersionEdit* edit,
86
84
  SequenceNumber* max_sequence);
87
85
 
88
- Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
86
+ Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base);
87
+
88
+ // Only one thread is allowed to log at a time.
89
+ struct LoggerId { }; // Opaque identifier for logging thread
90
+ void AcquireLoggingResponsibility(LoggerId* self);
91
+ void ReleaseLoggingResponsibility(LoggerId* self);
89
92
 
90
93
  Status MakeRoomForWrite(bool force /* compact even if there is room? */);
91
94
 
@@ -119,13 +122,15 @@ class DBImpl : public DB {
119
122
  // State below is protected by mutex_
120
123
  port::Mutex mutex_;
121
124
  port::AtomicPointer shutting_down_;
122
- port::CondVar bg_cv_; // Signalled when !bg_compaction_scheduled_
123
- port::CondVar compacting_cv_; // Signalled when !compacting_
125
+ port::CondVar bg_cv_; // Signalled when background work finishes
124
126
  MemTable* mem_;
125
127
  MemTable* imm_; // Memtable being compacted
126
128
  port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
127
129
  WritableFile* logfile_;
130
+ uint64_t logfile_number_;
128
131
  log::Writer* log_;
132
+ LoggerId* logger_; // NULL, or the id of the current logging thread
133
+ port::CondVar logger_cv_; // For threads waiting to log
129
134
  SnapshotList snapshots_;
130
135
 
131
136
  // Set of table files to protect from deletion because they are
@@ -135,8 +140,15 @@ class DBImpl : public DB {
135
140
  // Has a background compaction been scheduled or is running?
136
141
  bool bg_compaction_scheduled_;
137
142
 
138
- // Is there a compaction running?
139
- bool compacting_;
143
+ // Information for a manual compaction
144
+ struct ManualCompaction {
145
+ int level;
146
+ bool done;
147
+ const InternalKey* begin; // NULL means beginning of key range
148
+ const InternalKey* end; // NULL means end of key range
149
+ InternalKey tmp_storage; // Used to keep track of compaction progress
150
+ };
151
+ ManualCompaction* manual_compaction_;
140
152
 
141
153
  VersionSet* versions_;
142
154
 
@@ -216,7 +216,6 @@ void DBIter::FindPrevUserEntry() {
216
216
 
217
217
  ValueType value_type = kTypeDeletion;
218
218
  if (iter_->Valid()) {
219
- SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
220
219
  do {
221
220
  ParsedInternalKey ikey;
222
221
  if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
@@ -227,6 +226,7 @@ void DBIter::FindPrevUserEntry() {
227
226
  }
228
227
  value_type = ikey.type;
229
228
  if (value_type == kTypeDeletion) {
229
+ saved_key_.clear();
230
230
  ClearSavedValue();
231
231
  } else {
232
232
  Slice raw_value = iter_->value();
@@ -234,6 +234,7 @@ void DBIter::FindPrevUserEntry() {
234
234
  std::string empty;
235
235
  swap(empty, saved_value_);
236
236
  }
237
+ SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
237
238
  saved_value_.assign(raw_value.data(), raw_value.size());
238
239
  }
239
240
  }
@@ -10,6 +10,7 @@
10
10
  #include "leveldb/env.h"
11
11
  #include "leveldb/table.h"
12
12
  #include "util/logging.h"
13
+ #include "util/mutexlock.h"
13
14
  #include "util/testharness.h"
14
15
  #include "util/testutil.h"
15
16
 
@@ -21,15 +22,58 @@ static std::string RandomString(Random* rnd, int len) {
21
22
  return r;
22
23
  }
23
24
 
25
+ // Special Env used to delay background operations
26
+ class SpecialEnv : public EnvWrapper {
27
+ public:
28
+ // sstable Sync() calls are blocked while this pointer is non-NULL.
29
+ port::AtomicPointer delay_sstable_sync_;
30
+
31
+ explicit SpecialEnv(Env* base) : EnvWrapper(base) {
32
+ delay_sstable_sync_.Release_Store(NULL);
33
+ }
34
+
35
+ Status NewWritableFile(const std::string& f, WritableFile** r) {
36
+ class SSTableFile : public WritableFile {
37
+ private:
38
+ SpecialEnv* env_;
39
+ WritableFile* base_;
40
+
41
+ public:
42
+ SSTableFile(SpecialEnv* env, WritableFile* base)
43
+ : env_(env),
44
+ base_(base) {
45
+ }
46
+ ~SSTableFile() { delete base_; }
47
+ Status Append(const Slice& data) { return base_->Append(data); }
48
+ Status Close() { return base_->Close(); }
49
+ Status Flush() { return base_->Flush(); }
50
+ Status Sync() {
51
+ while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
52
+ env_->SleepForMicroseconds(100000);
53
+ }
54
+ return base_->Sync();
55
+ }
56
+ };
57
+
58
+ Status s = target()->NewWritableFile(f, r);
59
+ if (s.ok()) {
60
+ if (strstr(f.c_str(), ".sst") != NULL) {
61
+ *r = new SSTableFile(this, *r);
62
+ }
63
+ }
64
+ return s;
65
+ }
66
+ };
67
+
24
68
  class DBTest {
25
69
  public:
26
70
  std::string dbname_;
27
- Env* env_;
71
+ SpecialEnv* env_;
28
72
  DB* db_;
29
73
 
30
74
  Options last_options_;
31
75
 
32
- DBTest() : env_(Env::Default()) {
76
+ DBTest() : env_(new SpecialEnv(Env::Default())) {
33
77
  dbname_ = test::TmpDir() + "/db_test";
34
78
  DestroyDB(dbname_, Options());
35
79
  db_ = NULL;
@@ -39,6 +83,7 @@ class DBTest {
39
83
  ~DBTest() {
40
84
  delete db_;
41
85
  DestroyDB(dbname_, Options());
86
+ delete env_;
42
87
  }
43
88
 
44
89
  DBImpl* dbfull() {
@@ -142,6 +187,31 @@ class DBTest {
142
187
  return atoi(property.c_str());
143
188
  }
144
189
 
190
+ int TotalTableFiles() {
191
+ int result = 0;
192
+ for (int level = 0; level < config::kNumLevels; level++) {
193
+ result += NumTableFilesAtLevel(level);
194
+ }
195
+ return result;
196
+ }
197
+
198
+ // Return spread of files per level
199
+ std::string FilesPerLevel() {
200
+ std::string result;
201
+ int last_non_zero_offset = 0;
202
+ for (int level = 0; level < config::kNumLevels; level++) {
203
+ int f = NumTableFilesAtLevel(level);
204
+ char buf[100];
205
+ snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
206
+ result += buf;
207
+ if (f > 0) {
208
+ last_non_zero_offset = result.size();
209
+ }
210
+ }
211
+ result.resize(last_non_zero_offset);
212
+ return result;
213
+ }
214
+
145
215
  uint64_t Size(const Slice& start, const Slice& limit) {
146
216
  Range r(start, limit);
147
217
  uint64_t size;
@@ -150,18 +220,25 @@ class DBTest {
150
220
  }
151
221
 
152
222
  void Compact(const Slice& start, const Slice& limit) {
153
- dbfull()->TEST_CompactMemTable();
154
- int max_level_with_files = 1;
155
- for (int level = 1; level < config::kNumLevels; level++) {
156
- if (NumTableFilesAtLevel(level) > 0) {
157
- max_level_with_files = level;
158
- }
159
- }
160
- for (int level = 0; level < max_level_with_files; level++) {
161
- dbfull()->TEST_CompactRange(level, "", "~");
223
+ db_->CompactRange(&start, &limit);
224
+ }
225
+
226
+ // Do n memtable compactions, each of which produces an sstable
227
+ // covering the range [small,large].
228
+ void MakeTables(int n, const std::string& small, const std::string& large) {
229
+ for (int i = 0; i < n; i++) {
230
+ Put(small, "begin");
231
+ Put(large, "end");
232
+ dbfull()->TEST_CompactMemTable();
162
233
  }
163
234
  }
164
235
 
236
+ // Prevent pushing of new sstables into deeper levels by adding
237
+ // tables that cover a specified range to all levels.
238
+ void FillLevels(const std::string& smallest, const std::string& largest) {
239
+ MakeTables(config::kNumLevels, smallest, largest);
240
+ }
241
+
165
242
  void DumpFileCounts(const char* label) {
166
243
  fprintf(stderr, "---\n%s:\n", label);
167
244
  fprintf(stderr, "maxoverlap: %lld\n",
@@ -175,6 +252,12 @@ class DBTest {
175
252
  }
176
253
  }
177
254
 
255
+ std::string DumpSSTableList() {
256
+ std::string property;
257
+ db_->GetProperty("leveldb.sstables", &property);
258
+ return property;
259
+ }
260
+
178
261
  std::string IterStatus(Iterator* iter) {
179
262
  std::string result;
180
263
  if (iter->Valid()) {
@@ -209,6 +292,115 @@ TEST(DBTest, PutDeleteGet) {
209
292
  ASSERT_EQ("NOT_FOUND", Get("foo"));
210
293
  }
211
294
 
295
+ TEST(DBTest, GetFromImmutableLayer) {
296
+ Options options;
297
+ options.env = env_;
298
+ options.write_buffer_size = 100000; // Small write buffer
299
+ Reopen(&options);
300
+
301
+ ASSERT_OK(Put("foo", "v1"));
302
+ ASSERT_EQ("v1", Get("foo"));
303
+
304
+ env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
305
+ Put("k1", std::string(100000, 'x')); // Fill memtable
306
+ Put("k2", std::string(100000, 'y')); // Trigger compaction
307
+ ASSERT_EQ("v1", Get("foo"));
308
+ env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls
309
+ }
310
+
311
+ TEST(DBTest, GetFromVersions) {
312
+ ASSERT_OK(Put("foo", "v1"));
313
+ dbfull()->TEST_CompactMemTable();
314
+ ASSERT_EQ("v1", Get("foo"));
315
+ }
316
+
317
+ TEST(DBTest, GetSnapshot) {
318
+ // Try with both a short key and a long key
319
+ for (int i = 0; i < 2; i++) {
320
+ std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
321
+ ASSERT_OK(Put(key, "v1"));
322
+ const Snapshot* s1 = db_->GetSnapshot();
323
+ ASSERT_OK(Put(key, "v2"));
324
+ ASSERT_EQ("v2", Get(key));
325
+ ASSERT_EQ("v1", Get(key, s1));
326
+ dbfull()->TEST_CompactMemTable();
327
+ ASSERT_EQ("v2", Get(key));
328
+ ASSERT_EQ("v1", Get(key, s1));
329
+ db_->ReleaseSnapshot(s1);
330
+ }
331
+ }
332
+
333
+ TEST(DBTest, GetLevel0Ordering) {
334
+ // Check that we process level-0 files in correct order. The code
335
+ // below generates two level-0 files where the earlier one comes
336
+ // before the later one in the level-0 file list since the earlier
337
+ // one has a smaller "smallest" key.
338
+ ASSERT_OK(Put("bar", "b"));
339
+ ASSERT_OK(Put("foo", "v1"));
340
+ dbfull()->TEST_CompactMemTable();
341
+ ASSERT_OK(Put("foo", "v2"));
342
+ dbfull()->TEST_CompactMemTable();
343
+ ASSERT_EQ("v2", Get("foo"));
344
+ }
345
+
346
+ TEST(DBTest, GetOrderedByLevels) {
347
+ ASSERT_OK(Put("foo", "v1"));
348
+ Compact("a", "z");
349
+ ASSERT_EQ("v1", Get("foo"));
350
+ ASSERT_OK(Put("foo", "v2"));
351
+ ASSERT_EQ("v2", Get("foo"));
352
+ dbfull()->TEST_CompactMemTable();
353
+ ASSERT_EQ("v2", Get("foo"));
354
+ }
355
+
356
+ TEST(DBTest, GetPicksCorrectFile) {
357
+ // Arrange to have multiple files in a non-level-0 level.
358
+ ASSERT_OK(Put("a", "va"));
359
+ Compact("a", "b");
360
+ ASSERT_OK(Put("x", "vx"));
361
+ Compact("x", "y");
362
+ ASSERT_OK(Put("f", "vf"));
363
+ Compact("f", "g");
364
+ ASSERT_EQ("va", Get("a"));
365
+ ASSERT_EQ("vf", Get("f"));
366
+ ASSERT_EQ("vx", Get("x"));
367
+ }
368
+
369
+ TEST(DBTest, GetEncountersEmptyLevel) {
370
+ // Arrange for the following to happen:
371
+ // * sstable A in level 0
372
+ // * nothing in level 1
373
+ // * sstable B in level 2
374
+ // Then do enough Get() calls to arrange for an automatic compaction
375
+ // of sstable A. A bug would cause the compaction to be marked as
376
+ // occurring at level 1 (instead of the correct level 0).
377
+
378
+ // Step 1: First place sstables in levels 0 and 2
379
+ int compaction_count = 0;
380
+ while (NumTableFilesAtLevel(0) == 0 ||
381
+ NumTableFilesAtLevel(2) == 0) {
382
+ ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
383
+ compaction_count++;
384
+ Put("a", "begin");
385
+ Put("z", "end");
386
+ dbfull()->TEST_CompactMemTable();
387
+ }
388
+
389
+ // Step 2: clear level 1 if necessary.
390
+ dbfull()->TEST_CompactRange(1, NULL, NULL);
391
+ ASSERT_EQ(NumTableFilesAtLevel(0), 1);
392
+ ASSERT_EQ(NumTableFilesAtLevel(1), 0);
393
+ ASSERT_EQ(NumTableFilesAtLevel(2), 1);
394
+
395
+ // Step 3: read until level 0 compaction disappears.
396
+ int read_count = 0;
397
+ while (NumTableFilesAtLevel(0) > 0) {
398
+ ASSERT_LE(read_count, 10000) << "did not trigger level 0 compaction";
399
+ read_count++;
400
+ ASSERT_EQ("NOT_FOUND", Get("missing"));
401
+ }
402
+ }
403
+
212
404
  TEST(DBTest, IterEmpty) {
213
405
  Iterator* iter = db_->NewIterator(ReadOptions());
214
406
 
@@ -383,6 +575,21 @@ TEST(DBTest, IterSmallAndLargeMix) {
383
575
  delete iter;
384
576
  }
385
577
 
578
+ TEST(DBTest, IterMultiWithDelete) {
579
+ ASSERT_OK(Put("a", "va"));
580
+ ASSERT_OK(Put("b", "vb"));
581
+ ASSERT_OK(Put("c", "vc"));
582
+ ASSERT_OK(Delete("b"));
583
+ ASSERT_EQ("NOT_FOUND", Get("b"));
584
+
585
+ Iterator* iter = db_->NewIterator(ReadOptions());
586
+ iter->Seek("c");
587
+ ASSERT_EQ(IterStatus(iter), "c->vc");
588
+ iter->Prev();
589
+ ASSERT_EQ(IterStatus(iter), "a->va");
590
+ delete iter;
591
+ }
592
+
386
593
  TEST(DBTest, Recover) {
387
594
  ASSERT_OK(Put("foo", "v1"));
388
595
  ASSERT_OK(Put("baz", "v5"));
@@ -413,6 +620,27 @@ TEST(DBTest, RecoveryWithEmptyLog) {
413
620
  ASSERT_EQ("v3", Get("foo"));
414
621
  }
415
622
 
623
+ // Check that writes done during a memtable compaction are recovered
624
+ // if the database is shutdown during the memtable compaction.
625
+ TEST(DBTest, RecoverDuringMemtableCompaction) {
626
+ Options options;
627
+ options.env = env_;
628
+ options.write_buffer_size = 1000000;
629
+ Reopen(&options);
630
+
631
+ // Trigger a long memtable compaction and reopen the database during it
632
+ ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
633
+ ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
634
+ ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
635
+ ASSERT_OK(Put("bar", "v2")); // Goes to new log file
636
+
637
+ Reopen(&options);
638
+ ASSERT_EQ("v1", Get("foo"));
639
+ ASSERT_EQ("v2", Get("bar"));
640
+ ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
641
+ ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
642
+ }
643
+
416
644
  static std::string Key(int i) {
417
645
  char buf[100];
418
646
  snprintf(buf, sizeof(buf), "key%06d", i);
@@ -426,11 +654,11 @@ TEST(DBTest, MinorCompactionsHappen) {
426
654
 
427
655
  const int N = 500;
428
656
 
429
- int starting_num_tables = NumTableFilesAtLevel(0);
657
+ int starting_num_tables = TotalTableFiles();
430
658
  for (int i = 0; i < N; i++) {
431
659
  ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
432
660
  }
433
- int ending_num_tables = NumTableFilesAtLevel(0);
661
+ int ending_num_tables = TotalTableFiles();
434
662
  ASSERT_GT(ending_num_tables, starting_num_tables);
435
663
 
436
664
  for (int i = 0; i < N; i++) {
@@ -485,7 +713,7 @@ TEST(DBTest, CompactionsGenerateMultipleFiles) {
485
713
 
486
714
  // Reopening moves updates to level-0
487
715
  Reopen(&options);
488
- dbfull()->TEST_CompactRange(0, "", Key(100000));
716
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
489
717
 
490
718
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
491
719
  ASSERT_GT(NumTableFilesAtLevel(1), 1);
@@ -494,11 +722,32 @@ TEST(DBTest, CompactionsGenerateMultipleFiles) {
494
722
  }
495
723
  }
496
724
 
725
+ TEST(DBTest, RepeatedWritesToSameKey) {
726
+ Options options;
727
+ options.env = env_;
728
+ options.write_buffer_size = 100000; // Small write buffer
729
+ Reopen(&options);
730
+
731
+ // We must have at most one file per level except for level-0,
732
+ // which may have up to kL0_StopWritesTrigger files.
733
+ const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
734
+
735
+ Random rnd(301);
736
+ std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
737
+ for (int i = 0; i < 5 * kMaxFiles; i++) {
738
+ Put("key", value);
739
+ ASSERT_LE(TotalTableFiles(), kMaxFiles);
740
+ fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
741
+ }
742
+ }
743
+
497
744
  TEST(DBTest, SparseMerge) {
498
745
  Options options;
499
746
  options.compression = kNoCompression;
500
747
  Reopen(&options);
501
748
 
749
+ FillLevels("A", "Z");
750
+
502
751
  // Suppose there is:
503
752
  // small amount of data with prefix A
504
753
  // large amount of data with prefix B
@@ -514,7 +763,8 @@ TEST(DBTest, SparseMerge) {
514
763
  Put(key, value);
515
764
  }
516
765
  Put("C", "vc");
517
- Compact("", "z");
766
+ dbfull()->TEST_CompactMemTable();
767
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
518
768
 
519
769
  // Make sparse update
520
770
  Put("A", "va2");
@@ -525,9 +775,9 @@ TEST(DBTest, SparseMerge) {
525
775
  // Compactions should not cause us to create a situation where
526
776
  // a file overlaps too much data at the next level.
527
777
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
528
- dbfull()->TEST_CompactRange(0, "", "z");
778
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
529
779
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
530
- dbfull()->TEST_CompactRange(1, "", "z");
780
+ dbfull()->TEST_CompactRange(1, NULL, NULL);
531
781
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
532
782
  }
533
783
 
@@ -578,9 +828,11 @@ TEST(DBTest, ApproximateSizes) {
578
828
  ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
579
829
  ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
580
830
 
581
- dbfull()->TEST_CompactRange(0,
582
- Key(compact_start),
583
- Key(compact_start + 9));
831
+ std::string cstart_str = Key(compact_start);
832
+ std::string cend_str = Key(compact_start + 9);
833
+ Slice cstart = cstart_str;
834
+ Slice cend = cend_str;
835
+ dbfull()->TEST_CompactRange(0, &cstart, &cend);
584
836
  }
585
837
 
586
838
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
@@ -620,7 +872,7 @@ TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
620
872
 
621
873
  ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
622
874
 
623
- dbfull()->TEST_CompactRange(0, Key(0), Key(100));
875
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
624
876
  }
625
877
  }
626
878
 
@@ -675,6 +927,8 @@ TEST(DBTest, Snapshot) {
675
927
 
676
928
  TEST(DBTest, HiddenValuesAreRemoved) {
677
929
  Random rnd(301);
930
+ FillLevels("a", "z");
931
+
678
932
  std::string big = RandomString(&rnd, 50000);
679
933
  Put("foo", big);
680
934
  Put("pastfoo", "v");
@@ -689,11 +943,12 @@ TEST(DBTest, HiddenValuesAreRemoved) {
689
943
  ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
690
944
  db_->ReleaseSnapshot(snapshot);
691
945
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
692
- dbfull()->TEST_CompactRange(0, "", "x");
946
+ Slice x("x");
947
+ dbfull()->TEST_CompactRange(0, NULL, &x);
693
948
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
694
949
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
695
950
  ASSERT_GE(NumTableFilesAtLevel(1), 1);
696
- dbfull()->TEST_CompactRange(1, "", "x");
951
+ dbfull()->TEST_CompactRange(1, NULL, &x);
697
952
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
698
953
 
699
954
  ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
@@ -702,43 +957,97 @@ TEST(DBTest, HiddenValuesAreRemoved) {
702
957
  TEST(DBTest, DeletionMarkers1) {
703
958
  Put("foo", "v1");
704
959
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
705
- dbfull()->TEST_CompactRange(0, "", "z");
706
- dbfull()->TEST_CompactRange(1, "", "z");
707
- ASSERT_EQ(NumTableFilesAtLevel(2), 1); // foo => v1 is now in level 2 file
960
+ const int last = config::kMaxMemCompactLevel;
961
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
962
+
963
+ // Place a table at level last-1 to prevent merging with preceding mutation
964
+ Put("a", "begin");
965
+ Put("z", "end");
966
+ dbfull()->TEST_CompactMemTable();
967
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1);
968
+ ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
969
+
708
970
  Delete("foo");
709
971
  Put("foo", "v2");
710
972
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
711
- ASSERT_OK(dbfull()->TEST_CompactMemTable());
973
+ ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
712
974
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
713
- dbfull()->TEST_CompactRange(0, "", "z");
975
+ Slice z("z");
976
+ dbfull()->TEST_CompactRange(last-2, NULL, &z);
714
977
  // DEL eliminated, but v1 remains because we aren't compacting that level
715
978
  // (DEL can be eliminated because v2 hides v1).
716
979
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
717
- dbfull()->TEST_CompactRange(1, "", "z");
718
- // Merging L1 w/ L2, so we are the base level for "foo", so DEL is removed.
719
- // (as is v1).
980
+ dbfull()->TEST_CompactRange(last-1, NULL, NULL);
981
+ // Merging last-1 w/ last, so we are the base level for "foo", so
982
+ // DEL is removed. (as is v1).
720
983
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
721
984
  }
722
985
 
723
986
  TEST(DBTest, DeletionMarkers2) {
724
987
  Put("foo", "v1");
725
988
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
726
- dbfull()->TEST_CompactRange(0, "", "z");
727
- dbfull()->TEST_CompactRange(1, "", "z");
728
- ASSERT_EQ(NumTableFilesAtLevel(2), 1); // foo => v1 is now in level 2 file
989
+ const int last = config::kMaxMemCompactLevel;
990
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
991
+
992
+ // Place a table at level last-1 to prevent merging with preceding mutation
993
+ Put("a", "begin");
994
+ Put("z", "end");
995
+ dbfull()->TEST_CompactMemTable();
996
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1);
997
+ ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
998
+
729
999
  Delete("foo");
730
1000
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
731
- ASSERT_OK(dbfull()->TEST_CompactMemTable());
1001
+ ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
732
1002
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
733
- dbfull()->TEST_CompactRange(0, "", "z");
734
- // DEL kept: L2 file overlaps
1003
+ dbfull()->TEST_CompactRange(last-2, NULL, NULL);
1004
+ // DEL kept: "last" file overlaps
735
1005
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
736
- dbfull()->TEST_CompactRange(1, "", "z");
737
- // Merging L1 w/ L2, so we are the base level for "foo", so DEL is removed.
738
- // (as is v1).
1006
+ dbfull()->TEST_CompactRange(last-1, NULL, NULL);
1007
+ // Merging last-1 w/ last, so we are the base level for "foo", so
1008
+ // DEL is removed. (as is v1).
739
1009
  ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
740
1010
  }
741
1011
 
1012
+ TEST(DBTest, OverlapInLevel0) {
1013
+ ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
1014
+
1015
+ // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
1016
+ ASSERT_OK(Put("100", "v100"));
1017
+ ASSERT_OK(Put("999", "v999"));
1018
+ dbfull()->TEST_CompactMemTable();
1019
+ ASSERT_OK(Delete("100"));
1020
+ ASSERT_OK(Delete("999"));
1021
+ dbfull()->TEST_CompactMemTable();
1022
+ ASSERT_EQ("0,1,1", FilesPerLevel());
1023
+
1024
+ // Make files spanning the following ranges in level-0:
1025
+ // files[0] 200 .. 900
1026
+ // files[1] 300 .. 500
1027
+ // Note that files are sorted by smallest key.
1028
+ ASSERT_OK(Put("300", "v300"));
1029
+ ASSERT_OK(Put("500", "v500"));
1030
+ dbfull()->TEST_CompactMemTable();
1031
+ ASSERT_OK(Put("200", "v200"));
1032
+ ASSERT_OK(Put("600", "v600"));
1033
+ ASSERT_OK(Put("900", "v900"));
1034
+ dbfull()->TEST_CompactMemTable();
1035
+ ASSERT_EQ("2,1,1", FilesPerLevel());
1036
+
1037
+ // Compact away the placeholder files we created initially
1038
+ dbfull()->TEST_CompactRange(1, NULL, NULL);
1039
+ dbfull()->TEST_CompactRange(2, NULL, NULL);
1040
+ ASSERT_EQ("2", FilesPerLevel());
1041
+
1042
+ // Do a memtable compaction. Before bug-fix, the compaction would
1043
+ // not detect the overlap with level-0 files and would incorrectly place
1044
+ // the deletion in a deeper level.
1045
+ ASSERT_OK(Delete("600"));
1046
+ dbfull()->TEST_CompactMemTable();
1047
+ ASSERT_EQ("3", FilesPerLevel());
1048
+ ASSERT_EQ("NOT_FOUND", Get("600"));
1049
+ }
1050
+
742
1051
  TEST(DBTest, ComparatorCheck) {
743
1052
  class NewComparator : public Comparator {
744
1053
  public:
@@ -762,6 +1071,40 @@ TEST(DBTest, ComparatorCheck) {
762
1071
  << s.ToString();
763
1072
  }
764
1073
 
1074
+ TEST(DBTest, ManualCompaction) {
1075
+ ASSERT_EQ(config::kMaxMemCompactLevel, 2)
1076
+ << "Need to update this test to match kMaxMemCompactLevel";
1077
+
1078
+ MakeTables(3, "p", "q");
1079
+ ASSERT_EQ("1,1,1", FilesPerLevel());
1080
+
1081
+ // Compaction range falls before files
1082
+ Compact("", "c");
1083
+ ASSERT_EQ("1,1,1", FilesPerLevel());
1084
+
1085
+ // Compaction range falls after files
1086
+ Compact("r", "z");
1087
+ ASSERT_EQ("1,1,1", FilesPerLevel());
1088
+
1089
+ // Compaction range overlaps files
1090
+ Compact("p1", "p9");
1091
+ ASSERT_EQ("0,0,1", FilesPerLevel());
1092
+
1093
+ // Populate a different range
1094
+ MakeTables(3, "c", "e");
1095
+ ASSERT_EQ("1,1,2", FilesPerLevel());
1096
+
1097
+ // Compact just the new range
1098
+ Compact("b", "f");
1099
+ ASSERT_EQ("0,0,2", FilesPerLevel());
1100
+
1101
+ // Compact all
1102
+ MakeTables(1, "a", "z");
1103
+ ASSERT_EQ("0,1,2", FilesPerLevel());
1104
+ db_->CompactRange(NULL, NULL);
1105
+ ASSERT_EQ("0,0,1", FilesPerLevel());
1106
+ }
1107
+
765
1108
  TEST(DBTest, DBOpen_Options) {
766
1109
  std::string dbname = test::TmpDir() + "/db_options_test";
767
1110
  DestroyDB(dbname, Options());
@@ -941,7 +1284,6 @@ class ModelDB: public DB {
941
1284
  delete reinterpret_cast<const ModelSnapshot*>(snapshot);
942
1285
  }
943
1286
  virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
944
- assert(options.post_write_snapshot == NULL); // Not supported
945
1287
  class Handler : public WriteBatch::Handler {
946
1288
  public:
947
1289
  KVMap* map_;
@@ -965,6 +1307,9 @@ class ModelDB: public DB {
965
1307
  sizes[i] = 0;
966
1308
  }
967
1309
  }
1310
+ virtual void CompactRange(const Slice* start, const Slice* end) {
1311
+ }
1312
+
968
1313
  private:
969
1314
  class ModelIter: public Iterator {
970
1315
  public:
@@ -1145,6 +1490,9 @@ void BM_LogAndApply(int iters, int num_base_files) {
1145
1490
 
1146
1491
  Env* env = Env::Default();
1147
1492
 
1493
+ port::Mutex mu;
1494
+ MutexLock l(&mu);
1495
+
1148
1496
  InternalKeyComparator cmp(BytewiseComparator());
1149
1497
  Options options;
1150
1498
  VersionSet vset(dbname, &options, NULL, &cmp);
@@ -1156,7 +1504,7 @@ void BM_LogAndApply(int iters, int num_base_files) {
1156
1504
  InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
1157
1505
  vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
1158
1506
  }
1159
- ASSERT_OK(vset.LogAndApply(&vbase));
1507
+ ASSERT_OK(vset.LogAndApply(&vbase, &mu));
1160
1508
 
1161
1509
  uint64_t start_micros = env->NowMicros();
1162
1510
 
@@ -1166,7 +1514,7 @@ void BM_LogAndApply(int iters, int num_base_files) {
1166
1514
  InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
1167
1515
  InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
1168
1516
  vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
1169
- vset.LogAndApply(&vedit);
1517
+ vset.LogAndApply(&vedit, &mu);
1170
1518
  }
1171
1519
  uint64_t stop_micros = env->NowMicros();
1172
1520
  unsigned int us = stop_micros - start_micros;