leveldb-ruby 0.7 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/README +1 -1
  2. data/leveldb/Makefile +70 -29
  3. data/leveldb/build_detect_platform +74 -0
  4. data/leveldb/db/builder.cc +2 -4
  5. data/leveldb/db/builder.h +4 -6
  6. data/leveldb/db/c.cc +471 -0
  7. data/leveldb/db/corruption_test.cc +21 -16
  8. data/leveldb/db/db_bench.cc +400 -200
  9. data/leveldb/db/db_impl.cc +276 -131
  10. data/leveldb/db/db_impl.h +22 -10
  11. data/leveldb/db/db_iter.cc +2 -1
  12. data/leveldb/db/db_test.cc +391 -43
  13. data/leveldb/db/dbformat.cc +31 -0
  14. data/leveldb/db/dbformat.h +51 -1
  15. data/leveldb/db/filename.h +1 -1
  16. data/leveldb/db/log_format.h +1 -1
  17. data/leveldb/db/log_reader.cc +16 -11
  18. data/leveldb/db/memtable.cc +37 -0
  19. data/leveldb/db/memtable.h +6 -0
  20. data/leveldb/db/repair.cc +17 -14
  21. data/leveldb/db/skiplist_test.cc +2 -2
  22. data/leveldb/db/version_edit.cc +7 -9
  23. data/leveldb/db/version_edit.h +2 -1
  24. data/leveldb/db/version_set.cc +416 -104
  25. data/leveldb/db/version_set.h +78 -14
  26. data/leveldb/db/version_set_test.cc +179 -0
  27. data/leveldb/db/write_batch_internal.h +2 -0
  28. data/leveldb/include/leveldb/c.h +246 -0
  29. data/leveldb/include/leveldb/db.h +14 -2
  30. data/leveldb/include/leveldb/env.h +31 -10
  31. data/leveldb/include/leveldb/options.h +7 -18
  32. data/leveldb/include/leveldb/slice.h +2 -2
  33. data/leveldb/include/leveldb/status.h +1 -1
  34. data/leveldb/port/atomic_pointer.h +144 -0
  35. data/leveldb/port/port.h +0 -2
  36. data/leveldb/port/port_android.h +7 -1
  37. data/leveldb/port/port_example.h +11 -1
  38. data/leveldb/port/port_posix.h +56 -38
  39. data/leveldb/table/format.cc +12 -8
  40. data/leveldb/table/table_test.cc +16 -7
  41. data/leveldb/util/cache.cc +173 -100
  42. data/leveldb/util/cache_test.cc +28 -11
  43. data/leveldb/util/coding.h +4 -4
  44. data/leveldb/util/comparator.cc +1 -0
  45. data/leveldb/util/env.cc +10 -5
  46. data/leveldb/util/env_posix.cc +48 -87
  47. data/leveldb/util/histogram.cc +11 -0
  48. data/leveldb/util/histogram.h +1 -0
  49. data/leveldb/util/posix_logger.h +98 -0
  50. data/leveldb/util/testharness.cc +12 -0
  51. data/leveldb/util/testharness.h +10 -1
  52. data/lib/leveldb.rb +11 -3
  53. metadata +41 -22
data/leveldb/db/db_impl.h CHANGED
@@ -38,14 +38,12 @@ class DBImpl : public DB {
38
38
  virtual void ReleaseSnapshot(const Snapshot* snapshot);
39
39
  virtual bool GetProperty(const Slice& property, std::string* value);
40
40
  virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes);
41
+ virtual void CompactRange(const Slice* begin, const Slice* end);
41
42
 
42
43
  // Extra methods (for testing) that are not in the public DB interface
43
44
 
44
- // Compact any files in the named level that overlap [begin,end]
45
- void TEST_CompactRange(
46
- int level,
47
- const std::string& begin,
48
- const std::string& end);
45
+ // Compact any files in the named level that overlap [*begin,*end]
46
+ void TEST_CompactRange(int level, const Slice* begin, const Slice* end);
49
47
 
50
48
  // Force current memtable contents to be compacted.
51
49
  Status TEST_CompactMemTable();
@@ -85,7 +83,12 @@ class DBImpl : public DB {
85
83
  VersionEdit* edit,
86
84
  SequenceNumber* max_sequence);
87
85
 
88
- Status WriteLevel0Table(MemTable* mem, VersionEdit* edit);
86
+ Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base);
87
+
88
+ // Only one thread is allowed to log at a time.
89
+ struct LoggerId { }; // Opaque identifier for logging thread
90
+ void AcquireLoggingResponsibility(LoggerId* self);
91
+ void ReleaseLoggingResponsibility(LoggerId* self);
89
92
 
90
93
  Status MakeRoomForWrite(bool force /* compact even if there is room? */);
91
94
 
@@ -119,13 +122,15 @@ class DBImpl : public DB {
119
122
  // State below is protected by mutex_
120
123
  port::Mutex mutex_;
121
124
  port::AtomicPointer shutting_down_;
122
- port::CondVar bg_cv_; // Signalled when !bg_compaction_scheduled_
123
- port::CondVar compacting_cv_; // Signalled when !compacting_
125
+ port::CondVar bg_cv_; // Signalled when background work finishes
124
126
  MemTable* mem_;
125
127
  MemTable* imm_; // Memtable being compacted
126
128
  port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
127
129
  WritableFile* logfile_;
130
+ uint64_t logfile_number_;
128
131
  log::Writer* log_;
132
+ LoggerId* logger_; // NULL, or the id of the current logging thread
133
+ port::CondVar logger_cv_; // For threads waiting to log
129
134
  SnapshotList snapshots_;
130
135
 
131
136
  // Set of table files to protect from deletion because they are
@@ -135,8 +140,15 @@ class DBImpl : public DB {
135
140
  // Has a background compaction been scheduled or is running?
136
141
  bool bg_compaction_scheduled_;
137
142
 
138
- // Is there a compaction running?
139
- bool compacting_;
143
+ // Information for a manual compaction
144
+ struct ManualCompaction {
145
+ int level;
146
+ bool done;
147
+ const InternalKey* begin; // NULL means beginning of key range
148
+ const InternalKey* end; // NULL means end of key range
149
+ InternalKey tmp_storage; // Used to keep track of compaction progress
150
+ };
151
+ ManualCompaction* manual_compaction_;
140
152
 
141
153
  VersionSet* versions_;
142
154
 
@@ -216,7 +216,6 @@ void DBIter::FindPrevUserEntry() {
216
216
 
217
217
  ValueType value_type = kTypeDeletion;
218
218
  if (iter_->Valid()) {
219
- SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
220
219
  do {
221
220
  ParsedInternalKey ikey;
222
221
  if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
@@ -227,6 +226,7 @@ void DBIter::FindPrevUserEntry() {
227
226
  }
228
227
  value_type = ikey.type;
229
228
  if (value_type == kTypeDeletion) {
229
+ saved_key_.clear();
230
230
  ClearSavedValue();
231
231
  } else {
232
232
  Slice raw_value = iter_->value();
@@ -234,6 +234,7 @@ void DBIter::FindPrevUserEntry() {
234
234
  std::string empty;
235
235
  swap(empty, saved_value_);
236
236
  }
237
+ SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
237
238
  saved_value_.assign(raw_value.data(), raw_value.size());
238
239
  }
239
240
  }
@@ -10,6 +10,7 @@
10
10
  #include "leveldb/env.h"
11
11
  #include "leveldb/table.h"
12
12
  #include "util/logging.h"
13
+ #include "util/mutexlock.h"
13
14
  #include "util/testharness.h"
14
15
  #include "util/testutil.h"
15
16
 
@@ -21,15 +22,58 @@ static std::string RandomString(Random* rnd, int len) {
21
22
  return r;
22
23
  }
23
24
 
25
+ // Special Env used to delay background operations
26
+ class SpecialEnv : public EnvWrapper {
27
+ public:
28
+ // sstable Sync() calls are blocked while this pointer is non-NULL.
29
+ port::AtomicPointer delay_sstable_sync_;
30
+
31
+ explicit SpecialEnv(Env* base) : EnvWrapper(base) {
32
+ delay_sstable_sync_.Release_Store(NULL);
33
+ }
34
+
35
+ Status NewWritableFile(const std::string& f, WritableFile** r) {
36
+ class SSTableFile : public WritableFile {
37
+ private:
38
+ SpecialEnv* env_;
39
+ WritableFile* base_;
40
+
41
+ public:
42
+ SSTableFile(SpecialEnv* env, WritableFile* base)
43
+ : env_(env),
44
+ base_(base) {
45
+ }
46
+ ~SSTableFile() { delete base_; }
47
+ Status Append(const Slice& data) { return base_->Append(data); }
48
+ Status Close() { return base_->Close(); }
49
+ Status Flush() { return base_->Flush(); }
50
+ Status Sync() {
51
+ while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
52
+ env_->SleepForMicroseconds(100000);
53
+ }
54
+ return base_->Sync();
55
+ }
56
+ };
57
+
58
+ Status s = target()->NewWritableFile(f, r);
59
+ if (s.ok()) {
60
+ if (strstr(f.c_str(), ".sst") != NULL) {
61
+ *r = new SSTableFile(this, *r);
62
+ }
63
+ }
64
+ return s;
65
+ }
66
+ };
67
+
24
68
  class DBTest {
25
69
  public:
26
70
  std::string dbname_;
27
- Env* env_;
71
+ SpecialEnv* env_;
28
72
  DB* db_;
29
73
 
30
74
  Options last_options_;
31
75
 
32
- DBTest() : env_(Env::Default()) {
76
+ DBTest() : env_(new SpecialEnv(Env::Default())) {
33
77
  dbname_ = test::TmpDir() + "/db_test";
34
78
  DestroyDB(dbname_, Options());
35
79
  db_ = NULL;
@@ -39,6 +83,7 @@ class DBTest {
39
83
  ~DBTest() {
40
84
  delete db_;
41
85
  DestroyDB(dbname_, Options());
86
+ delete env_;
42
87
  }
43
88
 
44
89
  DBImpl* dbfull() {
@@ -142,6 +187,31 @@ class DBTest {
142
187
  return atoi(property.c_str());
143
188
  }
144
189
 
190
+ int TotalTableFiles() {
191
+ int result = 0;
192
+ for (int level = 0; level < config::kNumLevels; level++) {
193
+ result += NumTableFilesAtLevel(level);
194
+ }
195
+ return result;
196
+ }
197
+
198
+ // Return spread of files per level
199
+ std::string FilesPerLevel() {
200
+ std::string result;
201
+ int last_non_zero_offset = 0;
202
+ for (int level = 0; level < config::kNumLevels; level++) {
203
+ int f = NumTableFilesAtLevel(level);
204
+ char buf[100];
205
+ snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
206
+ result += buf;
207
+ if (f > 0) {
208
+ last_non_zero_offset = result.size();
209
+ }
210
+ }
211
+ result.resize(last_non_zero_offset);
212
+ return result;
213
+ }
214
+
145
215
  uint64_t Size(const Slice& start, const Slice& limit) {
146
216
  Range r(start, limit);
147
217
  uint64_t size;
@@ -150,18 +220,25 @@ class DBTest {
150
220
  }
151
221
 
152
222
  void Compact(const Slice& start, const Slice& limit) {
153
- dbfull()->TEST_CompactMemTable();
154
- int max_level_with_files = 1;
155
- for (int level = 1; level < config::kNumLevels; level++) {
156
- if (NumTableFilesAtLevel(level) > 0) {
157
- max_level_with_files = level;
158
- }
159
- }
160
- for (int level = 0; level < max_level_with_files; level++) {
161
- dbfull()->TEST_CompactRange(level, "", "~");
223
+ db_->CompactRange(&start, &limit);
224
+ }
225
+
226
+ // Do n memtable compactions, each of which produces an sstable
227
+ // covering the range [small,large].
228
+ void MakeTables(int n, const std::string& small, const std::string& large) {
229
+ for (int i = 0; i < n; i++) {
230
+ Put(small, "begin");
231
+ Put(large, "end");
232
+ dbfull()->TEST_CompactMemTable();
162
233
  }
163
234
  }
164
235
 
236
+ // Prevent pushing of new sstables into deeper levels by adding
237
+ // tables that cover a specified range to all levels.
238
+ void FillLevels(const std::string& smallest, const std::string& largest) {
239
+ MakeTables(config::kNumLevels, smallest, largest);
240
+ }
241
+
165
242
  void DumpFileCounts(const char* label) {
166
243
  fprintf(stderr, "---\n%s:\n", label);
167
244
  fprintf(stderr, "maxoverlap: %lld\n",
@@ -175,6 +252,12 @@ class DBTest {
175
252
  }
176
253
  }
177
254
 
255
+ std::string DumpSSTableList() {
256
+ std::string property;
257
+ db_->GetProperty("leveldb.sstables", &property);
258
+ return property;
259
+ }
260
+
178
261
  std::string IterStatus(Iterator* iter) {
179
262
  std::string result;
180
263
  if (iter->Valid()) {
@@ -209,6 +292,115 @@ TEST(DBTest, PutDeleteGet) {
209
292
  ASSERT_EQ("NOT_FOUND", Get("foo"));
210
293
  }
211
294
 
295
+ TEST(DBTest, GetFromImmutableLayer) {
296
+ Options options;
297
+ options.env = env_;
298
+ options.write_buffer_size = 100000; // Small write buffer
299
+ Reopen(&options);
300
+
301
+ ASSERT_OK(Put("foo", "v1"));
302
+ ASSERT_EQ("v1", Get("foo"));
303
+
304
+ env_->delay_sstable_sync_.Release_Store(env_); // Block sync calls
305
+ Put("k1", std::string(100000, 'x')); // Fill memtable
306
+ Put("k2", std::string(100000, 'y')); // Trigger compaction
307
+ ASSERT_EQ("v1", Get("foo"));
308
+ env_->delay_sstable_sync_.Release_Store(NULL); // Release sync calls
309
+ }
310
+
311
+ TEST(DBTest, GetFromVersions) {
312
+ ASSERT_OK(Put("foo", "v1"));
313
+ dbfull()->TEST_CompactMemTable();
314
+ ASSERT_EQ("v1", Get("foo"));
315
+ }
316
+
317
+ TEST(DBTest, GetSnapshot) {
318
+ // Try with both a short key and a long key
319
+ for (int i = 0; i < 2; i++) {
320
+ std::string key = (i == 0) ? std::string("foo") : std::string(200, 'x');
321
+ ASSERT_OK(Put(key, "v1"));
322
+ const Snapshot* s1 = db_->GetSnapshot();
323
+ ASSERT_OK(Put(key, "v2"));
324
+ ASSERT_EQ("v2", Get(key));
325
+ ASSERT_EQ("v1", Get(key, s1));
326
+ dbfull()->TEST_CompactMemTable();
327
+ ASSERT_EQ("v2", Get(key));
328
+ ASSERT_EQ("v1", Get(key, s1));
329
+ db_->ReleaseSnapshot(s1);
330
+ }
331
+ }
332
+
333
+ TEST(DBTest, GetLevel0Ordering) {
334
+ // Check that we process level-0 files in correct order. The code
335
+ // below generates two level-0 files where the earlier one comes
336
+ // before the later one in the level-0 file list since the earlier
337
+ // one has a smaller "smallest" key.
338
+ ASSERT_OK(Put("bar", "b"));
339
+ ASSERT_OK(Put("foo", "v1"));
340
+ dbfull()->TEST_CompactMemTable();
341
+ ASSERT_OK(Put("foo", "v2"));
342
+ dbfull()->TEST_CompactMemTable();
343
+ ASSERT_EQ("v2", Get("foo"));
344
+ }
345
+
346
+ TEST(DBTest, GetOrderedByLevels) {
347
+ ASSERT_OK(Put("foo", "v1"));
348
+ Compact("a", "z");
349
+ ASSERT_EQ("v1", Get("foo"));
350
+ ASSERT_OK(Put("foo", "v2"));
351
+ ASSERT_EQ("v2", Get("foo"));
352
+ dbfull()->TEST_CompactMemTable();
353
+ ASSERT_EQ("v2", Get("foo"));
354
+ }
355
+
356
+ TEST(DBTest, GetPicksCorrectFile) {
357
+ // Arrange to have multiple files in a non-level-0 level.
358
+ ASSERT_OK(Put("a", "va"));
359
+ Compact("a", "b");
360
+ ASSERT_OK(Put("x", "vx"));
361
+ Compact("x", "y");
362
+ ASSERT_OK(Put("f", "vf"));
363
+ Compact("f", "g");
364
+ ASSERT_EQ("va", Get("a"));
365
+ ASSERT_EQ("vf", Get("f"));
366
+ ASSERT_EQ("vx", Get("x"));
367
+ }
368
+
369
+ TEST(DBTest, GetEncountersEmptyLevel) {
370
+ // Arrange for the following to happen:
371
+ // * sstable A in level 0
372
+ // * nothing in level 1
373
+ // * sstable B in level 2
374
+ // Then do enough Get() calls to arrange for an automatic compaction
375
+ // of sstable A. A bug would cause the compaction to be marked as
376
+ // occurring at level 1 (instead of the correct level 0).
377
+
378
+ // Step 1: First place sstables in levels 0 and 2
379
+ int compaction_count = 0;
380
+ while (NumTableFilesAtLevel(0) == 0 ||
381
+ NumTableFilesAtLevel(2) == 0) {
382
+ ASSERT_LE(compaction_count, 100) << "could not fill levels 0 and 2";
383
+ compaction_count++;
384
+ Put("a", "begin");
385
+ Put("z", "end");
386
+ dbfull()->TEST_CompactMemTable();
387
+ }
388
+
389
+ // Step 2: clear level 1 if necessary.
390
+ dbfull()->TEST_CompactRange(1, NULL, NULL);
391
+ ASSERT_EQ(NumTableFilesAtLevel(0), 1);
392
+ ASSERT_EQ(NumTableFilesAtLevel(1), 0);
393
+ ASSERT_EQ(NumTableFilesAtLevel(2), 1);
394
+
395
+ // Step 3: read until level 0 compaction disappears.
396
+ int read_count = 0;
397
+ while (NumTableFilesAtLevel(0) > 0) {
398
+ ASSERT_LE(read_count, 10000) << "did not trigger level 0 compaction";
399
+ read_count++;
400
+ ASSERT_EQ("NOT_FOUND", Get("missing"));
401
+ }
402
+ }
403
+
212
404
  TEST(DBTest, IterEmpty) {
213
405
  Iterator* iter = db_->NewIterator(ReadOptions());
214
406
 
@@ -383,6 +575,21 @@ TEST(DBTest, IterSmallAndLargeMix) {
383
575
  delete iter;
384
576
  }
385
577
 
578
+ TEST(DBTest, IterMultiWithDelete) {
579
+ ASSERT_OK(Put("a", "va"));
580
+ ASSERT_OK(Put("b", "vb"));
581
+ ASSERT_OK(Put("c", "vc"));
582
+ ASSERT_OK(Delete("b"));
583
+ ASSERT_EQ("NOT_FOUND", Get("b"));
584
+
585
+ Iterator* iter = db_->NewIterator(ReadOptions());
586
+ iter->Seek("c");
587
+ ASSERT_EQ(IterStatus(iter), "c->vc");
588
+ iter->Prev();
589
+ ASSERT_EQ(IterStatus(iter), "a->va");
590
+ delete iter;
591
+ }
592
+
386
593
  TEST(DBTest, Recover) {
387
594
  ASSERT_OK(Put("foo", "v1"));
388
595
  ASSERT_OK(Put("baz", "v5"));
@@ -413,6 +620,27 @@ TEST(DBTest, RecoveryWithEmptyLog) {
413
620
  ASSERT_EQ("v3", Get("foo"));
414
621
  }
415
622
 
623
+ // Check that writes done during a memtable compaction are recovered
624
+ // if the database is shutdown during the memtable compaction.
625
+ TEST(DBTest, RecoverDuringMemtableCompaction) {
626
+ Options options;
627
+ options.env = env_;
628
+ options.write_buffer_size = 1000000;
629
+ Reopen(&options);
630
+
631
+ // Trigger a long memtable compaction and reopen the database during it
632
+ ASSERT_OK(Put("foo", "v1")); // Goes to 1st log file
633
+ ASSERT_OK(Put("big1", std::string(10000000, 'x'))); // Fills memtable
634
+ ASSERT_OK(Put("big2", std::string(1000, 'y'))); // Triggers compaction
635
+ ASSERT_OK(Put("bar", "v2")); // Goes to new log file
636
+
637
+ Reopen(&options);
638
+ ASSERT_EQ("v1", Get("foo"));
639
+ ASSERT_EQ("v2", Get("bar"));
640
+ ASSERT_EQ(std::string(10000000, 'x'), Get("big1"));
641
+ ASSERT_EQ(std::string(1000, 'y'), Get("big2"));
642
+ }
643
+
416
644
  static std::string Key(int i) {
417
645
  char buf[100];
418
646
  snprintf(buf, sizeof(buf), "key%06d", i);
@@ -426,11 +654,11 @@ TEST(DBTest, MinorCompactionsHappen) {
426
654
 
427
655
  const int N = 500;
428
656
 
429
- int starting_num_tables = NumTableFilesAtLevel(0);
657
+ int starting_num_tables = TotalTableFiles();
430
658
  for (int i = 0; i < N; i++) {
431
659
  ASSERT_OK(Put(Key(i), Key(i) + std::string(1000, 'v')));
432
660
  }
433
- int ending_num_tables = NumTableFilesAtLevel(0);
661
+ int ending_num_tables = TotalTableFiles();
434
662
  ASSERT_GT(ending_num_tables, starting_num_tables);
435
663
 
436
664
  for (int i = 0; i < N; i++) {
@@ -485,7 +713,7 @@ TEST(DBTest, CompactionsGenerateMultipleFiles) {
485
713
 
486
714
  // Reopening moves updates to level-0
487
715
  Reopen(&options);
488
- dbfull()->TEST_CompactRange(0, "", Key(100000));
716
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
489
717
 
490
718
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
491
719
  ASSERT_GT(NumTableFilesAtLevel(1), 1);
@@ -494,11 +722,32 @@ TEST(DBTest, CompactionsGenerateMultipleFiles) {
494
722
  }
495
723
  }
496
724
 
725
+ TEST(DBTest, RepeatedWritesToSameKey) {
726
+ Options options;
727
+ options.env = env_;
728
+ options.write_buffer_size = 100000; // Small write buffer
729
+ Reopen(&options);
730
+
731
+ // We must have at most one file per level except for level-0,
732
+ // which may have up to kL0_StopWritesTrigger files.
733
+ const int kMaxFiles = config::kNumLevels + config::kL0_StopWritesTrigger;
734
+
735
+ Random rnd(301);
736
+ std::string value = RandomString(&rnd, 2 * options.write_buffer_size);
737
+ for (int i = 0; i < 5 * kMaxFiles; i++) {
738
+ Put("key", value);
739
+ ASSERT_LE(TotalTableFiles(), kMaxFiles);
740
+ fprintf(stderr, "after %d: %d files\n", int(i+1), TotalTableFiles());
741
+ }
742
+ }
743
+
497
744
  TEST(DBTest, SparseMerge) {
498
745
  Options options;
499
746
  options.compression = kNoCompression;
500
747
  Reopen(&options);
501
748
 
749
+ FillLevels("A", "Z");
750
+
502
751
  // Suppose there is:
503
752
  // small amount of data with prefix A
504
753
  // large amount of data with prefix B
@@ -514,7 +763,8 @@ TEST(DBTest, SparseMerge) {
514
763
  Put(key, value);
515
764
  }
516
765
  Put("C", "vc");
517
- Compact("", "z");
766
+ dbfull()->TEST_CompactMemTable();
767
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
518
768
 
519
769
  // Make sparse update
520
770
  Put("A", "va2");
@@ -525,9 +775,9 @@ TEST(DBTest, SparseMerge) {
525
775
  // Compactions should not cause us to create a situation where
526
776
  // a file overlaps too much data at the next level.
527
777
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
528
- dbfull()->TEST_CompactRange(0, "", "z");
778
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
529
779
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
530
- dbfull()->TEST_CompactRange(1, "", "z");
780
+ dbfull()->TEST_CompactRange(1, NULL, NULL);
531
781
  ASSERT_LE(dbfull()->TEST_MaxNextLevelOverlappingBytes(), 20*1048576);
532
782
  }
533
783
 
@@ -578,9 +828,11 @@ TEST(DBTest, ApproximateSizes) {
578
828
  ASSERT_TRUE(Between(Size("", Key(50)), 5000000, 5010000));
579
829
  ASSERT_TRUE(Between(Size("", Key(50)+".suffix"), 5100000, 5110000));
580
830
 
581
- dbfull()->TEST_CompactRange(0,
582
- Key(compact_start),
583
- Key(compact_start + 9));
831
+ std::string cstart_str = Key(compact_start);
832
+ std::string cend_str = Key(compact_start + 9);
833
+ Slice cstart = cstart_str;
834
+ Slice cend = cend_str;
835
+ dbfull()->TEST_CompactRange(0, &cstart, &cend);
584
836
  }
585
837
 
586
838
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
@@ -620,7 +872,7 @@ TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
620
872
 
621
873
  ASSERT_TRUE(Between(Size(Key(3), Key(5)), 110000, 111000));
622
874
 
623
- dbfull()->TEST_CompactRange(0, Key(0), Key(100));
875
+ dbfull()->TEST_CompactRange(0, NULL, NULL);
624
876
  }
625
877
  }
626
878
 
@@ -675,6 +927,8 @@ TEST(DBTest, Snapshot) {
675
927
 
676
928
  TEST(DBTest, HiddenValuesAreRemoved) {
677
929
  Random rnd(301);
930
+ FillLevels("a", "z");
931
+
678
932
  std::string big = RandomString(&rnd, 50000);
679
933
  Put("foo", big);
680
934
  Put("pastfoo", "v");
@@ -689,11 +943,12 @@ TEST(DBTest, HiddenValuesAreRemoved) {
689
943
  ASSERT_TRUE(Between(Size("", "pastfoo"), 50000, 60000));
690
944
  db_->ReleaseSnapshot(snapshot);
691
945
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny, " + big + " ]");
692
- dbfull()->TEST_CompactRange(0, "", "x");
946
+ Slice x("x");
947
+ dbfull()->TEST_CompactRange(0, NULL, &x);
693
948
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
694
949
  ASSERT_EQ(NumTableFilesAtLevel(0), 0);
695
950
  ASSERT_GE(NumTableFilesAtLevel(1), 1);
696
- dbfull()->TEST_CompactRange(1, "", "x");
951
+ dbfull()->TEST_CompactRange(1, NULL, &x);
697
952
  ASSERT_EQ(AllEntriesFor("foo"), "[ tiny ]");
698
953
 
699
954
  ASSERT_TRUE(Between(Size("", "pastfoo"), 0, 1000));
@@ -702,43 +957,97 @@ TEST(DBTest, HiddenValuesAreRemoved) {
702
957
  TEST(DBTest, DeletionMarkers1) {
703
958
  Put("foo", "v1");
704
959
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
705
- dbfull()->TEST_CompactRange(0, "", "z");
706
- dbfull()->TEST_CompactRange(1, "", "z");
707
- ASSERT_EQ(NumTableFilesAtLevel(2), 1); // foo => v1 is now in level 2 file
960
+ const int last = config::kMaxMemCompactLevel;
961
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
962
+
963
+ // Place a table at level last-1 to prevent merging with preceding mutation
964
+ Put("a", "begin");
965
+ Put("z", "end");
966
+ dbfull()->TEST_CompactMemTable();
967
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1);
968
+ ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
969
+
708
970
  Delete("foo");
709
971
  Put("foo", "v2");
710
972
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
711
- ASSERT_OK(dbfull()->TEST_CompactMemTable());
973
+ ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
712
974
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, DEL, v1 ]");
713
- dbfull()->TEST_CompactRange(0, "", "z");
975
+ Slice z("z");
976
+ dbfull()->TEST_CompactRange(last-2, NULL, &z);
714
977
  // DEL eliminated, but v1 remains because we aren't compacting that level
715
978
  // (DEL can be eliminated because v2 hides v1).
716
979
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2, v1 ]");
717
- dbfull()->TEST_CompactRange(1, "", "z");
718
- // Merging L1 w/ L2, so we are the base level for "foo", so DEL is removed.
719
- // (as is v1).
980
+ dbfull()->TEST_CompactRange(last-1, NULL, NULL);
981
+ // Merging last-1 w/ last, so we are the base level for "foo", so
982
+ // DEL is removed. (as is v1).
720
983
  ASSERT_EQ(AllEntriesFor("foo"), "[ v2 ]");
721
984
  }
722
985
 
723
986
  TEST(DBTest, DeletionMarkers2) {
724
987
  Put("foo", "v1");
725
988
  ASSERT_OK(dbfull()->TEST_CompactMemTable());
726
- dbfull()->TEST_CompactRange(0, "", "z");
727
- dbfull()->TEST_CompactRange(1, "", "z");
728
- ASSERT_EQ(NumTableFilesAtLevel(2), 1); // foo => v1 is now in level 2 file
989
+ const int last = config::kMaxMemCompactLevel;
990
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1); // foo => v1 is now in last level
991
+
992
+ // Place a table at level last-1 to prevent merging with preceding mutation
993
+ Put("a", "begin");
994
+ Put("z", "end");
995
+ dbfull()->TEST_CompactMemTable();
996
+ ASSERT_EQ(NumTableFilesAtLevel(last), 1);
997
+ ASSERT_EQ(NumTableFilesAtLevel(last-1), 1);
998
+
729
999
  Delete("foo");
730
1000
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
731
- ASSERT_OK(dbfull()->TEST_CompactMemTable());
1001
+ ASSERT_OK(dbfull()->TEST_CompactMemTable()); // Moves to level last-2
732
1002
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
733
- dbfull()->TEST_CompactRange(0, "", "z");
734
- // DEL kept: L2 file overlaps
1003
+ dbfull()->TEST_CompactRange(last-2, NULL, NULL);
1004
+ // DEL kept: "last" file overlaps
735
1005
  ASSERT_EQ(AllEntriesFor("foo"), "[ DEL, v1 ]");
736
- dbfull()->TEST_CompactRange(1, "", "z");
737
- // Merging L1 w/ L2, so we are the base level for "foo", so DEL is removed.
738
- // (as is v1).
1006
+ dbfull()->TEST_CompactRange(last-1, NULL, NULL);
1007
+ // Merging last-1 w/ last, so we are the base level for "foo", so
1008
+ // DEL is removed. (as is v1).
739
1009
  ASSERT_EQ(AllEntriesFor("foo"), "[ ]");
740
1010
  }
741
1011
 
1012
+ TEST(DBTest, OverlapInLevel0) {
1013
+ ASSERT_EQ(config::kMaxMemCompactLevel, 2) << "Fix test to match config";
1014
+
1015
+ // Fill levels 1 and 2 to disable the pushing of new memtables to levels > 0.
1016
+ ASSERT_OK(Put("100", "v100"));
1017
+ ASSERT_OK(Put("999", "v999"));
1018
+ dbfull()->TEST_CompactMemTable();
1019
+ ASSERT_OK(Delete("100"));
1020
+ ASSERT_OK(Delete("999"));
1021
+ dbfull()->TEST_CompactMemTable();
1022
+ ASSERT_EQ("0,1,1", FilesPerLevel());
1023
+
1024
+ // Make files spanning the following ranges in level-0:
1025
+ // files[0] 200 .. 900
1026
+ // files[1] 300 .. 500
1027
+ // Note that files are sorted by smallest key.
1028
+ ASSERT_OK(Put("300", "v300"));
1029
+ ASSERT_OK(Put("500", "v500"));
1030
+ dbfull()->TEST_CompactMemTable();
1031
+ ASSERT_OK(Put("200", "v200"));
1032
+ ASSERT_OK(Put("600", "v600"));
1033
+ ASSERT_OK(Put("900", "v900"));
1034
+ dbfull()->TEST_CompactMemTable();
1035
+ ASSERT_EQ("2,1,1", FilesPerLevel());
1036
+
1037
+ // Compact away the placeholder files we created initially
1038
+ dbfull()->TEST_CompactRange(1, NULL, NULL);
1039
+ dbfull()->TEST_CompactRange(2, NULL, NULL);
1040
+ ASSERT_EQ("2", FilesPerLevel());
1041
+
1042
+ // Do a memtable compaction. Before bug-fix, the compaction would
1043
+ // not detect the overlap with level-0 files and would incorrectly place
1044
+ // the deletion in a deeper level.
1045
+ ASSERT_OK(Delete("600"));
1046
+ dbfull()->TEST_CompactMemTable();
1047
+ ASSERT_EQ("3", FilesPerLevel());
1048
+ ASSERT_EQ("NOT_FOUND", Get("600"));
1049
+ }
1050
+
742
1051
  TEST(DBTest, ComparatorCheck) {
743
1052
  class NewComparator : public Comparator {
744
1053
  public:
@@ -762,6 +1071,40 @@ TEST(DBTest, ComparatorCheck) {
762
1071
  << s.ToString();
763
1072
  }
764
1073
 
1074
+ TEST(DBTest, ManualCompaction) {
1075
+ ASSERT_EQ(config::kMaxMemCompactLevel, 2)
1076
+ << "Need to update this test to match kMaxMemCompactLevel";
1077
+
1078
+ MakeTables(3, "p", "q");
1079
+ ASSERT_EQ("1,1,1", FilesPerLevel());
1080
+
1081
+ // Compaction range falls before files
1082
+ Compact("", "c");
1083
+ ASSERT_EQ("1,1,1", FilesPerLevel());
1084
+
1085
+ // Compaction range falls after files
1086
+ Compact("r", "z");
1087
+ ASSERT_EQ("1,1,1", FilesPerLevel());
1088
+
1089
+ // Compaction range overlaps files
1090
+ Compact("p1", "p9");
1091
+ ASSERT_EQ("0,0,1", FilesPerLevel());
1092
+
1093
+ // Populate a different range
1094
+ MakeTables(3, "c", "e");
1095
+ ASSERT_EQ("1,1,2", FilesPerLevel());
1096
+
1097
+ // Compact just the new range
1098
+ Compact("b", "f");
1099
+ ASSERT_EQ("0,0,2", FilesPerLevel());
1100
+
1101
+ // Compact all
1102
+ MakeTables(1, "a", "z");
1103
+ ASSERT_EQ("0,1,2", FilesPerLevel());
1104
+ db_->CompactRange(NULL, NULL);
1105
+ ASSERT_EQ("0,0,1", FilesPerLevel());
1106
+ }
1107
+
765
1108
  TEST(DBTest, DBOpen_Options) {
766
1109
  std::string dbname = test::TmpDir() + "/db_options_test";
767
1110
  DestroyDB(dbname, Options());
@@ -941,7 +1284,6 @@ class ModelDB: public DB {
941
1284
  delete reinterpret_cast<const ModelSnapshot*>(snapshot);
942
1285
  }
943
1286
  virtual Status Write(const WriteOptions& options, WriteBatch* batch) {
944
- assert(options.post_write_snapshot == NULL); // Not supported
945
1287
  class Handler : public WriteBatch::Handler {
946
1288
  public:
947
1289
  KVMap* map_;
@@ -965,6 +1307,9 @@ class ModelDB: public DB {
965
1307
  sizes[i] = 0;
966
1308
  }
967
1309
  }
1310
+ virtual void CompactRange(const Slice* start, const Slice* end) {
1311
+ }
1312
+
968
1313
  private:
969
1314
  class ModelIter: public Iterator {
970
1315
  public:
@@ -1145,6 +1490,9 @@ void BM_LogAndApply(int iters, int num_base_files) {
1145
1490
 
1146
1491
  Env* env = Env::Default();
1147
1492
 
1493
+ port::Mutex mu;
1494
+ MutexLock l(&mu);
1495
+
1148
1496
  InternalKeyComparator cmp(BytewiseComparator());
1149
1497
  Options options;
1150
1498
  VersionSet vset(dbname, &options, NULL, &cmp);
@@ -1156,7 +1504,7 @@ void BM_LogAndApply(int iters, int num_base_files) {
1156
1504
  InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
1157
1505
  vbase.AddFile(2, fnum++, 1 /* file size */, start, limit);
1158
1506
  }
1159
- ASSERT_OK(vset.LogAndApply(&vbase));
1507
+ ASSERT_OK(vset.LogAndApply(&vbase, &mu));
1160
1508
 
1161
1509
  uint64_t start_micros = env->NowMicros();
1162
1510
 
@@ -1166,7 +1514,7 @@ void BM_LogAndApply(int iters, int num_base_files) {
1166
1514
  InternalKey start(MakeKey(2*fnum), 1, kTypeValue);
1167
1515
  InternalKey limit(MakeKey(2*fnum+1), 1, kTypeDeletion);
1168
1516
  vedit.AddFile(2, fnum++, 1 /* file size */, start, limit);
1169
- vset.LogAndApply(&vedit);
1517
+ vset.LogAndApply(&vedit, &mu);
1170
1518
  }
1171
1519
  uint64_t stop_micros = env->NowMicros();
1172
1520
  unsigned int us = stop_micros - start_micros;