leveldb-ruby 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. data/README +17 -0
  2. data/ext/leveldb/extconf.rb +10 -0
  3. data/ext/leveldb/leveldb.cc +181 -0
  4. data/leveldb/Makefile +172 -0
  5. data/leveldb/db/builder.cc +90 -0
  6. data/leveldb/db/builder.h +36 -0
  7. data/leveldb/db/corruption_test.cc +354 -0
  8. data/leveldb/db/db_bench.cc +677 -0
  9. data/leveldb/db/db_impl.cc +1236 -0
  10. data/leveldb/db/db_impl.h +180 -0
  11. data/leveldb/db/db_iter.cc +298 -0
  12. data/leveldb/db/db_iter.h +26 -0
  13. data/leveldb/db/db_test.cc +1192 -0
  14. data/leveldb/db/dbformat.cc +87 -0
  15. data/leveldb/db/dbformat.h +165 -0
  16. data/leveldb/db/dbformat_test.cc +112 -0
  17. data/leveldb/db/filename.cc +135 -0
  18. data/leveldb/db/filename.h +80 -0
  19. data/leveldb/db/filename_test.cc +122 -0
  20. data/leveldb/db/log_format.h +35 -0
  21. data/leveldb/db/log_reader.cc +254 -0
  22. data/leveldb/db/log_reader.h +108 -0
  23. data/leveldb/db/log_test.cc +500 -0
  24. data/leveldb/db/log_writer.cc +103 -0
  25. data/leveldb/db/log_writer.h +48 -0
  26. data/leveldb/db/memtable.cc +108 -0
  27. data/leveldb/db/memtable.h +85 -0
  28. data/leveldb/db/repair.cc +384 -0
  29. data/leveldb/db/skiplist.h +378 -0
  30. data/leveldb/db/skiplist_test.cc +378 -0
  31. data/leveldb/db/snapshot.h +66 -0
  32. data/leveldb/db/table_cache.cc +95 -0
  33. data/leveldb/db/table_cache.h +50 -0
  34. data/leveldb/db/version_edit.cc +268 -0
  35. data/leveldb/db/version_edit.h +106 -0
  36. data/leveldb/db/version_edit_test.cc +46 -0
  37. data/leveldb/db/version_set.cc +1060 -0
  38. data/leveldb/db/version_set.h +306 -0
  39. data/leveldb/db/write_batch.cc +138 -0
  40. data/leveldb/db/write_batch_internal.h +45 -0
  41. data/leveldb/db/write_batch_test.cc +89 -0
  42. data/leveldb/include/leveldb/cache.h +99 -0
  43. data/leveldb/include/leveldb/comparator.h +63 -0
  44. data/leveldb/include/leveldb/db.h +148 -0
  45. data/leveldb/include/leveldb/env.h +302 -0
  46. data/leveldb/include/leveldb/iterator.h +100 -0
  47. data/leveldb/include/leveldb/options.h +198 -0
  48. data/leveldb/include/leveldb/slice.h +109 -0
  49. data/leveldb/include/leveldb/status.h +100 -0
  50. data/leveldb/include/leveldb/table.h +70 -0
  51. data/leveldb/include/leveldb/table_builder.h +91 -0
  52. data/leveldb/include/leveldb/write_batch.h +64 -0
  53. data/leveldb/port/port.h +23 -0
  54. data/leveldb/port/port_android.cc +64 -0
  55. data/leveldb/port/port_android.h +150 -0
  56. data/leveldb/port/port_chromium.cc +80 -0
  57. data/leveldb/port/port_chromium.h +97 -0
  58. data/leveldb/port/port_example.h +115 -0
  59. data/leveldb/port/port_osx.cc +50 -0
  60. data/leveldb/port/port_osx.h +125 -0
  61. data/leveldb/port/port_posix.cc +50 -0
  62. data/leveldb/port/port_posix.h +94 -0
  63. data/leveldb/port/sha1_portable.cc +298 -0
  64. data/leveldb/port/sha1_portable.h +25 -0
  65. data/leveldb/port/sha1_test.cc +39 -0
  66. data/leveldb/port/win/stdint.h +24 -0
  67. data/leveldb/table/block.cc +263 -0
  68. data/leveldb/table/block.h +43 -0
  69. data/leveldb/table/block_builder.cc +109 -0
  70. data/leveldb/table/block_builder.h +57 -0
  71. data/leveldb/table/format.cc +131 -0
  72. data/leveldb/table/format.h +103 -0
  73. data/leveldb/table/iterator.cc +67 -0
  74. data/leveldb/table/iterator_wrapper.h +63 -0
  75. data/leveldb/table/merger.cc +197 -0
  76. data/leveldb/table/merger.h +26 -0
  77. data/leveldb/table/table.cc +175 -0
  78. data/leveldb/table/table_builder.cc +227 -0
  79. data/leveldb/table/table_test.cc +845 -0
  80. data/leveldb/table/two_level_iterator.cc +182 -0
  81. data/leveldb/table/two_level_iterator.h +34 -0
  82. data/leveldb/util/arena.cc +68 -0
  83. data/leveldb/util/arena.h +68 -0
  84. data/leveldb/util/arena_test.cc +68 -0
  85. data/leveldb/util/cache.cc +255 -0
  86. data/leveldb/util/cache_test.cc +169 -0
  87. data/leveldb/util/coding.cc +194 -0
  88. data/leveldb/util/coding.h +104 -0
  89. data/leveldb/util/coding_test.cc +173 -0
  90. data/leveldb/util/comparator.cc +72 -0
  91. data/leveldb/util/crc32c.cc +332 -0
  92. data/leveldb/util/crc32c.h +45 -0
  93. data/leveldb/util/crc32c_test.cc +72 -0
  94. data/leveldb/util/env.cc +77 -0
  95. data/leveldb/util/env_chromium.cc +612 -0
  96. data/leveldb/util/env_posix.cc +606 -0
  97. data/leveldb/util/env_test.cc +102 -0
  98. data/leveldb/util/hash.cc +45 -0
  99. data/leveldb/util/hash.h +19 -0
  100. data/leveldb/util/histogram.cc +128 -0
  101. data/leveldb/util/histogram.h +41 -0
  102. data/leveldb/util/logging.cc +81 -0
  103. data/leveldb/util/logging.h +47 -0
  104. data/leveldb/util/mutexlock.h +39 -0
  105. data/leveldb/util/options.cc +28 -0
  106. data/leveldb/util/random.h +59 -0
  107. data/leveldb/util/status.cc +75 -0
  108. data/leveldb/util/testharness.cc +65 -0
  109. data/leveldb/util/testharness.h +129 -0
  110. data/leveldb/util/testutil.cc +51 -0
  111. data/leveldb/util/testutil.h +53 -0
  112. data/lib/leveldb.rb +36 -0
  113. metadata +183 -0
@@ -0,0 +1,1236 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "db/db_impl.h"
6
+
7
+ #include <algorithm>
8
+ #include <set>
9
+ #include <string>
10
+ #include <stdint.h>
11
+ #include <stdio.h>
12
+ #include <vector>
13
+ #include "db/builder.h"
14
+ #include "db/db_iter.h"
15
+ #include "db/dbformat.h"
16
+ #include "db/filename.h"
17
+ #include "db/log_reader.h"
18
+ #include "db/log_writer.h"
19
+ #include "db/memtable.h"
20
+ #include "db/table_cache.h"
21
+ #include "db/version_set.h"
22
+ #include "db/write_batch_internal.h"
23
+ #include "leveldb/db.h"
24
+ #include "leveldb/env.h"
25
+ #include "leveldb/status.h"
26
+ #include "leveldb/table.h"
27
+ #include "leveldb/table_builder.h"
28
+ #include "port/port.h"
29
+ #include "table/block.h"
30
+ #include "table/merger.h"
31
+ #include "table/two_level_iterator.h"
32
+ #include "util/coding.h"
33
+ #include "util/logging.h"
34
+ #include "util/mutexlock.h"
35
+
36
+ namespace leveldb {
37
+
38
// Per-compaction scratch state: the compaction being run, the output
// tables produced so far, and the table currently being built.
struct DBImpl::CompactionState {
  Compaction* const compaction;

  // Sequence numbers < smallest_snapshot are not significant since we
  // will never have to service a snapshot below smallest_snapshot.
  // Therefore if we have seen a sequence number S <= smallest_snapshot,
  // we can drop all entries for the same key with sequence numbers < S.
  SequenceNumber smallest_snapshot;

  // Files produced by compaction
  struct Output {
    uint64_t number;            // file number of the output table
    uint64_t file_size;         // final size, set when the table is finished
    InternalKey smallest, largest;  // key range covered by this output
  };
  std::vector<Output> outputs;

  // State kept for output being generated
  WritableFile* outfile;
  TableBuilder* builder;

  // Total bytes written across all finished outputs (for logging/stats).
  uint64_t total_bytes;

  // The output currently being generated (last element of outputs).
  // Precondition: outputs is non-empty.
  Output* current_output() { return &outputs[outputs.size()-1]; }

  explicit CompactionState(Compaction* c)
      : compaction(c),
        outfile(NULL),
        builder(NULL),
        total_bytes(0) {
  }
};
70
+
71
namespace {
// A WritableFile that discards everything written to it.  Used as a
// fallback info_log sink when no suitable log file can be opened, so
// that Log() calls remain safe no-ops.
class NullWritableFile : public WritableFile {
 public:
  virtual Status Append(const Slice& data) { return Status::OK(); }
  virtual Status Close() { return Status::OK(); }
  virtual Status Flush() { return Status::OK(); }
  virtual Status Sync() { return Status::OK(); }
};
}
80
+
81
+ // Fix user-supplied options to be reasonable
82
// Clamp *ptr so the stored value lies within [minvalue, maxvalue].
// The max bound is applied before the min bound, matching the caller's
// expectation when the bounds are sane (minvalue <= maxvalue).
template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
  if (maxvalue < static_cast<V>(*ptr)) {
    *ptr = maxvalue;
  }
  if (static_cast<V>(*ptr) < minvalue) {
    *ptr = minvalue;
  }
}
87
// Returns a copy of "src" with out-of-range tunables clamped, the
// comparator replaced by the internal key comparator, and default
// info_log / block_cache objects installed when the caller supplied none.
// DBImpl compares the result against "src" to know which of these
// objects it owns (and must delete) later.
Options SanitizeOptions(const std::string& dbname,
                        const InternalKeyComparator* icmp,
                        const Options& src) {
  Options result = src;
  result.comparator = icmp;
  ClipToRange(&result.max_open_files, 20, 50000);
  ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
  ClipToRange(&result.block_size, 1<<10, 4<<20);
  if (result.info_log == NULL) {
    // Open a log file in the same directory as the db
    src.env->CreateDir(dbname);  // In case it does not exist
    // Keep one generation of old log by renaming the previous one aside.
    src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname));
    Status s = src.env->NewWritableFile(InfoLogFileName(dbname),
                                        &result.info_log);
    if (!s.ok()) {
      // No place suitable for logging; discard all log output.
      result.info_log = new NullWritableFile;
    }
  }
  if (result.block_cache == NULL) {
    // Default block cache: 8 MB LRU.
    result.block_cache = NewLRUCache(8 << 20);
  }
  return result;
}
111
+
112
// Constructs the DB object but does no I/O beyond what SanitizeOptions
// performs; real recovery happens later in DBImpl::Recover().
// NOTE(review): options_ is initialized from internal_comparator_, so the
// member declaration order in db_impl.h must put internal_comparator_
// first — confirm against the header if reordering members.
DBImpl::DBImpl(const Options& options, const std::string& dbname)
    : env_(options.env),
      internal_comparator_(options.comparator),
      options_(SanitizeOptions(dbname, &internal_comparator_, options)),
      // Ownership flags: SanitizeOptions replaced the pointer iff it
      // differs from what the caller passed in.
      owns_info_log_(options_.info_log != options.info_log),
      owns_cache_(options_.block_cache != options.block_cache),
      dbname_(dbname),
      db_lock_(NULL),
      shutting_down_(NULL),
      bg_cv_(&mutex_),
      compacting_cv_(&mutex_),
      mem_(new MemTable(internal_comparator_)),
      imm_(NULL),
      logfile_(NULL),
      log_(NULL),
      bg_compaction_scheduled_(false),
      compacting_(false) {
  mem_->Ref();
  has_imm_.Release_Store(NULL);

  // Reserve ten files or so for other uses and give the rest to TableCache.
  const int table_cache_size = options.max_open_files - 10;
  table_cache_ = new TableCache(dbname_, &options_, table_cache_size);

  versions_ = new VersionSet(dbname_, &options_, table_cache_,
                             &internal_comparator_);
}
139
+
140
+ DBImpl::~DBImpl() {
141
+ // Wait for background work to finish
142
+ mutex_.Lock();
143
+ shutting_down_.Release_Store(this); // Any non-NULL value is ok
144
+ if (bg_compaction_scheduled_) {
145
+ while (bg_compaction_scheduled_) {
146
+ bg_cv_.Wait();
147
+ }
148
+ }
149
+ mutex_.Unlock();
150
+
151
+ if (db_lock_ != NULL) {
152
+ env_->UnlockFile(db_lock_);
153
+ }
154
+
155
+ delete versions_;
156
+ if (mem_ != NULL) mem_->Unref();
157
+ if (imm_ != NULL) imm_->Unref();
158
+ delete log_;
159
+ delete logfile_;
160
+ delete table_cache_;
161
+
162
+ if (owns_info_log_) {
163
+ delete options_.info_log;
164
+ }
165
+ if (owns_cache_) {
166
+ delete options_.block_cache;
167
+ }
168
+ }
169
+
170
// Creates a brand-new database: writes descriptor file #1 containing an
// empty VersionEdit, then atomically points CURRENT at it.  On failure
// the partially written manifest is removed.
Status DBImpl::NewDB() {
  VersionEdit new_db;
  new_db.SetComparatorName(user_comparator()->Name());
  new_db.SetLogNumber(0);
  // File number 1 is taken by the manifest below, so the next free one is 2.
  new_db.SetNextFile(2);
  new_db.SetLastSequence(0);

  const std::string manifest = DescriptorFileName(dbname_, 1);
  WritableFile* file;
  Status s = env_->NewWritableFile(manifest, &file);
  if (!s.ok()) {
    return s;
  }
  {
    // Scope ensures the log::Writer is destroyed before the file is deleted.
    log::Writer log(file);
    std::string record;
    new_db.EncodeTo(&record);
    s = log.AddRecord(record);
    if (s.ok()) {
      s = file->Close();
    }
  }
  delete file;
  if (s.ok()) {
    // Make "CURRENT" file that points to the new manifest file.
    s = SetCurrentFile(env_, dbname_, 1);
  } else {
    env_->DeleteFile(manifest);
  }
  return s;
}
201
+
202
+ void DBImpl::MaybeIgnoreError(Status* s) const {
203
+ if (s->ok() || options_.paranoid_checks) {
204
+ // No change needed
205
+ } else {
206
+ Log(env_, options_.info_log, "Ignoring error %s", s->ToString().c_str());
207
+ *s = Status::OK();
208
+ }
209
+ }
210
+
211
// Scans the database directory and deletes files no longer needed:
// log files older than the current/previous log, superseded manifests,
// and table files that are neither live in some version nor pending
// output of an in-flight compaction.  Requires mutex_ held (callers
// hold it) since it reads pending_outputs_ and versions_.
void DBImpl::DeleteObsoleteFiles() {
  // Make a set of all of the live files
  std::set<uint64_t> live = pending_outputs_;
  versions_->AddLiveFiles(&live);

  std::vector<std::string> filenames;
  env_->GetChildren(dbname_, &filenames);  // Ignoring errors on purpose
  uint64_t number;
  FileType type;
  for (size_t i = 0; i < filenames.size(); i++) {
    if (ParseFileName(filenames[i], &number, &type)) {
      bool keep = true;
      switch (type) {
        case kLogFile:
          keep = ((number == versions_->LogNumber()) ||
                  (number == versions_->PrevLogNumber()));
          break;
        case kDescriptorFile:
          // Keep my manifest file, and any newer incarnations'
          // (in case there is a race that allows other incarnations)
          keep = (number >= versions_->ManifestFileNumber());
          break;
        case kTableFile:
          keep = (live.find(number) != live.end());
          break;
        case kTempFile:
          // Any temp files that are currently being written to must
          // be recorded in pending_outputs_, which is inserted into "live"
          keep = (live.find(number) != live.end());
          break;
        case kCurrentFile:
        case kDBLockFile:
        case kInfoLogFile:
          // Never delete the lock, CURRENT, or info-log files here.
          keep = true;
          break;
      }

      if (!keep) {
        if (type == kTableFile) {
          // Drop the cached Table object before removing the file.
          table_cache_->Evict(number);
        }
        Log(env_, options_.info_log, "Delete type=%d #%lld\n",
            int(type),
            static_cast<unsigned long long>(number));
        env_->DeleteFile(dbname_ + "/" + filenames[i]);
      }
    }
  }
}
260
+
261
// Opens (or creates) the database: takes the file lock, creates the DB
// if missing/allowed, recovers version state from the manifest, and
// replays any log files into memtables/level-0 tables, recording the
// results in *edit.  Requires mutex_ held.
Status DBImpl::Recover(VersionEdit* edit) {
  mutex_.AssertHeld();

  // Ignore error from CreateDir since the creation of the DB is
  // committed only when the descriptor is created, and this directory
  // may already exist from a previous failed creation attempt.
  env_->CreateDir(dbname_);
  assert(db_lock_ == NULL);
  Status s = env_->LockFile(LockFileName(dbname_), &db_lock_);
  if (!s.ok()) {
    return s;
  }

  if (!env_->FileExists(CurrentFileName(dbname_))) {
    if (options_.create_if_missing) {
      s = NewDB();
      if (!s.ok()) {
        return s;
      }
    } else {
      return Status::InvalidArgument(
          dbname_, "does not exist (create_if_missing is false)");
    }
  } else {
    if (options_.error_if_exists) {
      return Status::InvalidArgument(
          dbname_, "exists (error_if_exists is true)");
    }
  }

  s = versions_->Recover();
  if (s.ok()) {
    // Recover from the log files named in the descriptor.
    // Replay the previous log first so records are applied oldest-first.
    SequenceNumber max_sequence(0);
    if (versions_->PrevLogNumber() != 0) {  // log#==0 means no prev log
      s = RecoverLogFile(versions_->PrevLogNumber(), edit, &max_sequence);
    }
    if (s.ok() && versions_->LogNumber() != 0) {  // log#==0 for initial state
      s = RecoverLogFile(versions_->LogNumber(), edit, &max_sequence);
    }
    if (s.ok()) {
      // Advance the sequence counter past anything seen in the logs.
      if (versions_->LastSequence() < max_sequence) {
        versions_->SetLastSequence(max_sequence);
      }
    }
  }

  return s;
}
310
+
311
// Replays one write-ahead log file: each record is a serialized
// WriteBatch that is inserted into a fresh memtable; whenever the
// memtable exceeds write_buffer_size it is flushed to a level-0 table
// recorded in *edit.  *max_sequence is raised to the largest sequence
// number seen.  Requires mutex_ held.
Status DBImpl::RecoverLogFile(uint64_t log_number,
                              VersionEdit* edit,
                              SequenceNumber* max_sequence) {
  // Reports log corruption to the info log and, in paranoid mode,
  // latches the first error into *status.
  struct LogReporter : public log::Reader::Reporter {
    Env* env;
    WritableFile* info_log;
    const char* fname;
    Status* status;  // NULL if options_.paranoid_checks==false
    virtual void Corruption(size_t bytes, const Status& s) {
      Log(env, info_log, "%s%s: dropping %d bytes; %s",
          (this->status == NULL ? "(ignoring error) " : ""),
          fname, static_cast<int>(bytes), s.ToString().c_str());
      if (this->status != NULL && this->status->ok()) *this->status = s;
    }
  };

  mutex_.AssertHeld();

  // Open the log file
  std::string fname = LogFileName(dbname_, log_number);
  SequentialFile* file;
  Status status = env_->NewSequentialFile(fname, &file);
  if (!status.ok()) {
    MaybeIgnoreError(&status);
    return status;
  }

  // Create the log reader.
  LogReporter reporter;
  reporter.env = env_;
  reporter.info_log = options_.info_log;
  reporter.fname = fname.c_str();
  reporter.status = (options_.paranoid_checks ? &status : NULL);
  // We intentionally make log::Reader do checksumming even if
  // paranoid_checks==false so that corruptions cause entire commits
  // to be skipped instead of propagating bad information (like overly
  // large sequence numbers).
  log::Reader reader(file, &reporter, true/*checksum*/,
                     0/*initial_offset*/);
  Log(env_, options_.info_log, "Recovering log #%llu",
      (unsigned long long) log_number);

  // Read all the records and add to a memtable
  std::string scratch;
  Slice record;
  WriteBatch batch;
  MemTable* mem = NULL;
  while (reader.ReadRecord(&record, &scratch) &&
         status.ok()) {
    // 12 bytes is presumably the WriteBatch header (8-byte sequence +
    // 4-byte count) — confirm against write_batch_internal.h.
    if (record.size() < 12) {
      reporter.Corruption(
          record.size(), Status::Corruption("log record too small"));
      continue;
    }
    WriteBatchInternal::SetContents(&batch, record);

    if (mem == NULL) {
      mem = new MemTable(internal_comparator_);
      mem->Ref();
    }
    status = WriteBatchInternal::InsertInto(&batch, mem);
    MaybeIgnoreError(&status);
    if (!status.ok()) {
      break;
    }
    // Last sequence number consumed by this batch.
    const SequenceNumber last_seq =
        WriteBatchInternal::Sequence(&batch) +
        WriteBatchInternal::Count(&batch) - 1;
    if (last_seq > *max_sequence) {
      *max_sequence = last_seq;
    }

    if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
      status = WriteLevel0Table(mem, edit);
      if (!status.ok()) {
        // Reflect errors immediately so that conditions like full
        // file-systems cause the DB::Open() to fail.
        break;
      }
      mem->Unref();
      mem = NULL;
    }
  }

  // Flush whatever remains in the final memtable.
  if (status.ok() && mem != NULL) {
    status = WriteLevel0Table(mem, edit);
    // Reflect errors immediately so that conditions like full
    // file-systems cause the DB::Open() to fail.
  }

  if (mem != NULL) mem->Unref();
  delete file;
  return status;
}
405
+
406
// Dumps the contents of *mem into a new level-0 table file, recording
// the new file in *edit and in compaction stats.  The file number is
// held in pending_outputs_ while the (unlocked) build runs so that
// DeleteObsoleteFiles cannot remove the half-written file.
// Requires mutex_ held; it is released around the actual table build.
Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit) {
  mutex_.AssertHeld();
  const uint64_t start_micros = env_->NowMicros();
  FileMetaData meta;
  meta.number = versions_->NewFileNumber();
  pending_outputs_.insert(meta.number);
  Iterator* iter = mem->NewIterator();
  Log(env_, options_.info_log, "Level-0 table #%llu: started",
      (unsigned long long) meta.number);

  Status s;
  {
    // Drop the lock while writing the file; BuildTable does heavy I/O.
    mutex_.Unlock();
    s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta, edit);
    mutex_.Lock();
  }

  Log(env_, options_.info_log, "Level-0 table #%llu: %lld bytes %s",
      (unsigned long long) meta.number,
      (unsigned long long) meta.file_size,
      s.ToString().c_str());
  delete iter;
  pending_outputs_.erase(meta.number);

  CompactionStats stats;
  stats.micros = env_->NowMicros() - start_micros;
  stats.bytes_written = meta.file_size;
  stats_[0].Add(stats);
  return s;
}
436
+
437
// Flushes the immutable memtable imm_ to a level-0 table, commits the
// resulting version edit, and releases imm_ on success.  Requires
// mutex_ held and an active compaction (compacting_ == true).
Status DBImpl::CompactMemTable() {
  mutex_.AssertHeld();
  assert(imm_ != NULL);
  assert(compacting_);

  // Save the contents of the memtable as a new Table
  VersionEdit edit;
  Status s = WriteLevel0Table(imm_, &edit);

  // Replace immutable memtable with the generated Table
  if (s.ok()) {
    // imm_ is now persisted, so the previous log is no longer needed.
    edit.SetPrevLogNumber(0);
    s = versions_->LogAndApply(&edit);
  }

  if (s.ok()) {
    // Commit to the new state
    imm_->Unref();
    imm_ = NULL;
    has_imm_.Release_Store(NULL);
    DeleteObsoleteFiles();
  }

  compacting_cv_.SignalAll();  // Wake up waiter even if there was an error
  return s;
}
463
+
464
+ void DBImpl::TEST_CompactRange(
465
+ int level,
466
+ const std::string& begin,
467
+ const std::string& end) {
468
+ MutexLock l(&mutex_);
469
+ while (compacting_) {
470
+ compacting_cv_.Wait();
471
+ }
472
+ Compaction* c = versions_->CompactRange(
473
+ level,
474
+ InternalKey(begin, kMaxSequenceNumber, kValueTypeForSeek),
475
+ InternalKey(end, 0, static_cast<ValueType>(0)));
476
+
477
+ if (c != NULL) {
478
+ CompactionState* compact = new CompactionState(c);
479
+ DoCompactionWork(compact); // Ignore error in test compaction
480
+ CleanupCompaction(compact);
481
+ }
482
+
483
+ // Start any background compaction that may have been delayed by this thread
484
+ MaybeScheduleCompaction();
485
+ }
486
+
487
// Test hook: forces the current memtable to become immutable and waits
// until the background flush of imm_ completes (or fails).
Status DBImpl::TEST_CompactMemTable() {
  MutexLock l(&mutex_);
  Status s = MakeRoomForWrite(true /* force compaction */);
  if (s.ok()) {
    // Wait until the compaction completes
    while (imm_ != NULL && bg_error_.ok()) {
      compacting_cv_.Wait();
    }
    // imm_ still set means the flush failed; surface the background error.
    if (imm_ != NULL) {
      s = bg_error_;
    }
  }
  return s;
}
501
+
502
// Schedules a background compaction if one is needed and none is
// already scheduled or running.  Requires mutex_ held; the guard order
// below (scheduled -> compacting -> shutting down -> work available)
// ensures at most one background task is outstanding at a time.
void DBImpl::MaybeScheduleCompaction() {
  mutex_.AssertHeld();
  if (bg_compaction_scheduled_) {
    // Already scheduled
  } else if (compacting_) {
    // Some other thread is running a compaction. Do not conflict with it.
  } else if (shutting_down_.Acquire_Load()) {
    // DB is being deleted; no more background compactions
  } else if (imm_ == NULL && !versions_->NeedsCompaction()) {
    // No work to be done
  } else {
    bg_compaction_scheduled_ = true;
    env_->Schedule(&DBImpl::BGWork, this);
  }
}
517
+
518
+ void DBImpl::BGWork(void* db) {
519
+ reinterpret_cast<DBImpl*>(db)->BackgroundCall();
520
+ }
521
+
522
// Body of the scheduled background task: runs one compaction pass
// (unless shutting down or another thread is compacting), clears the
// scheduled flag, wakes waiters, and reschedules if more work remains.
void DBImpl::BackgroundCall() {
  MutexLock l(&mutex_);
  assert(bg_compaction_scheduled_);
  if (!shutting_down_.Acquire_Load() &&
      !compacting_) {
    BackgroundCompaction();
  }
  bg_compaction_scheduled_ = false;
  // Wake ~DBImpl(), which waits for bg_compaction_scheduled_ to clear.
  bg_cv_.SignalAll();

  // Previous compaction may have produced too many files in a level,
  // so reschedule another compaction if needed.
  MaybeScheduleCompaction();
}
536
+
537
// Performs one unit of background compaction work, in priority order:
// (1) flush the immutable memtable if present; (2) otherwise pick a
// table compaction — either a trivial single-file move to the next
// level or a full merge via DoCompactionWork.  Requires mutex_ held.
void DBImpl::BackgroundCompaction() {
  mutex_.AssertHeld();
  assert(!compacting_);

  if (imm_ != NULL) {
    compacting_ = true;
    CompactMemTable();
    compacting_ = false;
    compacting_cv_.SignalAll();
    return;
  }

  Compaction* c = versions_->PickCompaction();
  if (c == NULL) {
    // Nothing to do
    return;
  }

  Status status;
  if (c->IsTrivialMove()) {
    // Move file to next level
    assert(c->num_input_files(0) == 1);
    FileMetaData* f = c->input(0, 0);
    c->edit()->DeleteFile(c->level(), f->number);
    c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
                       f->smallest, f->largest);
    status = versions_->LogAndApply(c->edit());
    Log(env_, options_.info_log, "Moved #%lld to level-%d %lld bytes %s\n",
        static_cast<unsigned long long>(f->number),
        c->level() + 1,
        static_cast<unsigned long long>(f->file_size),
        status.ToString().c_str());
  } else {
    CompactionState* compact = new CompactionState(c);
    status = DoCompactionWork(compact);
    CleanupCompaction(compact);
  }
  delete c;

  if (status.ok()) {
    // Done
  } else if (shutting_down_.Acquire_Load()) {
    // Ignore compaction errors found during shutting down
  } else {
    Log(env_, options_.info_log,
        "Compaction error: %s", status.ToString().c_str());
    // In paranoid mode, latch the first background error permanently.
    if (options_.paranoid_checks && bg_error_.ok()) {
      bg_error_ = status;
    }
  }
}
588
+
589
// Releases all resources held by a CompactionState: abandons any
// half-built table, closes the output file handle, removes the output
// numbers from pending_outputs_, and frees the state itself.
// Requires mutex_ held.  Note: does NOT delete compact->compaction.
void DBImpl::CleanupCompaction(CompactionState* compact) {
  mutex_.AssertHeld();
  if (compact->builder != NULL) {
    // May happen if we get a shutdown call in the middle of compaction
    compact->builder->Abandon();
    delete compact->builder;
  } else {
    assert(compact->outfile == NULL);
  }
  delete compact->outfile;
  for (size_t i = 0; i < compact->outputs.size(); i++) {
    const CompactionState::Output& out = compact->outputs[i];
    pending_outputs_.erase(out.number);
  }
  delete compact;
}
605
+
606
+ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
607
+ assert(compact != NULL);
608
+ assert(compact->builder == NULL);
609
+ uint64_t file_number;
610
+ {
611
+ mutex_.Lock();
612
+ file_number = versions_->NewFileNumber();
613
+ pending_outputs_.insert(file_number);
614
+ CompactionState::Output out;
615
+ out.number = file_number;
616
+ out.smallest.Clear();
617
+ out.largest.Clear();
618
+ compact->outputs.push_back(out);
619
+ mutex_.Unlock();
620
+ }
621
+
622
+ // Make the output file
623
+ std::string fname = TableFileName(dbname_, file_number);
624
+ Status s = env_->NewWritableFile(fname, &compact->outfile);
625
+ if (s.ok()) {
626
+ compact->builder = new TableBuilder(options_, compact->outfile);
627
+ }
628
+ return s;
629
+ }
630
+
631
// Finalizes the compaction output currently being built: finishes (or
// abandons, on prior iterator error) the TableBuilder, syncs and closes
// the file, records its size, and verifies the new table is readable by
// opening an iterator on it through the table cache.
Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
                                          Iterator* input) {
  assert(compact != NULL);
  assert(compact->outfile != NULL);
  assert(compact->builder != NULL);

  const uint64_t output_number = compact->current_output()->number;
  assert(output_number != 0);

  // Check for iterator errors
  Status s = input->status();
  const uint64_t current_entries = compact->builder->NumEntries();
  if (s.ok()) {
    s = compact->builder->Finish();
  } else {
    compact->builder->Abandon();
  }
  // FileSize() is valid after either Finish() or Abandon().
  const uint64_t current_bytes = compact->builder->FileSize();
  compact->current_output()->file_size = current_bytes;
  compact->total_bytes += current_bytes;
  delete compact->builder;
  compact->builder = NULL;

  // Finish and check for file errors
  if (s.ok()) {
    s = compact->outfile->Sync();
  }
  if (s.ok()) {
    s = compact->outfile->Close();
  }
  delete compact->outfile;
  compact->outfile = NULL;

  if (s.ok() && current_entries > 0) {
    // Verify that the table is usable
    Iterator* iter = table_cache_->NewIterator(ReadOptions(),
                                               output_number,
                                               current_bytes);
    s = iter->status();
    delete iter;
    if (s.ok()) {
      Log(env_, options_.info_log,
          "Generated table #%llu: %lld keys, %lld bytes",
          (unsigned long long) output_number,
          (unsigned long long) current_entries,
          (unsigned long long) current_bytes);
    }
  }
  return s;
}
681
+
682
+
683
+ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
684
+ mutex_.AssertHeld();
685
+ Log(env_, options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes",
686
+ compact->compaction->num_input_files(0),
687
+ compact->compaction->level(),
688
+ compact->compaction->num_input_files(1),
689
+ compact->compaction->level() + 1,
690
+ static_cast<long long>(compact->total_bytes));
691
+
692
+ // Add compaction outputs
693
+ compact->compaction->AddInputDeletions(compact->compaction->edit());
694
+ const int level = compact->compaction->level();
695
+ for (size_t i = 0; i < compact->outputs.size(); i++) {
696
+ const CompactionState::Output& out = compact->outputs[i];
697
+ compact->compaction->edit()->AddFile(
698
+ level + 1,
699
+ out.number, out.file_size, out.smallest, out.largest);
700
+ pending_outputs_.erase(out.number);
701
+ }
702
+ compact->outputs.clear();
703
+
704
+ Status s = versions_->LogAndApply(compact->compaction->edit());
705
+ if (s.ok()) {
706
+ compact->compaction->ReleaseInputs();
707
+ DeleteObsoleteFiles();
708
+ } else {
709
+ // Discard any files we may have created during this failed compaction
710
+ for (size_t i = 0; i < compact->outputs.size(); i++) {
711
+ env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number));
712
+ }
713
+ }
714
+ return s;
715
+ }
716
+
717
// The main compaction loop: merges the input files key-by-key, dropping
// entries shadowed by newer versions (subject to the oldest live
// snapshot) and obsolete deletion markers, and writes survivors into a
// sequence of output tables.  Entered with mutex_ held; the lock is
// released for the duration of the merge and re-taken at the end to
// record stats and install the results.
Status DBImpl::DoCompactionWork(CompactionState* compact) {
  const uint64_t start_micros = env_->NowMicros();
  int64_t imm_micros = 0;  // Micros spent doing imm_ compactions

  Log(env_, options_.info_log, "Compacting %d@%d + %d@%d files",
      compact->compaction->num_input_files(0),
      compact->compaction->level(),
      compact->compaction->num_input_files(1),
      compact->compaction->level() + 1);

  assert(versions_->NumLevelFiles(compact->compaction->level()) > 0);
  assert(compact->builder == NULL);
  assert(compact->outfile == NULL);
  // Entries at or below smallest_snapshot may be collapsed; anything
  // newer must be kept so open snapshots still see it.
  if (snapshots_.empty()) {
    compact->smallest_snapshot = versions_->LastSequence();
  } else {
    compact->smallest_snapshot = snapshots_.oldest()->number_;
  }

  // Release mutex while we're actually doing the compaction work
  compacting_ = true;
  mutex_.Unlock();

  Iterator* input = versions_->MakeInputIterator(compact->compaction);
  input->SeekToFirst();
  Status status;
  ParsedInternalKey ikey;
  std::string current_user_key;
  bool has_current_user_key = false;
  SequenceNumber last_sequence_for_key = kMaxSequenceNumber;
  for (; input->Valid() && !shutting_down_.Acquire_Load(); ) {
    // Prioritize immutable compaction work
    if (has_imm_.NoBarrier_Load() != NULL) {
      const uint64_t imm_start = env_->NowMicros();
      mutex_.Lock();
      if (imm_ != NULL) {
        CompactMemTable();
        compacting_cv_.SignalAll();  // Wakeup MakeRoomForWrite() if necessary
      }
      mutex_.Unlock();
      imm_micros += (env_->NowMicros() - imm_start);
    }

    Slice key = input->key();
    // Cut the current output early if extending it would overlap too
    // much grandparent data (limits future compaction work).
    if (compact->compaction->ShouldStopBefore(key) &&
        compact->builder != NULL) {
      status = FinishCompactionOutputFile(compact, input);
      if (!status.ok()) {
        break;
      }
    }

    // Handle key/value, add to state, etc.
    bool drop = false;
    if (!ParseInternalKey(key, &ikey)) {
      // Do not hide error keys
      current_user_key.clear();
      has_current_user_key = false;
      last_sequence_for_key = kMaxSequenceNumber;
    } else {
      if (!has_current_user_key ||
          user_comparator()->Compare(ikey.user_key,
                                     Slice(current_user_key)) != 0) {
        // First occurrence of this user key
        current_user_key.assign(ikey.user_key.data(), ikey.user_key.size());
        has_current_user_key = true;
        last_sequence_for_key = kMaxSequenceNumber;
      }

      if (last_sequence_for_key <= compact->smallest_snapshot) {
        // Hidden by a newer entry for same user key
        drop = true;    // (A)
      } else if (ikey.type == kTypeDeletion &&
                 ikey.sequence <= compact->smallest_snapshot &&
                 compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
        // For this user key:
        // (1) there is no data in higher levels
        // (2) data in lower levels will have larger sequence numbers
        // (3) data in layers that are being compacted here and have
        //     smaller sequence numbers will be dropped in the next
        //     few iterations of this loop (by rule (A) above).
        // Therefore this deletion marker is obsolete and can be dropped.
        drop = true;
      }

      last_sequence_for_key = ikey.sequence;
    }
#if 0
    Log(env_, options_.info_log,
        "  Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
        "%d smallest_snapshot: %d",
        ikey.user_key.ToString().c_str(),
        (int)ikey.sequence, ikey.type, kTypeValue, drop,
        compact->compaction->IsBaseLevelForKey(ikey.user_key),
        (int)last_sequence_for_key, (int)compact->smallest_snapshot);
#endif

    if (!drop) {
      // Open output file if necessary
      if (compact->builder == NULL) {
        status = OpenCompactionOutputFile(compact);
        if (!status.ok()) {
          break;
        }
      }
      if (compact->builder->NumEntries() == 0) {
        compact->current_output()->smallest.DecodeFrom(key);
      }
      compact->current_output()->largest.DecodeFrom(key);
      compact->builder->Add(key, input->value());

      // Close output file if it is big enough
      if (compact->builder->FileSize() >=
          compact->compaction->MaxOutputFileSize()) {
        status = FinishCompactionOutputFile(compact, input);
        if (!status.ok()) {
          break;
        }
      }
    }

    input->Next();
  }

  if (status.ok() && shutting_down_.Acquire_Load()) {
    status = Status::IOError("Deleting DB during compaction");
  }
  if (status.ok() && compact->builder != NULL) {
    status = FinishCompactionOutputFile(compact, input);
  }
  if (status.ok()) {
    status = input->status();
  }
  delete input;
  input = NULL;

  // Gather stats: bytes read from every input file, bytes written to
  // every output; time spent flushing imm_ is excluded.
  CompactionStats stats;
  stats.micros = env_->NowMicros() - start_micros - imm_micros;
  for (int which = 0; which < 2; which++) {
    for (int i = 0; i < compact->compaction->num_input_files(which); i++) {
      stats.bytes_read += compact->compaction->input(which, i)->file_size;
    }
  }
  for (size_t i = 0; i < compact->outputs.size(); i++) {
    stats.bytes_written += compact->outputs[i].file_size;
  }

  mutex_.Lock();
  stats_[compact->compaction->level() + 1].Add(stats);

  if (status.ok()) {
    status = InstallCompactionResults(compact);
  }
  compacting_ = false;
  compacting_cv_.SignalAll();
  VersionSet::LevelSummaryStorage tmp;
  Log(env_, options_.info_log,
      "compacted to: %s", versions_->LevelSummary(&tmp));
  return status;
}
877
+
878
+ namespace {
879
+ struct IterState {
880
+ port::Mutex* mu;
881
+ Version* version;
882
+ MemTable* mem;
883
+ MemTable* imm;
884
+ };
885
+
886
+ static void CleanupIteratorState(void* arg1, void* arg2) {
887
+ IterState* state = reinterpret_cast<IterState*>(arg1);
888
+ state->mu->Lock();
889
+ state->mem->Unref();
890
+ if (state->imm != NULL) state->imm->Unref();
891
+ state->version->Unref();
892
+ state->mu->Unlock();
893
+ delete state;
894
+ }
895
+ }
896
+
897
+ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
898
+ SequenceNumber* latest_snapshot) {
899
+ IterState* cleanup = new IterState;
900
+ mutex_.Lock();
901
+ *latest_snapshot = versions_->LastSequence();
902
+
903
+ // Collect together all needed child iterators
904
+ std::vector<Iterator*> list;
905
+ list.push_back(mem_->NewIterator());
906
+ mem_->Ref();
907
+ if (imm_ != NULL) {
908
+ list.push_back(imm_->NewIterator());
909
+ imm_->Ref();
910
+ }
911
+ versions_->current()->AddIterators(options, &list);
912
+ Iterator* internal_iter =
913
+ NewMergingIterator(&internal_comparator_, &list[0], list.size());
914
+ versions_->current()->Ref();
915
+
916
+ cleanup->mu = &mutex_;
917
+ cleanup->mem = mem_;
918
+ cleanup->imm = imm_;
919
+ cleanup->version = versions_->current();
920
+ internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL);
921
+
922
+ mutex_.Unlock();
923
+ return internal_iter;
924
+ }
925
+
926
+ Iterator* DBImpl::TEST_NewInternalIterator() {
927
+ SequenceNumber ignored;
928
+ return NewInternalIterator(ReadOptions(), &ignored);
929
+ }
930
+
931
+ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
932
+ MutexLock l(&mutex_);
933
+ return versions_->MaxNextLevelOverlappingBytes();
934
+ }
935
+
936
+ Status DBImpl::Get(const ReadOptions& options,
937
+ const Slice& key,
938
+ std::string* value) {
939
+ // TODO(opt): faster implementation
940
+ Iterator* iter = NewIterator(options);
941
+ iter->Seek(key);
942
+ bool found = false;
943
+ if (iter->Valid() && user_comparator()->Compare(key, iter->key()) == 0) {
944
+ Slice v = iter->value();
945
+ value->assign(v.data(), v.size());
946
+ found = true;
947
+ }
948
+ // Non-OK iterator status trumps everything else
949
+ Status result = iter->status();
950
+ if (result.ok() && !found) {
951
+ result = Status::NotFound(Slice()); // Use an empty error message for speed
952
+ }
953
+ delete iter;
954
+ return result;
955
+ }
956
+
957
+ Iterator* DBImpl::NewIterator(const ReadOptions& options) {
958
+ SequenceNumber latest_snapshot;
959
+ Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
960
+ return NewDBIterator(
961
+ &dbname_, env_, user_comparator(), internal_iter,
962
+ (options.snapshot != NULL
963
+ ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
964
+ : latest_snapshot));
965
+ }
966
+
967
+ const Snapshot* DBImpl::GetSnapshot() {
968
+ MutexLock l(&mutex_);
969
+ return snapshots_.New(versions_->LastSequence());
970
+ }
971
+
972
+ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
973
+ MutexLock l(&mutex_);
974
+ snapshots_.Delete(reinterpret_cast<const SnapshotImpl*>(s));
975
+ }
976
+
977
+ // Convenience methods
978
+ Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) {
979
+ return DB::Put(o, key, val);
980
+ }
981
+
982
+ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
983
+ return DB::Delete(options, key);
984
+ }
985
+
986
+ Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
987
+ Status status;
988
+ MutexLock l(&mutex_);
989
+ status = MakeRoomForWrite(false); // May temporarily release lock and wait
990
+ uint64_t last_sequence = versions_->LastSequence();
991
+ if (status.ok()) {
992
+ WriteBatchInternal::SetSequence(updates, last_sequence + 1);
993
+ last_sequence += WriteBatchInternal::Count(updates);
994
+ versions_->SetLastSequence(last_sequence);
995
+
996
+ // Add to log and apply to memtable
997
+ status = log_->AddRecord(WriteBatchInternal::Contents(updates));
998
+ if (status.ok() && options.sync) {
999
+ status = logfile_->Sync();
1000
+ }
1001
+ if (status.ok()) {
1002
+ status = WriteBatchInternal::InsertInto(updates, mem_);
1003
+ }
1004
+ }
1005
+ if (options.post_write_snapshot != NULL) {
1006
+ *options.post_write_snapshot =
1007
+ status.ok() ? snapshots_.New(last_sequence) : NULL;
1008
+ }
1009
+ return status;
1010
+ }
1011
+
1012
+ Status DBImpl::MakeRoomForWrite(bool force) {
1013
+ mutex_.AssertHeld();
1014
+ bool allow_delay = !force;
1015
+ Status s;
1016
+ while (true) {
1017
+ if (!bg_error_.ok()) {
1018
+ // Yield previous error
1019
+ s = bg_error_;
1020
+ break;
1021
+ } else if (
1022
+ allow_delay &&
1023
+ versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) {
1024
+ // We are getting close to hitting a hard limit on the number of
1025
+ // L0 files. Rather than delaying a single write by several
1026
+ // seconds when we hit the hard limit, start delaying each
1027
+ // individual write by 1ms to reduce latency variance. Also,
1028
+ // this delay hands over some CPU to the compaction thread in
1029
+ // case it is sharing the same core as the writer.
1030
+ mutex_.Unlock();
1031
+ env_->SleepForMicroseconds(1000);
1032
+ allow_delay = false; // Do not delay a single write more than once
1033
+ mutex_.Lock();
1034
+ } else if (!force &&
1035
+ (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) {
1036
+ // There is room in current memtable
1037
+ break;
1038
+ } else if (imm_ != NULL) {
1039
+ // We have filled up the current memtable, but the previous
1040
+ // one is still being compacted, so we wait.
1041
+ compacting_cv_.Wait();
1042
+ } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
1043
+ // There are too many level-0 files.
1044
+ compacting_cv_.Wait();
1045
+ } else {
1046
+ // Attempt to switch to a new memtable and trigger compaction of old
1047
+ assert(versions_->PrevLogNumber() == 0);
1048
+ uint64_t new_log_number = versions_->NewFileNumber();
1049
+ WritableFile* lfile = NULL;
1050
+ s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile);
1051
+ if (!s.ok()) {
1052
+ break;
1053
+ }
1054
+ VersionEdit edit;
1055
+ edit.SetPrevLogNumber(versions_->LogNumber());
1056
+ edit.SetLogNumber(new_log_number);
1057
+ s = versions_->LogAndApply(&edit);
1058
+ if (!s.ok()) {
1059
+ delete lfile;
1060
+ env_->DeleteFile(LogFileName(dbname_, new_log_number));
1061
+ break;
1062
+ }
1063
+ delete log_;
1064
+ delete logfile_;
1065
+ logfile_ = lfile;
1066
+ log_ = new log::Writer(lfile);
1067
+ imm_ = mem_;
1068
+ has_imm_.Release_Store(imm_);
1069
+ mem_ = new MemTable(internal_comparator_);
1070
+ mem_->Ref();
1071
+ force = false; // Do not force another compaction if have room
1072
+ MaybeScheduleCompaction();
1073
+ }
1074
+ }
1075
+ return s;
1076
+ }
1077
+
1078
+ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
1079
+ value->clear();
1080
+
1081
+ MutexLock l(&mutex_);
1082
+ Slice in = property;
1083
+ Slice prefix("leveldb.");
1084
+ if (!in.starts_with(prefix)) return false;
1085
+ in.remove_prefix(prefix.size());
1086
+
1087
+ if (in.starts_with("num-files-at-level")) {
1088
+ in.remove_prefix(strlen("num-files-at-level"));
1089
+ uint64_t level;
1090
+ bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
1091
+ if (!ok || level < 0 || level >= config::kNumLevels) {
1092
+ return false;
1093
+ } else {
1094
+ char buf[100];
1095
+ snprintf(buf, sizeof(buf), "%d",
1096
+ versions_->NumLevelFiles(static_cast<int>(level)));
1097
+ *value = buf;
1098
+ return true;
1099
+ }
1100
+ } else if (in == "stats") {
1101
+ char buf[200];
1102
+ snprintf(buf, sizeof(buf),
1103
+ " Compactions\n"
1104
+ "Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n"
1105
+ "--------------------------------------------------\n"
1106
+ );
1107
+ value->append(buf);
1108
+ for (int level = 0; level < config::kNumLevels; level++) {
1109
+ int files = versions_->NumLevelFiles(level);
1110
+ if (stats_[level].micros > 0 || files > 0) {
1111
+ snprintf(
1112
+ buf, sizeof(buf),
1113
+ "%3d %8d %8.0f %9.0f %8.0f %9.0f\n",
1114
+ level,
1115
+ files,
1116
+ versions_->NumLevelBytes(level) / 1048576.0,
1117
+ stats_[level].micros / 1e6,
1118
+ stats_[level].bytes_read / 1048576.0,
1119
+ stats_[level].bytes_written / 1048576.0);
1120
+ value->append(buf);
1121
+ }
1122
+ }
1123
+ return true;
1124
+ }
1125
+
1126
+ return false;
1127
+ }
1128
+
1129
+ void DBImpl::GetApproximateSizes(
1130
+ const Range* range, int n,
1131
+ uint64_t* sizes) {
1132
+ // TODO(opt): better implementation
1133
+ Version* v;
1134
+ {
1135
+ MutexLock l(&mutex_);
1136
+ versions_->current()->Ref();
1137
+ v = versions_->current();
1138
+ }
1139
+
1140
+ for (int i = 0; i < n; i++) {
1141
+ // Convert user_key into a corresponding internal key.
1142
+ InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
1143
+ InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
1144
+ uint64_t start = versions_->ApproximateOffsetOf(v, k1);
1145
+ uint64_t limit = versions_->ApproximateOffsetOf(v, k2);
1146
+ sizes[i] = (limit >= start ? limit - start : 0);
1147
+ }
1148
+
1149
+ {
1150
+ MutexLock l(&mutex_);
1151
+ v->Unref();
1152
+ }
1153
+ }
1154
+
1155
+ // Default implementations of convenience methods that subclasses of DB
1156
+ // can call if they wish
1157
+ Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {
1158
+ WriteBatch batch;
1159
+ batch.Put(key, value);
1160
+ return Write(opt, &batch);
1161
+ }
1162
+
1163
+ Status DB::Delete(const WriteOptions& opt, const Slice& key) {
1164
+ WriteBatch batch;
1165
+ batch.Delete(key);
1166
+ return Write(opt, &batch);
1167
+ }
1168
+
1169
+ DB::~DB() { }
1170
+
1171
+ Status DB::Open(const Options& options, const std::string& dbname,
1172
+ DB** dbptr) {
1173
+ *dbptr = NULL;
1174
+
1175
+ DBImpl* impl = new DBImpl(options, dbname);
1176
+ impl->mutex_.Lock();
1177
+ VersionEdit edit;
1178
+ Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists
1179
+ if (s.ok()) {
1180
+ uint64_t new_log_number = impl->versions_->NewFileNumber();
1181
+ WritableFile* lfile;
1182
+ s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
1183
+ &lfile);
1184
+ if (s.ok()) {
1185
+ edit.SetLogNumber(new_log_number);
1186
+ impl->logfile_ = lfile;
1187
+ impl->log_ = new log::Writer(lfile);
1188
+ s = impl->versions_->LogAndApply(&edit);
1189
+ }
1190
+ if (s.ok()) {
1191
+ impl->DeleteObsoleteFiles();
1192
+ impl->MaybeScheduleCompaction();
1193
+ }
1194
+ }
1195
+ impl->mutex_.Unlock();
1196
+ if (s.ok()) {
1197
+ *dbptr = impl;
1198
+ } else {
1199
+ delete impl;
1200
+ }
1201
+ return s;
1202
+ }
1203
+
1204
+ Snapshot::~Snapshot() {
1205
+ }
1206
+
1207
+ Status DestroyDB(const std::string& dbname, const Options& options) {
1208
+ Env* env = options.env;
1209
+ std::vector<std::string> filenames;
1210
+ // Ignore error in case directory does not exist
1211
+ env->GetChildren(dbname, &filenames);
1212
+ if (filenames.empty()) {
1213
+ return Status::OK();
1214
+ }
1215
+
1216
+ FileLock* lock;
1217
+ Status result = env->LockFile(LockFileName(dbname), &lock);
1218
+ if (result.ok()) {
1219
+ uint64_t number;
1220
+ FileType type;
1221
+ for (size_t i = 0; i < filenames.size(); i++) {
1222
+ if (ParseFileName(filenames[i], &number, &type)) {
1223
+ Status del = env->DeleteFile(dbname + "/" + filenames[i]);
1224
+ if (result.ok() && !del.ok()) {
1225
+ result = del;
1226
+ }
1227
+ }
1228
+ }
1229
+ env->UnlockFile(lock); // Ignore error since state is already gone
1230
+ env->DeleteFile(LockFileName(dbname));
1231
+ env->DeleteDir(dbname); // Ignore error in case dir contains other files
1232
+ }
1233
+ return result;
1234
+ }
1235
+
1236
+ }