leveldb-ruby 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113):
  1. data/README +17 -0
  2. data/ext/leveldb/extconf.rb +10 -0
  3. data/ext/leveldb/leveldb.cc +181 -0
  4. data/leveldb/Makefile +172 -0
  5. data/leveldb/db/builder.cc +90 -0
  6. data/leveldb/db/builder.h +36 -0
  7. data/leveldb/db/corruption_test.cc +354 -0
  8. data/leveldb/db/db_bench.cc +677 -0
  9. data/leveldb/db/db_impl.cc +1236 -0
  10. data/leveldb/db/db_impl.h +180 -0
  11. data/leveldb/db/db_iter.cc +298 -0
  12. data/leveldb/db/db_iter.h +26 -0
  13. data/leveldb/db/db_test.cc +1192 -0
  14. data/leveldb/db/dbformat.cc +87 -0
  15. data/leveldb/db/dbformat.h +165 -0
  16. data/leveldb/db/dbformat_test.cc +112 -0
  17. data/leveldb/db/filename.cc +135 -0
  18. data/leveldb/db/filename.h +80 -0
  19. data/leveldb/db/filename_test.cc +122 -0
  20. data/leveldb/db/log_format.h +35 -0
  21. data/leveldb/db/log_reader.cc +254 -0
  22. data/leveldb/db/log_reader.h +108 -0
  23. data/leveldb/db/log_test.cc +500 -0
  24. data/leveldb/db/log_writer.cc +103 -0
  25. data/leveldb/db/log_writer.h +48 -0
  26. data/leveldb/db/memtable.cc +108 -0
  27. data/leveldb/db/memtable.h +85 -0
  28. data/leveldb/db/repair.cc +384 -0
  29. data/leveldb/db/skiplist.h +378 -0
  30. data/leveldb/db/skiplist_test.cc +378 -0
  31. data/leveldb/db/snapshot.h +66 -0
  32. data/leveldb/db/table_cache.cc +95 -0
  33. data/leveldb/db/table_cache.h +50 -0
  34. data/leveldb/db/version_edit.cc +268 -0
  35. data/leveldb/db/version_edit.h +106 -0
  36. data/leveldb/db/version_edit_test.cc +46 -0
  37. data/leveldb/db/version_set.cc +1060 -0
  38. data/leveldb/db/version_set.h +306 -0
  39. data/leveldb/db/write_batch.cc +138 -0
  40. data/leveldb/db/write_batch_internal.h +45 -0
  41. data/leveldb/db/write_batch_test.cc +89 -0
  42. data/leveldb/include/leveldb/cache.h +99 -0
  43. data/leveldb/include/leveldb/comparator.h +63 -0
  44. data/leveldb/include/leveldb/db.h +148 -0
  45. data/leveldb/include/leveldb/env.h +302 -0
  46. data/leveldb/include/leveldb/iterator.h +100 -0
  47. data/leveldb/include/leveldb/options.h +198 -0
  48. data/leveldb/include/leveldb/slice.h +109 -0
  49. data/leveldb/include/leveldb/status.h +100 -0
  50. data/leveldb/include/leveldb/table.h +70 -0
  51. data/leveldb/include/leveldb/table_builder.h +91 -0
  52. data/leveldb/include/leveldb/write_batch.h +64 -0
  53. data/leveldb/port/port.h +23 -0
  54. data/leveldb/port/port_android.cc +64 -0
  55. data/leveldb/port/port_android.h +150 -0
  56. data/leveldb/port/port_chromium.cc +80 -0
  57. data/leveldb/port/port_chromium.h +97 -0
  58. data/leveldb/port/port_example.h +115 -0
  59. data/leveldb/port/port_osx.cc +50 -0
  60. data/leveldb/port/port_osx.h +125 -0
  61. data/leveldb/port/port_posix.cc +50 -0
  62. data/leveldb/port/port_posix.h +94 -0
  63. data/leveldb/port/sha1_portable.cc +298 -0
  64. data/leveldb/port/sha1_portable.h +25 -0
  65. data/leveldb/port/sha1_test.cc +39 -0
  66. data/leveldb/port/win/stdint.h +24 -0
  67. data/leveldb/table/block.cc +263 -0
  68. data/leveldb/table/block.h +43 -0
  69. data/leveldb/table/block_builder.cc +109 -0
  70. data/leveldb/table/block_builder.h +57 -0
  71. data/leveldb/table/format.cc +131 -0
  72. data/leveldb/table/format.h +103 -0
  73. data/leveldb/table/iterator.cc +67 -0
  74. data/leveldb/table/iterator_wrapper.h +63 -0
  75. data/leveldb/table/merger.cc +197 -0
  76. data/leveldb/table/merger.h +26 -0
  77. data/leveldb/table/table.cc +175 -0
  78. data/leveldb/table/table_builder.cc +227 -0
  79. data/leveldb/table/table_test.cc +845 -0
  80. data/leveldb/table/two_level_iterator.cc +182 -0
  81. data/leveldb/table/two_level_iterator.h +34 -0
  82. data/leveldb/util/arena.cc +68 -0
  83. data/leveldb/util/arena.h +68 -0
  84. data/leveldb/util/arena_test.cc +68 -0
  85. data/leveldb/util/cache.cc +255 -0
  86. data/leveldb/util/cache_test.cc +169 -0
  87. data/leveldb/util/coding.cc +194 -0
  88. data/leveldb/util/coding.h +104 -0
  89. data/leveldb/util/coding_test.cc +173 -0
  90. data/leveldb/util/comparator.cc +72 -0
  91. data/leveldb/util/crc32c.cc +332 -0
  92. data/leveldb/util/crc32c.h +45 -0
  93. data/leveldb/util/crc32c_test.cc +72 -0
  94. data/leveldb/util/env.cc +77 -0
  95. data/leveldb/util/env_chromium.cc +612 -0
  96. data/leveldb/util/env_posix.cc +606 -0
  97. data/leveldb/util/env_test.cc +102 -0
  98. data/leveldb/util/hash.cc +45 -0
  99. data/leveldb/util/hash.h +19 -0
  100. data/leveldb/util/histogram.cc +128 -0
  101. data/leveldb/util/histogram.h +41 -0
  102. data/leveldb/util/logging.cc +81 -0
  103. data/leveldb/util/logging.h +47 -0
  104. data/leveldb/util/mutexlock.h +39 -0
  105. data/leveldb/util/options.cc +28 -0
  106. data/leveldb/util/random.h +59 -0
  107. data/leveldb/util/status.cc +75 -0
  108. data/leveldb/util/testharness.cc +65 -0
  109. data/leveldb/util/testharness.h +129 -0
  110. data/leveldb/util/testutil.cc +51 -0
  111. data/leveldb/util/testutil.h +53 -0
  112. data/lib/leveldb.rb +36 -0
  113. metadata +183 -0
@@ -0,0 +1,1236 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "db/db_impl.h"
6
+
7
+ #include <algorithm>
8
+ #include <set>
9
+ #include <string>
10
+ #include <stdint.h>
11
+ #include <stdio.h>
12
+ #include <vector>
13
+ #include "db/builder.h"
14
+ #include "db/db_iter.h"
15
+ #include "db/dbformat.h"
16
+ #include "db/filename.h"
17
+ #include "db/log_reader.h"
18
+ #include "db/log_writer.h"
19
+ #include "db/memtable.h"
20
+ #include "db/table_cache.h"
21
+ #include "db/version_set.h"
22
+ #include "db/write_batch_internal.h"
23
+ #include "leveldb/db.h"
24
+ #include "leveldb/env.h"
25
+ #include "leveldb/status.h"
26
+ #include "leveldb/table.h"
27
+ #include "leveldb/table_builder.h"
28
+ #include "port/port.h"
29
+ #include "table/block.h"
30
+ #include "table/merger.h"
31
+ #include "table/two_level_iterator.h"
32
+ #include "util/coding.h"
33
+ #include "util/logging.h"
34
+ #include "util/mutexlock.h"
35
+
36
+ namespace leveldb {
37
+
38
// Per-compaction bookkeeping: the compaction being executed, the snapshot
// floor below which overwritten/deleted entries may be dropped, and the
// output tables produced so far.
struct DBImpl::CompactionState {
  Compaction* const compaction;

  // Sequence numbers < smallest_snapshot are not significant since we
  // will never have to service a snapshot below smallest_snapshot.
  // Therefore if we have seen a sequence number S <= smallest_snapshot,
  // we can drop all entries for the same key with sequence numbers < S.
  SequenceNumber smallest_snapshot;

  // Files produced by compaction
  struct Output {
    uint64_t number;     // table file number
    uint64_t file_size;  // bytes written to this table
    InternalKey smallest, largest;  // key range covered by this table
  };
  std::vector<Output> outputs;

  // State kept for output being generated
  WritableFile* outfile;
  TableBuilder* builder;

  // Total bytes across all finished outputs (accumulated by
  // FinishCompactionOutputFile).
  uint64_t total_bytes;

  // The output currently being built (the last element of outputs).
  Output* current_output() { return &outputs[outputs.size()-1]; }

  explicit CompactionState(Compaction* c)
      : compaction(c),
        outfile(NULL),
        builder(NULL),
        total_bytes(0) {
  }
};
70
+
71
+ namespace {
72
+ class NullWritableFile : public WritableFile {
73
+ public:
74
+ virtual Status Append(const Slice& data) { return Status::OK(); }
75
+ virtual Status Close() { return Status::OK(); }
76
+ virtual Status Flush() { return Status::OK(); }
77
+ virtual Status Sync() { return Status::OK(); }
78
+ };
79
+ }
80
+
81
+ // Fix user-supplied options to be reasonable
82
// Clamp *ptr into the closed interval [minvalue, maxvalue], performing the
// comparisons in type V.  (Max is applied first, then min, matching the
// original order.)
template <class T, class V>
static void ClipToRange(T* ptr, V minvalue, V maxvalue) {
  if (static_cast<V>(*ptr) > maxvalue) {
    *ptr = maxvalue;
  }
  if (static_cast<V>(*ptr) < minvalue) {
    *ptr = minvalue;
  }
}
87
// Return a sanitized copy of "src": installs the internal key comparator,
// clamps resource limits into reasonable ranges, and fills in default
// info_log / block_cache objects when the caller supplied none.  The
// caller-visible Options object is never modified.
Options SanitizeOptions(const std::string& dbname,
                        const InternalKeyComparator* icmp,
                        const Options& src) {
  Options result = src;
  result.comparator = icmp;
  ClipToRange(&result.max_open_files, 20, 50000);
  ClipToRange(&result.write_buffer_size, 64<<10, 1<<30);
  ClipToRange(&result.block_size, 1<<10, 4<<20);
  if (result.info_log == NULL) {
    // Open a log file in the same directory as the db
    src.env->CreateDir(dbname);  // In case it does not exist
    // Keep one previous log around under the "old" name.
    src.env->RenameFile(InfoLogFileName(dbname), OldInfoLogFileName(dbname));
    Status s = src.env->NewWritableFile(InfoLogFileName(dbname),
                                        &result.info_log);
    if (!s.ok()) {
      // No place suitable for logging; discard all log output.
      result.info_log = new NullWritableFile;
    }
  }
  if (result.block_cache == NULL) {
    result.block_cache = NewLRUCache(8 << 20);
  }
  return result;
}
111
+
112
// Construct the DB implementation.  Note the initializer ordering:
// internal_comparator_ must be built before options_ (SanitizeOptions takes
// its address), and owns_info_log_/owns_cache_ compare the sanitized options
// against the caller's to record which objects we allocated and must delete.
DBImpl::DBImpl(const Options& options, const std::string& dbname)
    : env_(options.env),
      internal_comparator_(options.comparator),
      options_(SanitizeOptions(dbname, &internal_comparator_, options)),
      owns_info_log_(options_.info_log != options.info_log),
      owns_cache_(options_.block_cache != options.block_cache),
      dbname_(dbname),
      db_lock_(NULL),
      shutting_down_(NULL),
      bg_cv_(&mutex_),
      compacting_cv_(&mutex_),
      mem_(new MemTable(internal_comparator_)),
      imm_(NULL),
      logfile_(NULL),
      log_(NULL),
      bg_compaction_scheduled_(false),
      compacting_(false) {
  mem_->Ref();
  has_imm_.Release_Store(NULL);

  // Reserve ten files or so for other uses and give the rest to TableCache.
  const int table_cache_size = options.max_open_files - 10;
  table_cache_ = new TableCache(dbname_, &options_, table_cache_size);

  versions_ = new VersionSet(dbname_, &options_, table_cache_,
                             &internal_comparator_);
}
139
+
140
// Destructor: signals shutdown, waits for any scheduled background
// compaction to drain, releases the DB file lock, then tears down state.
DBImpl::~DBImpl() {
  // Wait for background work to finish
  mutex_.Lock();
  shutting_down_.Release_Store(this);  // Any non-NULL value is ok
  if (bg_compaction_scheduled_) {
    while (bg_compaction_scheduled_) {
      bg_cv_.Wait();
    }
  }
  mutex_.Unlock();

  if (db_lock_ != NULL) {
    env_->UnlockFile(db_lock_);
  }

  delete versions_;
  if (mem_ != NULL) mem_->Unref();
  if (imm_ != NULL) imm_->Unref();
  // Delete the log writer before the file it writes to.
  delete log_;
  delete logfile_;
  delete table_cache_;

  // Only delete objects we allocated ourselves in SanitizeOptions.
  if (owns_info_log_) {
    delete options_.info_log;
  }
  if (owns_cache_) {
    delete options_.block_cache;
  }
}
169
+
170
// Create a brand-new database: write an initial manifest (descriptor file
// number 1) containing an empty VersionEdit, then point CURRENT at it.  On
// failure the partially written manifest is removed.
Status DBImpl::NewDB() {
  VersionEdit new_db;
  new_db.SetComparatorName(user_comparator()->Name());
  new_db.SetLogNumber(0);
  new_db.SetNextFile(2);
  new_db.SetLastSequence(0);

  const std::string manifest = DescriptorFileName(dbname_, 1);
  WritableFile* file;
  Status s = env_->NewWritableFile(manifest, &file);
  if (!s.ok()) {
    return s;
  }
  {
    // Scope the log::Writer so it is destroyed before "file" is deleted.
    log::Writer log(file);
    std::string record;
    new_db.EncodeTo(&record);
    s = log.AddRecord(record);
    if (s.ok()) {
      s = file->Close();
    }
  }
  delete file;
  if (s.ok()) {
    // Make "CURRENT" file that points to the new manifest file.
    s = SetCurrentFile(env_, dbname_, 1);
  } else {
    env_->DeleteFile(manifest);
  }
  return s;
}
201
+
202
// If paranoid_checks is off, downgrade a non-OK status to OK after logging
// it; with paranoid_checks on (or an already-OK status) leave *s untouched.
void DBImpl::MaybeIgnoreError(Status* s) const {
  if (s->ok() || options_.paranoid_checks) {
    // No change needed
  } else {
    Log(env_, options_.info_log, "Ignoring error %s", s->ToString().c_str());
    *s = Status::OK();
  }
}
210
+
211
// Scan the database directory and delete any file that is no longer needed:
// old logs, superseded manifests, and table/temp files that are neither
// referenced by a live version nor protected by pending_outputs_.
void DBImpl::DeleteObsoleteFiles() {
  // Make a set of all of the live files
  std::set<uint64_t> live = pending_outputs_;
  versions_->AddLiveFiles(&live);

  std::vector<std::string> filenames;
  env_->GetChildren(dbname_, &filenames);  // Ignoring errors on purpose
  uint64_t number;
  FileType type;
  for (size_t i = 0; i < filenames.size(); i++) {
    // Files with unparseable names are left alone.
    if (ParseFileName(filenames[i], &number, &type)) {
      bool keep = true;
      switch (type) {
        case kLogFile:
          keep = ((number == versions_->LogNumber()) ||
                  (number == versions_->PrevLogNumber()));
          break;
        case kDescriptorFile:
          // Keep my manifest file, and any newer incarnations'
          // (in case there is a race that allows other incarnations)
          keep = (number >= versions_->ManifestFileNumber());
          break;
        case kTableFile:
          keep = (live.find(number) != live.end());
          break;
        case kTempFile:
          // Any temp files that are currently being written to must
          // be recorded in pending_outputs_, which is inserted into "live"
          keep = (live.find(number) != live.end());
          break;
        case kCurrentFile:
        case kDBLockFile:
        case kInfoLogFile:
          keep = true;
          break;
      }

      if (!keep) {
        if (type == kTableFile) {
          // Drop any cached open handle for the table before deleting it.
          table_cache_->Evict(number);
        }
        Log(env_, options_.info_log, "Delete type=%d #%lld\n",
            int(type),
            static_cast<unsigned long long>(number));
        env_->DeleteFile(dbname_ + "/" + filenames[i]);
      }
    }
  }
}
260
+
261
// Bring the database to a consistent state at open time: take the file
// lock, create a new DB if permitted/needed, recover the version state from
// the manifest, and replay any outstanding log files into memtables,
// recording resulting changes in *edit.  Requires mutex_ held.
Status DBImpl::Recover(VersionEdit* edit) {
  mutex_.AssertHeld();

  // Ignore error from CreateDir since the creation of the DB is
  // committed only when the descriptor is created, and this directory
  // may already exist from a previous failed creation attempt.
  env_->CreateDir(dbname_);
  assert(db_lock_ == NULL);
  Status s = env_->LockFile(LockFileName(dbname_), &db_lock_);
  if (!s.ok()) {
    return s;
  }

  if (!env_->FileExists(CurrentFileName(dbname_))) {
    if (options_.create_if_missing) {
      s = NewDB();
      if (!s.ok()) {
        return s;
      }
    } else {
      return Status::InvalidArgument(
          dbname_, "does not exist (create_if_missing is false)");
    }
  } else {
    if (options_.error_if_exists) {
      return Status::InvalidArgument(
          dbname_, "exists (error_if_exists is true)");
    }
  }

  s = versions_->Recover();
  if (s.ok()) {
    // Recover from the log files named in the descriptor.  The previous
    // log (if any) is replayed before the current one to preserve write
    // order.
    SequenceNumber max_sequence(0);
    if (versions_->PrevLogNumber() != 0) {  // log#==0 means no prev log
      s = RecoverLogFile(versions_->PrevLogNumber(), edit, &max_sequence);
    }
    if (s.ok() && versions_->LogNumber() != 0) {  // log#==0 for initial state
      s = RecoverLogFile(versions_->LogNumber(), edit, &max_sequence);
    }
    if (s.ok()) {
      if (versions_->LastSequence() < max_sequence) {
        versions_->SetLastSequence(max_sequence);
      }
    }
  }

  return s;
}
310
+
311
// Replay a single write-ahead log file: apply each batched record to a
// fresh memtable, flushing to a level-0 table whenever the memtable exceeds
// the write buffer size.  Updates *max_sequence with the largest sequence
// number seen.  Requires mutex_ held.
Status DBImpl::RecoverLogFile(uint64_t log_number,
                              VersionEdit* edit,
                              SequenceNumber* max_sequence) {
  // Reports log corruption to the info log and, under paranoid_checks,
  // records the first corruption into *status to abort recovery.
  struct LogReporter : public log::Reader::Reporter {
    Env* env;
    WritableFile* info_log;
    const char* fname;
    Status* status;  // NULL if options_.paranoid_checks==false
    virtual void Corruption(size_t bytes, const Status& s) {
      Log(env, info_log, "%s%s: dropping %d bytes; %s",
          (this->status == NULL ? "(ignoring error) " : ""),
          fname, static_cast<int>(bytes), s.ToString().c_str());
      if (this->status != NULL && this->status->ok()) *this->status = s;
    }
  };

  mutex_.AssertHeld();

  // Open the log file
  std::string fname = LogFileName(dbname_, log_number);
  SequentialFile* file;
  Status status = env_->NewSequentialFile(fname, &file);
  if (!status.ok()) {
    MaybeIgnoreError(&status);
    return status;
  }

  // Create the log reader.
  LogReporter reporter;
  reporter.env = env_;
  reporter.info_log = options_.info_log;
  reporter.fname = fname.c_str();
  reporter.status = (options_.paranoid_checks ? &status : NULL);
  // We intentionally make log::Reader do checksumming even if
  // paranoid_checks==false so that corruptions cause entire commits
  // to be skipped instead of propagating bad information (like overly
  // large sequence numbers).
  log::Reader reader(file, &reporter, true/*checksum*/,
                     0/*initial_offset*/);
  Log(env_, options_.info_log, "Recovering log #%llu",
      (unsigned long long) log_number);

  // Read all the records and add to a memtable
  std::string scratch;
  Slice record;
  WriteBatch batch;
  MemTable* mem = NULL;
  while (reader.ReadRecord(&record, &scratch) &&
         status.ok()) {
    // 12 presumably is the write-batch header size (sequence + count);
    // TODO(review): confirm against the write_batch encoding.
    if (record.size() < 12) {
      reporter.Corruption(
          record.size(), Status::Corruption("log record too small"));
      continue;
    }
    WriteBatchInternal::SetContents(&batch, record);

    if (mem == NULL) {
      mem = new MemTable(internal_comparator_);
      mem->Ref();
    }
    status = WriteBatchInternal::InsertInto(&batch, mem);
    MaybeIgnoreError(&status);
    if (!status.ok()) {
      break;
    }
    const SequenceNumber last_seq =
        WriteBatchInternal::Sequence(&batch) +
        WriteBatchInternal::Count(&batch) - 1;
    if (last_seq > *max_sequence) {
      *max_sequence = last_seq;
    }

    if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
      status = WriteLevel0Table(mem, edit);
      if (!status.ok()) {
        // Reflect errors immediately so that conditions like full
        // file-systems cause the DB::Open() to fail.
        break;
      }
      mem->Unref();
      mem = NULL;
    }
  }

  // Flush whatever remains in the final memtable.
  if (status.ok() && mem != NULL) {
    status = WriteLevel0Table(mem, edit);
    // Reflect errors immediately so that conditions like full
    // file-systems cause the DB::Open() to fail.
  }

  if (mem != NULL) mem->Unref();
  delete file;
  return status;
}
405
+
406
// Dump the contents of *mem to a new level-0 table file, recording the new
// file in *edit and in compaction stats.  The file number is protected via
// pending_outputs_ for the duration.  Requires mutex_ held; the mutex is
// released while the table is actually built.
Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit) {
  mutex_.AssertHeld();
  const uint64_t start_micros = env_->NowMicros();
  FileMetaData meta;
  meta.number = versions_->NewFileNumber();
  pending_outputs_.insert(meta.number);
  Iterator* iter = mem->NewIterator();
  Log(env_, options_.info_log, "Level-0 table #%llu: started",
      (unsigned long long) meta.number);

  Status s;
  {
    // Drop the lock during the expensive file build; pending_outputs_
    // keeps the new file number from being garbage collected meanwhile.
    mutex_.Unlock();
    s = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta, edit);
    mutex_.Lock();
  }

  Log(env_, options_.info_log, "Level-0 table #%llu: %lld bytes %s",
      (unsigned long long) meta.number,
      (unsigned long long) meta.file_size,
      s.ToString().c_str());
  delete iter;
  pending_outputs_.erase(meta.number);

  CompactionStats stats;
  stats.micros = env_->NowMicros() - start_micros;
  stats.bytes_written = meta.file_size;
  stats_[0].Add(stats);
  return s;
}
436
+
437
// Flush the immutable memtable (imm_) to a level-0 table, commit the change
// to the manifest, then release imm_ and clean up obsolete files.  Requires
// mutex_ held, imm_ non-NULL, and compacting_ set by the caller.
Status DBImpl::CompactMemTable() {
  mutex_.AssertHeld();
  assert(imm_ != NULL);
  assert(compacting_);

  // Save the contents of the memtable as a new Table
  VersionEdit edit;
  Status s = WriteLevel0Table(imm_, &edit);

  // Replace immutable memtable with the generated Table
  if (s.ok()) {
    edit.SetPrevLogNumber(0);  // the previous log is no longer needed
    s = versions_->LogAndApply(&edit);
  }

  if (s.ok()) {
    // Commit to the new state
    imm_->Unref();
    imm_ = NULL;
    has_imm_.Release_Store(NULL);
    DeleteObsoleteFiles();
  }

  compacting_cv_.SignalAll();  // Wake up waiter even if there was an error
  return s;
}
463
+
464
// Test hook: synchronously compact the key range [begin,end] at the given
// level.  Waits for any in-flight compaction first, runs the compaction on
// the calling thread, and then re-arms background compaction scheduling.
void DBImpl::TEST_CompactRange(
    int level,
    const std::string& begin,
    const std::string& end) {
  MutexLock l(&mutex_);
  while (compacting_) {
    compacting_cv_.Wait();
  }
  // Build internal keys spanning the full sequence range of [begin,end].
  Compaction* c = versions_->CompactRange(
      level,
      InternalKey(begin, kMaxSequenceNumber, kValueTypeForSeek),
      InternalKey(end, 0, static_cast<ValueType>(0)));

  if (c != NULL) {
    CompactionState* compact = new CompactionState(c);
    DoCompactionWork(compact);  // Ignore error in test compaction
    CleanupCompaction(compact);
  }

  // Start any background compaction that may have been delayed by this thread
  MaybeScheduleCompaction();
}
486
+
487
// Test hook: force the current memtable to be compacted and wait until the
// resulting flush completes (or fails with the background error).
Status DBImpl::TEST_CompactMemTable() {
  MutexLock l(&mutex_);
  Status s = MakeRoomForWrite(true /* force compaction */);
  if (s.ok()) {
    // Wait until the compaction completes
    while (imm_ != NULL && bg_error_.ok()) {
      compacting_cv_.Wait();
    }
    // imm_ still set means the flush failed; surface the background error.
    if (imm_ != NULL) {
      s = bg_error_;
    }
  }
  return s;
}
501
+
502
// Schedule a background compaction if one is warranted and none is already
// scheduled or running.  Requires mutex_ held.
void DBImpl::MaybeScheduleCompaction() {
  mutex_.AssertHeld();
  if (bg_compaction_scheduled_) {
    // Already scheduled
  } else if (compacting_) {
    // Some other thread is running a compaction. Do not conflict with it.
  } else if (shutting_down_.Acquire_Load()) {
    // DB is being deleted; no more background compactions
  } else if (imm_ == NULL && !versions_->NeedsCompaction()) {
    // No work to be done
  } else {
    bg_compaction_scheduled_ = true;
    env_->Schedule(&DBImpl::BGWork, this);
  }
}
517
+
518
// Static trampoline passed to Env::Schedule; forwards to BackgroundCall().
void DBImpl::BGWork(void* db) {
  reinterpret_cast<DBImpl*>(db)->BackgroundCall();
}
521
+
522
// Entry point for the scheduled background work: run one compaction pass
// (unless shutting down or another thread is compacting), clear the
// scheduled flag, wake the destructor, and possibly reschedule.
void DBImpl::BackgroundCall() {
  MutexLock l(&mutex_);
  assert(bg_compaction_scheduled_);
  if (!shutting_down_.Acquire_Load() &&
      !compacting_) {
    BackgroundCompaction();
  }
  bg_compaction_scheduled_ = false;
  bg_cv_.SignalAll();  // ~DBImpl waits on this

  // Previous compaction may have produced too many files in a level,
  // so reschedule another compaction if needed.
  MaybeScheduleCompaction();
}
536
+
537
// Perform one unit of background compaction work: flushing the immutable
// memtable takes priority; otherwise pick a table compaction, which is
// either a trivial single-file move to the next level or a full merge via
// DoCompactionWork.  Requires mutex_ held and no compaction in progress.
void DBImpl::BackgroundCompaction() {
  mutex_.AssertHeld();
  assert(!compacting_);

  if (imm_ != NULL) {
    compacting_ = true;
    CompactMemTable();
    compacting_ = false;
    compacting_cv_.SignalAll();
    return;
  }

  Compaction* c = versions_->PickCompaction();
  if (c == NULL) {
    // Nothing to do
    return;
  }

  Status status;
  if (c->IsTrivialMove()) {
    // Move file to next level
    assert(c->num_input_files(0) == 1);
    FileMetaData* f = c->input(0, 0);
    c->edit()->DeleteFile(c->level(), f->number);
    c->edit()->AddFile(c->level() + 1, f->number, f->file_size,
                       f->smallest, f->largest);
    status = versions_->LogAndApply(c->edit());
    Log(env_, options_.info_log, "Moved #%lld to level-%d %lld bytes %s\n",
        static_cast<unsigned long long>(f->number),
        c->level() + 1,
        static_cast<unsigned long long>(f->file_size),
        status.ToString().c_str());
  } else {
    CompactionState* compact = new CompactionState(c);
    status = DoCompactionWork(compact);
    CleanupCompaction(compact);
  }
  delete c;

  if (status.ok()) {
    // Done
  } else if (shutting_down_.Acquire_Load()) {
    // Ignore compaction errors found during shutting down
  } else {
    Log(env_, options_.info_log,
        "Compaction error: %s", status.ToString().c_str());
    // Under paranoid_checks, remember the first background error so that
    // later operations (e.g. TEST_CompactMemTable) can report it.
    if (options_.paranoid_checks && bg_error_.ok()) {
      bg_error_ = status;
    }
  }
}
588
+
589
// Release all resources held by a CompactionState: abandon any in-progress
// table builder, close the output file, and un-protect the output file
// numbers in pending_outputs_.  Requires mutex_ held.
void DBImpl::CleanupCompaction(CompactionState* compact) {
  mutex_.AssertHeld();
  if (compact->builder != NULL) {
    // May happen if we get a shutdown call in the middle of compaction
    compact->builder->Abandon();
    delete compact->builder;
  } else {
    assert(compact->outfile == NULL);
  }
  delete compact->outfile;
  for (size_t i = 0; i < compact->outputs.size(); i++) {
    const CompactionState::Output& out = compact->outputs[i];
    pending_outputs_.erase(out.number);
  }
  delete compact;
}
605
+
606
// Begin a new compaction output table: allocate a file number (protected in
// pending_outputs_ under the mutex), append an Output record to the state,
// then create the file and a TableBuilder for it.  Called without mutex_
// held; the lock is taken only for the file-number bookkeeping.
Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) {
  assert(compact != NULL);
  assert(compact->builder == NULL);
  uint64_t file_number;
  {
    mutex_.Lock();
    file_number = versions_->NewFileNumber();
    pending_outputs_.insert(file_number);
    CompactionState::Output out;
    out.number = file_number;
    out.smallest.Clear();
    out.largest.Clear();
    compact->outputs.push_back(out);
    mutex_.Unlock();
  }

  // Make the output file
  std::string fname = TableFileName(dbname_, file_number);
  Status s = env_->NewWritableFile(fname, &compact->outfile);
  if (s.ok()) {
    compact->builder = new TableBuilder(options_, compact->outfile);
  }
  return s;
}
630
+
631
// Close out the current compaction output table: finish (or abandon, on
// iterator error) the builder, sync and close the file, record its size,
// and verify the resulting table is readable.  Clears builder/outfile in
// the compaction state.
Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
                                          Iterator* input) {
  assert(compact != NULL);
  assert(compact->outfile != NULL);
  assert(compact->builder != NULL);

  const uint64_t output_number = compact->current_output()->number;
  assert(output_number != 0);

  // Check for iterator errors
  Status s = input->status();
  const uint64_t current_entries = compact->builder->NumEntries();
  if (s.ok()) {
    s = compact->builder->Finish();
  } else {
    compact->builder->Abandon();
  }
  const uint64_t current_bytes = compact->builder->FileSize();
  compact->current_output()->file_size = current_bytes;
  compact->total_bytes += current_bytes;
  delete compact->builder;
  compact->builder = NULL;

  // Finish and check for file errors
  if (s.ok()) {
    s = compact->outfile->Sync();
  }
  if (s.ok()) {
    s = compact->outfile->Close();
  }
  delete compact->outfile;
  compact->outfile = NULL;

  if (s.ok() && current_entries > 0) {
    // Verify that the table is usable
    Iterator* iter = table_cache_->NewIterator(ReadOptions(),
                                               output_number,
                                               current_bytes);
    s = iter->status();
    delete iter;
    if (s.ok()) {
      Log(env_, options_.info_log,
          "Generated table #%llu: %lld keys, %lld bytes",
          (unsigned long long) output_number,
          (unsigned long long) current_entries,
          (unsigned long long) current_bytes);
    }
  }
  return s;
}
681
+
682
+
683
+ Status DBImpl::InstallCompactionResults(CompactionState* compact) {
684
+ mutex_.AssertHeld();
685
+ Log(env_, options_.info_log, "Compacted %d@%d + %d@%d files => %lld bytes",
686
+ compact->compaction->num_input_files(0),
687
+ compact->compaction->level(),
688
+ compact->compaction->num_input_files(1),
689
+ compact->compaction->level() + 1,
690
+ static_cast<long long>(compact->total_bytes));
691
+
692
+ // Add compaction outputs
693
+ compact->compaction->AddInputDeletions(compact->compaction->edit());
694
+ const int level = compact->compaction->level();
695
+ for (size_t i = 0; i < compact->outputs.size(); i++) {
696
+ const CompactionState::Output& out = compact->outputs[i];
697
+ compact->compaction->edit()->AddFile(
698
+ level + 1,
699
+ out.number, out.file_size, out.smallest, out.largest);
700
+ pending_outputs_.erase(out.number);
701
+ }
702
+ compact->outputs.clear();
703
+
704
+ Status s = versions_->LogAndApply(compact->compaction->edit());
705
+ if (s.ok()) {
706
+ compact->compaction->ReleaseInputs();
707
+ DeleteObsoleteFiles();
708
+ } else {
709
+ // Discard any files we may have created during this failed compaction
710
+ for (size_t i = 0; i < compact->outputs.size(); i++) {
711
+ env_->DeleteFile(TableFileName(dbname_, compact->outputs[i].number));
712
+ }
713
+ }
714
+ return s;
715
+ }
716
+
717
// The main compaction loop: merge the input files, dropping entries that
// are shadowed by newer entries (or are obsolete deletion markers) relative
// to the oldest live snapshot, and write survivors into a sequence of
// output tables.  Called with mutex_ held; the lock is released for the
// bulk of the work and re-acquired to install results.  Immutable-memtable
// flushes are given priority inside the loop.
Status DBImpl::DoCompactionWork(CompactionState* compact) {
  const uint64_t start_micros = env_->NowMicros();
  int64_t imm_micros = 0;  // Micros spent doing imm_ compactions

  Log(env_, options_.info_log, "Compacting %d@%d + %d@%d files",
      compact->compaction->num_input_files(0),
      compact->compaction->level(),
      compact->compaction->num_input_files(1),
      compact->compaction->level() + 1);

  assert(versions_->NumLevelFiles(compact->compaction->level()) > 0);
  assert(compact->builder == NULL);
  assert(compact->outfile == NULL);
  // Entries at or below smallest_snapshot may be dropped if shadowed.
  if (snapshots_.empty()) {
    compact->smallest_snapshot = versions_->LastSequence();
  } else {
    compact->smallest_snapshot = snapshots_.oldest()->number_;
  }

  // Release mutex while we're actually doing the compaction work
  compacting_ = true;
  mutex_.Unlock();

  Iterator* input = versions_->MakeInputIterator(compact->compaction);
  input->SeekToFirst();
  Status status;
  ParsedInternalKey ikey;
  std::string current_user_key;
  bool has_current_user_key = false;
  SequenceNumber last_sequence_for_key = kMaxSequenceNumber;
  for (; input->Valid() && !shutting_down_.Acquire_Load(); ) {
    // Prioritize immutable compaction work
    if (has_imm_.NoBarrier_Load() != NULL) {
      const uint64_t imm_start = env_->NowMicros();
      mutex_.Lock();
      if (imm_ != NULL) {
        CompactMemTable();
        compacting_cv_.SignalAll();  // Wakeup MakeRoomForWrite() if necessary
      }
      mutex_.Unlock();
      imm_micros += (env_->NowMicros() - imm_start);
    }

    Slice key = input->key();
    // Cut the output table here if extending it would overlap too much
    // with the grandparent level (per Compaction::ShouldStopBefore).
    if (compact->compaction->ShouldStopBefore(key) &&
        compact->builder != NULL) {
      status = FinishCompactionOutputFile(compact, input);
      if (!status.ok()) {
        break;
      }
    }

    // Handle key/value, add to state, etc.
    bool drop = false;
    if (!ParseInternalKey(key, &ikey)) {
      // Do not hide error keys
      current_user_key.clear();
      has_current_user_key = false;
      last_sequence_for_key = kMaxSequenceNumber;
    } else {
      if (!has_current_user_key ||
          user_comparator()->Compare(ikey.user_key,
                                     Slice(current_user_key)) != 0) {
        // First occurrence of this user key
        current_user_key.assign(ikey.user_key.data(), ikey.user_key.size());
        has_current_user_key = true;
        last_sequence_for_key = kMaxSequenceNumber;
      }

      if (last_sequence_for_key <= compact->smallest_snapshot) {
        // Hidden by a newer entry for same user key
        drop = true;  // (A)
      } else if (ikey.type == kTypeDeletion &&
                 ikey.sequence <= compact->smallest_snapshot &&
                 compact->compaction->IsBaseLevelForKey(ikey.user_key)) {
        // For this user key:
        // (1) there is no data in higher levels
        // (2) data in lower levels will have larger sequence numbers
        // (3) data in layers that are being compacted here and have
        //     smaller sequence numbers will be dropped in the next
        //     few iterations of this loop (by rule (A) above).
        // Therefore this deletion marker is obsolete and can be dropped.
        drop = true;
      }

      last_sequence_for_key = ikey.sequence;
    }
#if 0
    Log(env_, options_.info_log,
        "  Compact: %s, seq %d, type: %d %d, drop: %d, is_base: %d, "
        "%d smallest_snapshot: %d",
        ikey.user_key.ToString().c_str(),
        (int)ikey.sequence, ikey.type, kTypeValue, drop,
        compact->compaction->IsBaseLevelForKey(ikey.user_key),
        (int)last_sequence_for_key, (int)compact->smallest_snapshot);
#endif

    if (!drop) {
      // Open output file if necessary
      if (compact->builder == NULL) {
        status = OpenCompactionOutputFile(compact);
        if (!status.ok()) {
          break;
        }
      }
      // Track the key range of the current output table.
      if (compact->builder->NumEntries() == 0) {
        compact->current_output()->smallest.DecodeFrom(key);
      }
      compact->current_output()->largest.DecodeFrom(key);
      compact->builder->Add(key, input->value());

      // Close output file if it is big enough
      if (compact->builder->FileSize() >=
          compact->compaction->MaxOutputFileSize()) {
        status = FinishCompactionOutputFile(compact, input);
        if (!status.ok()) {
          break;
        }
      }
    }

    input->Next();
  }

  if (status.ok() && shutting_down_.Acquire_Load()) {
    status = Status::IOError("Deleting DB during compaction");
  }
  if (status.ok() && compact->builder != NULL) {
    status = FinishCompactionOutputFile(compact, input);
  }
  if (status.ok()) {
    status = input->status();
  }
  delete input;
  input = NULL;

  // Accumulate compaction statistics for the destination level.
  CompactionStats stats;
  stats.micros = env_->NowMicros() - start_micros - imm_micros;
  for (int which = 0; which < 2; which++) {
    for (int i = 0; i < compact->compaction->num_input_files(which); i++) {
      stats.bytes_read += compact->compaction->input(which, i)->file_size;
    }
  }
  for (size_t i = 0; i < compact->outputs.size(); i++) {
    stats.bytes_written += compact->outputs[i].file_size;
  }

  mutex_.Lock();
  stats_[compact->compaction->level() + 1].Add(stats);

  if (status.ok()) {
    status = InstallCompactionResults(compact);
  }
  compacting_ = false;
  compacting_cv_.SignalAll();
  VersionSet::LevelSummaryStorage tmp;
  Log(env_, options_.info_log,
      "compacted to: %s", versions_->LevelSummary(&tmp));
  return status;
}
877
+
878
+ namespace {
879
+ struct IterState {
880
+ port::Mutex* mu;
881
+ Version* version;
882
+ MemTable* mem;
883
+ MemTable* imm;
884
+ };
885
+
886
+ static void CleanupIteratorState(void* arg1, void* arg2) {
887
+ IterState* state = reinterpret_cast<IterState*>(arg1);
888
+ state->mu->Lock();
889
+ state->mem->Unref();
890
+ if (state->imm != NULL) state->imm->Unref();
891
+ state->version->Unref();
892
+ state->mu->Unlock();
893
+ delete state;
894
+ }
895
+ }
896
+
897
// Build a merged iterator over the active memtable, the immutable
// memtable (if any), and the table files of the current version.
// Reports the current last sequence number via *latest_snapshot.
// Each piece is pinned with a reference that CleanupIteratorState drops
// when the returned iterator is destroyed.
Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
                                      SequenceNumber* latest_snapshot) {
  IterState* cleanup = new IterState;
  mutex_.Lock();
  *latest_snapshot = versions_->LastSequence();

  // Collect together all needed child iterators
  std::vector<Iterator*> list;
  list.push_back(mem_->NewIterator());
  mem_->Ref();
  if (imm_ != NULL) {
    list.push_back(imm_->NewIterator());
    imm_->Ref();
  }
  versions_->current()->AddIterators(options, &list);
  Iterator* internal_iter =
      NewMergingIterator(&internal_comparator_, &list[0], list.size());
  versions_->current()->Ref();  // Pin the version until iterator cleanup

  // Record the pinned state for the cleanup callback registered below.
  cleanup->mu = &mutex_;
  cleanup->mem = mem_;
  cleanup->imm = imm_;
  cleanup->version = versions_->current();
  internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL);

  mutex_.Unlock();
  return internal_iter;
}
925
+
926
+ Iterator* DBImpl::TEST_NewInternalIterator() {
927
+ SequenceNumber ignored;
928
+ return NewInternalIterator(ReadOptions(), &ignored);
929
+ }
930
+
931
+ int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
932
+ MutexLock l(&mutex_);
933
+ return versions_->MaxNextLevelOverlappingBytes();
934
+ }
935
+
936
+ Status DBImpl::Get(const ReadOptions& options,
937
+ const Slice& key,
938
+ std::string* value) {
939
+ // TODO(opt): faster implementation
940
+ Iterator* iter = NewIterator(options);
941
+ iter->Seek(key);
942
+ bool found = false;
943
+ if (iter->Valid() && user_comparator()->Compare(key, iter->key()) == 0) {
944
+ Slice v = iter->value();
945
+ value->assign(v.data(), v.size());
946
+ found = true;
947
+ }
948
+ // Non-OK iterator status trumps everything else
949
+ Status result = iter->status();
950
+ if (result.ok() && !found) {
951
+ result = Status::NotFound(Slice()); // Use an empty error message for speed
952
+ }
953
+ delete iter;
954
+ return result;
955
+ }
956
+
957
+ Iterator* DBImpl::NewIterator(const ReadOptions& options) {
958
+ SequenceNumber latest_snapshot;
959
+ Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
960
+ return NewDBIterator(
961
+ &dbname_, env_, user_comparator(), internal_iter,
962
+ (options.snapshot != NULL
963
+ ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
964
+ : latest_snapshot));
965
+ }
966
+
967
+ const Snapshot* DBImpl::GetSnapshot() {
968
+ MutexLock l(&mutex_);
969
+ return snapshots_.New(versions_->LastSequence());
970
+ }
971
+
972
+ void DBImpl::ReleaseSnapshot(const Snapshot* s) {
973
+ MutexLock l(&mutex_);
974
+ snapshots_.Delete(reinterpret_cast<const SnapshotImpl*>(s));
975
+ }
976
+
977
+ // Convenience methods
978
+ Status DBImpl::Put(const WriteOptions& o, const Slice& key, const Slice& val) {
979
+ return DB::Put(o, key, val);
980
+ }
981
+
982
+ Status DBImpl::Delete(const WriteOptions& options, const Slice& key) {
983
+ return DB::Delete(options, key);
984
+ }
985
+
986
// Apply a batch of updates: reserve a contiguous range of sequence
// numbers, append the batch to the write-ahead log (syncing if
// requested), then insert it into the memtable.  The order (log before
// memtable) makes the batch recoverable before it is visible.
Status DBImpl::Write(const WriteOptions& options, WriteBatch* updates) {
  Status status;
  MutexLock l(&mutex_);
  status = MakeRoomForWrite(false); // May temporarily release lock and wait
  uint64_t last_sequence = versions_->LastSequence();
  if (status.ok()) {
    // Stamp the batch with the first of its reserved sequence numbers.
    WriteBatchInternal::SetSequence(updates, last_sequence + 1);
    last_sequence += WriteBatchInternal::Count(updates);
    versions_->SetLastSequence(last_sequence);

    // Add to log and apply to memtable
    status = log_->AddRecord(WriteBatchInternal::Contents(updates));
    if (status.ok() && options.sync) {
      status = logfile_->Sync();
    }
    if (status.ok()) {
      status = WriteBatchInternal::InsertInto(updates, mem_);
    }
  }
  // Optionally hand back a snapshot taken at the batch's last sequence
  // number (NULL if the write failed).
  if (options.post_write_snapshot != NULL) {
    *options.post_write_snapshot =
        status.ok() ? snapshots_.New(last_sequence) : NULL;
  }
  return status;
}
1011
+
1012
// Ensure there is room in the current memtable for a write.
// REQUIRES: mutex_ held.  May temporarily release the mutex (to delay or
// to wait on the compaction condvar) and may switch to a fresh
// memtable/log file, scheduling compaction of the old one.
// "force" skips the slowdown delay and the has-room shortcut.
Status DBImpl::MakeRoomForWrite(bool force) {
  mutex_.AssertHeld();
  bool allow_delay = !force;
  Status s;
  while (true) {
    if (!bg_error_.ok()) {
      // Yield previous error
      s = bg_error_;
      break;
    } else if (
        allow_delay &&
        versions_->NumLevelFiles(0) >= config::kL0_SlowdownWritesTrigger) {
      // We are getting close to hitting a hard limit on the number of
      // L0 files.  Rather than delaying a single write by several
      // seconds when we hit the hard limit, start delaying each
      // individual write by 1ms to reduce latency variance.  Also,
      // this delay hands over some CPU to the compaction thread in
      // case it is sharing the same core as the writer.
      mutex_.Unlock();
      env_->SleepForMicroseconds(1000);
      allow_delay = false;  // Do not delay a single write more than once
      mutex_.Lock();
    } else if (!force &&
               (mem_->ApproximateMemoryUsage() <= options_.write_buffer_size)) {
      // There is room in current memtable
      break;
    } else if (imm_ != NULL) {
      // We have filled up the current memtable, but the previous
      // one is still being compacted, so we wait.
      compacting_cv_.Wait();
    } else if (versions_->NumLevelFiles(0) >= config::kL0_StopWritesTrigger) {
      // There are too many level-0 files.
      compacting_cv_.Wait();
    } else {
      // Attempt to switch to a new memtable and trigger compaction of old
      assert(versions_->PrevLogNumber() == 0);
      uint64_t new_log_number = versions_->NewFileNumber();
      WritableFile* lfile = NULL;
      s = env_->NewWritableFile(LogFileName(dbname_, new_log_number), &lfile);
      if (!s.ok()) {
        break;
      }
      // Record the log switch in the manifest before using the new log.
      VersionEdit edit;
      edit.SetPrevLogNumber(versions_->LogNumber());
      edit.SetLogNumber(new_log_number);
      s = versions_->LogAndApply(&edit);
      if (!s.ok()) {
        // Manifest update failed: discard the freshly created log file.
        delete lfile;
        env_->DeleteFile(LogFileName(dbname_, new_log_number));
        break;
      }
      delete log_;
      delete logfile_;
      logfile_ = lfile;
      log_ = new log::Writer(lfile);
      // The full memtable becomes the immutable memtable to compact.
      imm_ = mem_;
      has_imm_.Release_Store(imm_);
      mem_ = new MemTable(internal_comparator_);
      mem_->Ref();
      force = false;   // Do not force another compaction if have room
      MaybeScheduleCompaction();
    }
  }
  return s;
}
1077
+
1078
+ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
1079
+ value->clear();
1080
+
1081
+ MutexLock l(&mutex_);
1082
+ Slice in = property;
1083
+ Slice prefix("leveldb.");
1084
+ if (!in.starts_with(prefix)) return false;
1085
+ in.remove_prefix(prefix.size());
1086
+
1087
+ if (in.starts_with("num-files-at-level")) {
1088
+ in.remove_prefix(strlen("num-files-at-level"));
1089
+ uint64_t level;
1090
+ bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
1091
+ if (!ok || level < 0 || level >= config::kNumLevels) {
1092
+ return false;
1093
+ } else {
1094
+ char buf[100];
1095
+ snprintf(buf, sizeof(buf), "%d",
1096
+ versions_->NumLevelFiles(static_cast<int>(level)));
1097
+ *value = buf;
1098
+ return true;
1099
+ }
1100
+ } else if (in == "stats") {
1101
+ char buf[200];
1102
+ snprintf(buf, sizeof(buf),
1103
+ " Compactions\n"
1104
+ "Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n"
1105
+ "--------------------------------------------------\n"
1106
+ );
1107
+ value->append(buf);
1108
+ for (int level = 0; level < config::kNumLevels; level++) {
1109
+ int files = versions_->NumLevelFiles(level);
1110
+ if (stats_[level].micros > 0 || files > 0) {
1111
+ snprintf(
1112
+ buf, sizeof(buf),
1113
+ "%3d %8d %8.0f %9.0f %8.0f %9.0f\n",
1114
+ level,
1115
+ files,
1116
+ versions_->NumLevelBytes(level) / 1048576.0,
1117
+ stats_[level].micros / 1e6,
1118
+ stats_[level].bytes_read / 1048576.0,
1119
+ stats_[level].bytes_written / 1048576.0);
1120
+ value->append(buf);
1121
+ }
1122
+ }
1123
+ return true;
1124
+ }
1125
+
1126
+ return false;
1127
+ }
1128
+
1129
+ void DBImpl::GetApproximateSizes(
1130
+ const Range* range, int n,
1131
+ uint64_t* sizes) {
1132
+ // TODO(opt): better implementation
1133
+ Version* v;
1134
+ {
1135
+ MutexLock l(&mutex_);
1136
+ versions_->current()->Ref();
1137
+ v = versions_->current();
1138
+ }
1139
+
1140
+ for (int i = 0; i < n; i++) {
1141
+ // Convert user_key into a corresponding internal key.
1142
+ InternalKey k1(range[i].start, kMaxSequenceNumber, kValueTypeForSeek);
1143
+ InternalKey k2(range[i].limit, kMaxSequenceNumber, kValueTypeForSeek);
1144
+ uint64_t start = versions_->ApproximateOffsetOf(v, k1);
1145
+ uint64_t limit = versions_->ApproximateOffsetOf(v, k2);
1146
+ sizes[i] = (limit >= start ? limit - start : 0);
1147
+ }
1148
+
1149
+ {
1150
+ MutexLock l(&mutex_);
1151
+ v->Unref();
1152
+ }
1153
+ }
1154
+
1155
+ // Default implementations of convenience methods that subclasses of DB
1156
+ // can call if they wish
1157
+ Status DB::Put(const WriteOptions& opt, const Slice& key, const Slice& value) {
1158
+ WriteBatch batch;
1159
+ batch.Put(key, value);
1160
+ return Write(opt, &batch);
1161
+ }
1162
+
1163
+ Status DB::Delete(const WriteOptions& opt, const Slice& key) {
1164
+ WriteBatch batch;
1165
+ batch.Delete(key);
1166
+ return Write(opt, &batch);
1167
+ }
1168
+
1169
// Empty out-of-line destructor for the abstract DB interface.
DB::~DB() { }
1170
+
1171
// Open (creating/recovering as dictated by "options") the database at
// "dbname".  On success *dbptr holds a heap-allocated DBImpl owned by the
// caller; on failure *dbptr is left NULL and the partial impl is deleted.
Status DB::Open(const Options& options, const std::string& dbname,
                DB** dbptr) {
  *dbptr = NULL;

  DBImpl* impl = new DBImpl(options, dbname);
  impl->mutex_.Lock();
  VersionEdit edit;
  Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists
  if (s.ok()) {
    // Start a fresh write-ahead log for this session and record it in
    // the manifest via the recovery edit.
    uint64_t new_log_number = impl->versions_->NewFileNumber();
    WritableFile* lfile;
    s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
                                     &lfile);
    if (s.ok()) {
      edit.SetLogNumber(new_log_number);
      impl->logfile_ = lfile;
      impl->log_ = new log::Writer(lfile);
      s = impl->versions_->LogAndApply(&edit);
    }
    if (s.ok()) {
      // Remove files made obsolete by recovery and kick off any pending
      // background work.
      impl->DeleteObsoleteFiles();
      impl->MaybeScheduleCompaction();
    }
  }
  impl->mutex_.Unlock();
  if (s.ok()) {
    *dbptr = impl;
  } else {
    delete impl;
  }
  return s;
}
1203
+
1204
// Empty out-of-line destructor for the abstract Snapshot interface.
Snapshot::~Snapshot() {
}
1206
+
1207
+ Status DestroyDB(const std::string& dbname, const Options& options) {
1208
+ Env* env = options.env;
1209
+ std::vector<std::string> filenames;
1210
+ // Ignore error in case directory does not exist
1211
+ env->GetChildren(dbname, &filenames);
1212
+ if (filenames.empty()) {
1213
+ return Status::OK();
1214
+ }
1215
+
1216
+ FileLock* lock;
1217
+ Status result = env->LockFile(LockFileName(dbname), &lock);
1218
+ if (result.ok()) {
1219
+ uint64_t number;
1220
+ FileType type;
1221
+ for (size_t i = 0; i < filenames.size(); i++) {
1222
+ if (ParseFileName(filenames[i], &number, &type)) {
1223
+ Status del = env->DeleteFile(dbname + "/" + filenames[i]);
1224
+ if (result.ok() && !del.ok()) {
1225
+ result = del;
1226
+ }
1227
+ }
1228
+ }
1229
+ env->UnlockFile(lock); // Ignore error since state is already gone
1230
+ env->DeleteFile(LockFileName(dbname));
1231
+ env->DeleteDir(dbname); // Ignore error in case dir contains other files
1232
+ }
1233
+ return result;
1234
+ }
1235
+
1236
+ }