leveldb-ruby 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +17 -0
- data/ext/leveldb/extconf.rb +10 -0
- data/ext/leveldb/leveldb.cc +181 -0
- data/leveldb/Makefile +172 -0
- data/leveldb/db/builder.cc +90 -0
- data/leveldb/db/builder.h +36 -0
- data/leveldb/db/corruption_test.cc +354 -0
- data/leveldb/db/db_bench.cc +677 -0
- data/leveldb/db/db_impl.cc +1236 -0
- data/leveldb/db/db_impl.h +180 -0
- data/leveldb/db/db_iter.cc +298 -0
- data/leveldb/db/db_iter.h +26 -0
- data/leveldb/db/db_test.cc +1192 -0
- data/leveldb/db/dbformat.cc +87 -0
- data/leveldb/db/dbformat.h +165 -0
- data/leveldb/db/dbformat_test.cc +112 -0
- data/leveldb/db/filename.cc +135 -0
- data/leveldb/db/filename.h +80 -0
- data/leveldb/db/filename_test.cc +122 -0
- data/leveldb/db/log_format.h +35 -0
- data/leveldb/db/log_reader.cc +254 -0
- data/leveldb/db/log_reader.h +108 -0
- data/leveldb/db/log_test.cc +500 -0
- data/leveldb/db/log_writer.cc +103 -0
- data/leveldb/db/log_writer.h +48 -0
- data/leveldb/db/memtable.cc +108 -0
- data/leveldb/db/memtable.h +85 -0
- data/leveldb/db/repair.cc +384 -0
- data/leveldb/db/skiplist.h +378 -0
- data/leveldb/db/skiplist_test.cc +378 -0
- data/leveldb/db/snapshot.h +66 -0
- data/leveldb/db/table_cache.cc +95 -0
- data/leveldb/db/table_cache.h +50 -0
- data/leveldb/db/version_edit.cc +268 -0
- data/leveldb/db/version_edit.h +106 -0
- data/leveldb/db/version_edit_test.cc +46 -0
- data/leveldb/db/version_set.cc +1060 -0
- data/leveldb/db/version_set.h +306 -0
- data/leveldb/db/write_batch.cc +138 -0
- data/leveldb/db/write_batch_internal.h +45 -0
- data/leveldb/db/write_batch_test.cc +89 -0
- data/leveldb/include/leveldb/cache.h +99 -0
- data/leveldb/include/leveldb/comparator.h +63 -0
- data/leveldb/include/leveldb/db.h +148 -0
- data/leveldb/include/leveldb/env.h +302 -0
- data/leveldb/include/leveldb/iterator.h +100 -0
- data/leveldb/include/leveldb/options.h +198 -0
- data/leveldb/include/leveldb/slice.h +109 -0
- data/leveldb/include/leveldb/status.h +100 -0
- data/leveldb/include/leveldb/table.h +70 -0
- data/leveldb/include/leveldb/table_builder.h +91 -0
- data/leveldb/include/leveldb/write_batch.h +64 -0
- data/leveldb/port/port.h +23 -0
- data/leveldb/port/port_android.cc +64 -0
- data/leveldb/port/port_android.h +150 -0
- data/leveldb/port/port_chromium.cc +80 -0
- data/leveldb/port/port_chromium.h +97 -0
- data/leveldb/port/port_example.h +115 -0
- data/leveldb/port/port_osx.cc +50 -0
- data/leveldb/port/port_osx.h +125 -0
- data/leveldb/port/port_posix.cc +50 -0
- data/leveldb/port/port_posix.h +94 -0
- data/leveldb/port/sha1_portable.cc +298 -0
- data/leveldb/port/sha1_portable.h +25 -0
- data/leveldb/port/sha1_test.cc +39 -0
- data/leveldb/port/win/stdint.h +24 -0
- data/leveldb/table/block.cc +263 -0
- data/leveldb/table/block.h +43 -0
- data/leveldb/table/block_builder.cc +109 -0
- data/leveldb/table/block_builder.h +57 -0
- data/leveldb/table/format.cc +131 -0
- data/leveldb/table/format.h +103 -0
- data/leveldb/table/iterator.cc +67 -0
- data/leveldb/table/iterator_wrapper.h +63 -0
- data/leveldb/table/merger.cc +197 -0
- data/leveldb/table/merger.h +26 -0
- data/leveldb/table/table.cc +175 -0
- data/leveldb/table/table_builder.cc +227 -0
- data/leveldb/table/table_test.cc +845 -0
- data/leveldb/table/two_level_iterator.cc +182 -0
- data/leveldb/table/two_level_iterator.h +34 -0
- data/leveldb/util/arena.cc +68 -0
- data/leveldb/util/arena.h +68 -0
- data/leveldb/util/arena_test.cc +68 -0
- data/leveldb/util/cache.cc +255 -0
- data/leveldb/util/cache_test.cc +169 -0
- data/leveldb/util/coding.cc +194 -0
- data/leveldb/util/coding.h +104 -0
- data/leveldb/util/coding_test.cc +173 -0
- data/leveldb/util/comparator.cc +72 -0
- data/leveldb/util/crc32c.cc +332 -0
- data/leveldb/util/crc32c.h +45 -0
- data/leveldb/util/crc32c_test.cc +72 -0
- data/leveldb/util/env.cc +77 -0
- data/leveldb/util/env_chromium.cc +612 -0
- data/leveldb/util/env_posix.cc +606 -0
- data/leveldb/util/env_test.cc +102 -0
- data/leveldb/util/hash.cc +45 -0
- data/leveldb/util/hash.h +19 -0
- data/leveldb/util/histogram.cc +128 -0
- data/leveldb/util/histogram.h +41 -0
- data/leveldb/util/logging.cc +81 -0
- data/leveldb/util/logging.h +47 -0
- data/leveldb/util/mutexlock.h +39 -0
- data/leveldb/util/options.cc +28 -0
- data/leveldb/util/random.h +59 -0
- data/leveldb/util/status.cc +75 -0
- data/leveldb/util/testharness.cc +65 -0
- data/leveldb/util/testharness.h +129 -0
- data/leveldb/util/testutil.cc +51 -0
- data/leveldb/util/testutil.h +53 -0
- data/lib/leveldb.rb +36 -0
- metadata +183 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
4
|
+
|
|
5
|
+
#include "db/log_writer.h"
|
|
6
|
+
|
|
7
|
+
#include <stdint.h>
|
|
8
|
+
#include "leveldb/env.h"
|
|
9
|
+
#include "util/coding.h"
|
|
10
|
+
#include "util/crc32c.h"
|
|
11
|
+
|
|
12
|
+
namespace leveldb {
|
|
13
|
+
namespace log {
|
|
14
|
+
|
|
15
|
+
// Builds a writer that appends to "*dest", starting at offset 0 of the first
// block.  The crc32c of each record-type byte (0..kMaxRecordType) is cached in
// type_crc_ so EmitPhysicalRecord() can extend from it instead of hashing the
// type byte again for every record.
Writer::Writer(WritableFile* dest)
    : dest_(dest),
      block_offset_(0) {
  int type = 0;
  while (type <= kMaxRecordType) {
    const char type_byte = static_cast<char>(type);
    type_crc_[type] = crc32c::Value(&type_byte, 1);
    ++type;
  }
}
|
|
23
|
+
|
|
24
|
+
// Nothing to release here: the destructor does not touch dest_.
Writer::~Writer() {}
|
|
26
|
+
|
|
27
|
+
// Appends "slice" to the log as one logical record.
//
// A record that fits in the space left in the current block is written as a
// single kFullType fragment; otherwise it is split into a kFirstType
// fragment, zero or more kMiddleType fragments and a final kLastType
// fragment.  An empty slice still produces one zero-length record.
//
// Returns OK once every fragment has been emitted, or the first non-OK
// status reported while writing (including a failure to write the
// zero padding that fills the tail of a block).
Status Writer::AddRecord(const Slice& slice) {
  const char* ptr = slice.data();
  size_t left = slice.size();

  // Fragment the record if necessary and emit it.  Note that if slice
  // is empty, we still want to iterate once to emit a single
  // zero-length record
  Status s;
  bool begin = true;
  do {
    const int leftover = kBlockSize - block_offset_;
    assert(leftover >= 0);
    if (leftover < kHeaderSize) {
      // Switch to a new block
      if (leftover > 0) {
        // Fill the trailer (literal below relies on kHeaderSize being 7)
        assert(kHeaderSize == 7);
        s = dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
        if (!s.ok()) {
          // The padding did not reach the file, so the next header would
          // not start on a block boundary.  Report the failure instead of
          // continuing to write at a misaligned offset.
          return s;
        }
      }
      block_offset_ = 0;
    }

    // Invariant: we never leave < kHeaderSize bytes in a block.
    assert(kBlockSize - block_offset_ - kHeaderSize >= 0);

    // Payload bytes that still fit in this block after the header.
    const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
    const size_t fragment_length = (left < avail) ? left : avail;

    // The fragment type encodes its position within the logical record.
    RecordType type;
    const bool end = (left == fragment_length);
    if (begin && end) {
      type = kFullType;
    } else if (begin) {
      type = kFirstType;
    } else if (end) {
      type = kLastType;
    } else {
      type = kMiddleType;
    }

    s = EmitPhysicalRecord(type, ptr, fragment_length);
    ptr += fragment_length;
    left -= fragment_length;
    begin = false;
  } while (s.ok() && left > 0);
  return s;
}
|
|
74
|
+
|
|
75
|
+
// Writes one physical fragment: a kHeaderSize-byte header followed by the
// "n" payload bytes at "ptr", then flushes the destination file.
//
// Header bytes as filled in below:
//   buf[0..]  masked crc32c of the type byte and payload (via EncodeFixed32)
//   buf[4]    low byte of n
//   buf[5]    high byte of n
//   buf[6]    record type
//
// block_offset_ is advanced by the full fragment size whether or not the
// writes succeeded.  Returns the status of the first failing operation, or
// OK.
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
  assert(n <= 0xffff);  // Must fit in two bytes
  assert(block_offset_ + kHeaderSize + n <= kBlockSize);

  // Checksum covers the record type (pre-computed in type_crc_) plus the
  // payload; Mask() adjusts the value for storage.
  const uint32_t masked_crc =
      crc32c::Mask(crc32c::Extend(type_crc_[t], ptr, n));

  // Assemble the header.
  char header[kHeaderSize];
  EncodeFixed32(header, masked_crc);
  header[4] = static_cast<char>(n & 0xff);
  header[5] = static_cast<char>(n >> 8);
  header[6] = static_cast<char>(t);

  // Header, then payload, then flush; stop at the first failure.
  Status result = dest_->Append(Slice(header, kHeaderSize));
  if (result.ok()) {
    result = dest_->Append(Slice(ptr, n));
  }
  if (result.ok()) {
    result = dest_->Flush();
  }

  block_offset_ += kHeaderSize + n;
  return result;
}
|
|
101
|
+
|
|
102
|
+
}
|
|
103
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
4
|
+
|
|
5
|
+
#ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_
|
|
6
|
+
#define STORAGE_LEVELDB_DB_LOG_WRITER_H_
|
|
7
|
+
|
|
8
|
+
#include <stdint.h>
|
|
9
|
+
#include "db/log_format.h"
|
|
10
|
+
#include "leveldb/slice.h"
|
|
11
|
+
#include "leveldb/status.h"
|
|
12
|
+
|
|
13
|
+
namespace leveldb {
|
|
14
|
+
|
|
15
|
+
class WritableFile;
|
|
16
|
+
|
|
17
|
+
namespace log {
|
|
18
|
+
|
|
19
|
+
class Writer {
|
|
20
|
+
public:
|
|
21
|
+
// Create a writer that will append data to "*dest".
|
|
22
|
+
// "*dest" must be initially empty.
|
|
23
|
+
// "*dest" must remain live while this Writer is in use.
|
|
24
|
+
explicit Writer(WritableFile* dest);
|
|
25
|
+
~Writer();
|
|
26
|
+
|
|
27
|
+
Status AddRecord(const Slice& slice);
|
|
28
|
+
|
|
29
|
+
private:
|
|
30
|
+
WritableFile* dest_;
|
|
31
|
+
int block_offset_; // Current offset in block
|
|
32
|
+
|
|
33
|
+
// crc32c values for all supported record types. These are
|
|
34
|
+
// pre-computed to reduce the overhead of computing the crc of the
|
|
35
|
+
// record type stored in the header.
|
|
36
|
+
uint32_t type_crc_[kMaxRecordType + 1];
|
|
37
|
+
|
|
38
|
+
Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
|
|
39
|
+
|
|
40
|
+
// No copying allowed
|
|
41
|
+
Writer(const Writer&);
|
|
42
|
+
void operator=(const Writer&);
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
#endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
4
|
+
|
|
5
|
+
#include "db/memtable.h"
|
|
6
|
+
#include "db/dbformat.h"
|
|
7
|
+
#include "leveldb/comparator.h"
|
|
8
|
+
#include "leveldb/env.h"
|
|
9
|
+
#include "leveldb/iterator.h"
|
|
10
|
+
#include "util/coding.h"
|
|
11
|
+
|
|
12
|
+
namespace leveldb {
|
|
13
|
+
|
|
14
|
+
// Decodes the varint32 length stored at "data" and returns a Slice covering
// the bytes that immediately follow it.  No validation is performed: the
// input is assumed to be a well-formed length-prefixed string.
static Slice GetLengthPrefixedSlice(const char* data) {
  uint32_t length;
  // +5: we assume "data" is not corrupted
  const char* const payload = GetVarint32Ptr(data, data + 5, &length);
  return Slice(payload, length);
}
|
|
20
|
+
|
|
21
|
+
// Creates an empty memtable ordered by "cmp".  The reference count starts at
// zero; table_ is constructed from comparator_ and a pointer to arena_.
MemTable::MemTable(const InternalKeyComparator& cmp)
    : comparator_(cmp), refs_(0), table_(comparator_, &arena_) {}
|
|
26
|
+
|
|
27
|
+
// Destruction is only expected once every reference has been dropped.
MemTable::~MemTable() { assert(refs_ == 0); }
|
|
30
|
+
|
|
31
|
+
// Reports the memory usage of the arena backing this memtable.
size_t MemTable::ApproximateMemoryUsage() {
  return arena_.MemoryUsage();
}
|
|
32
|
+
|
|
33
|
+
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
|
|
34
|
+
const {
|
|
35
|
+
// Internal keys are encoded as length-prefixed strings.
|
|
36
|
+
Slice a = GetLengthPrefixedSlice(aptr);
|
|
37
|
+
Slice b = GetLengthPrefixedSlice(bptr);
|
|
38
|
+
return comparator.Compare(a, b);
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Encode a suitable internal key target for "target" and return it.
|
|
42
|
+
// Uses *scratch as scratch space, and the returned pointer will point
|
|
43
|
+
// into this scratch space.
|
|
44
|
+
static const char* EncodeKey(std::string* scratch, const Slice& target) {
|
|
45
|
+
scratch->clear();
|
|
46
|
+
PutVarint32(scratch, target.size());
|
|
47
|
+
scratch->append(target.data(), target.size());
|
|
48
|
+
return scratch->data();
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
class MemTableIterator: public Iterator {
|
|
52
|
+
public:
|
|
53
|
+
explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }
|
|
54
|
+
|
|
55
|
+
virtual bool Valid() const { return iter_.Valid(); }
|
|
56
|
+
virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
|
|
57
|
+
virtual void SeekToFirst() { iter_.SeekToFirst(); }
|
|
58
|
+
virtual void SeekToLast() { iter_.SeekToLast(); }
|
|
59
|
+
virtual void Next() { iter_.Next(); }
|
|
60
|
+
virtual void Prev() { iter_.Prev(); }
|
|
61
|
+
virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
|
|
62
|
+
virtual Slice value() const {
|
|
63
|
+
Slice key_slice = GetLengthPrefixedSlice(iter_.key());
|
|
64
|
+
return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
virtual Status status() const { return Status::OK(); }
|
|
68
|
+
|
|
69
|
+
private:
|
|
70
|
+
MemTable::Table::Iterator iter_;
|
|
71
|
+
std::string tmp_; // For passing to EncodeKey
|
|
72
|
+
|
|
73
|
+
// No copying allowed
|
|
74
|
+
MemTableIterator(const MemTableIterator&);
|
|
75
|
+
void operator=(const MemTableIterator&);
|
|
76
|
+
};
|
|
77
|
+
|
|
78
|
+
// Returns a newly allocated iterator over this memtable's entries.
Iterator* MemTable::NewIterator() {
  Iterator* const result = new MemTableIterator(&table_);
  return result;
}
|
|
81
|
+
|
|
82
|
+
// Encodes one entry into arena-allocated memory and inserts it into the
// table.  The internal key is the user key followed by an 8-byte tag built
// from the sequence number and the value type.
void MemTable::Add(SequenceNumber s, ValueType type,
                   const Slice& key,
                   const Slice& value) {
  // Format of an entry is concatenation of:
  //  key_size     : varint32 of internal_key.size()
  //  key bytes    : char[internal_key.size()]
  //  value_size   : varint32 of value.size()
  //  value bytes  : char[value.size()]
  const size_t user_key_len = key.size();
  const size_t value_len = value.size();
  const size_t ikey_len = user_key_len + 8;  // user key + 8-byte tag
  const size_t total_len = VarintLength(ikey_len) + ikey_len +
                           VarintLength(value_len) + value_len;

  char* const entry = arena_.Allocate(total_len);
  char* cursor = EncodeVarint32(entry, ikey_len);

  // Internal key: user key bytes, then (sequence << 8) | type.
  memcpy(cursor, key.data(), user_key_len);
  cursor += user_key_len;
  EncodeFixed64(cursor, (s << 8) | type);
  cursor += 8;

  // Length-prefixed value.
  cursor = EncodeVarint32(cursor, value_len);
  memcpy(cursor, value.data(), value_len);

  // Everything written must exactly fill the allocation.
  assert(cursor + value_len == entry + total_len);
  table_.Insert(entry);
}
|
|
107
|
+
|
|
108
|
+
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
4
|
+
|
|
5
|
+
#ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_
|
|
6
|
+
#define STORAGE_LEVELDB_DB_MEMTABLE_H_
|
|
7
|
+
|
|
8
|
+
#include <string>
|
|
9
|
+
#include "leveldb/db.h"
|
|
10
|
+
#include "db/dbformat.h"
|
|
11
|
+
#include "db/skiplist.h"
|
|
12
|
+
#include "util/arena.h"
|
|
13
|
+
|
|
14
|
+
namespace leveldb {
|
|
15
|
+
|
|
16
|
+
class InternalKeyComparator;
|
|
17
|
+
class Mutex;
|
|
18
|
+
class MemTableIterator;
|
|
19
|
+
|
|
20
|
+
class MemTable {
|
|
21
|
+
public:
|
|
22
|
+
// MemTables are reference counted. The initial reference count
|
|
23
|
+
// is zero and the caller must call Ref() at least once.
|
|
24
|
+
explicit MemTable(const InternalKeyComparator& comparator);
|
|
25
|
+
|
|
26
|
+
// Increase reference count.
|
|
27
|
+
void Ref() { ++refs_; }
|
|
28
|
+
|
|
29
|
+
// Drop reference count. Delete if no more references exist.
|
|
30
|
+
void Unref() {
|
|
31
|
+
--refs_;
|
|
32
|
+
assert(refs_ >= 0);
|
|
33
|
+
if (refs_ <= 0) {
|
|
34
|
+
delete this;
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Returns an estimate of the number of bytes of data in use by this
|
|
39
|
+
// data structure.
|
|
40
|
+
//
|
|
41
|
+
// REQUIRES: external synchronization to prevent simultaneous
|
|
42
|
+
// operations on the same MemTable.
|
|
43
|
+
size_t ApproximateMemoryUsage();
|
|
44
|
+
|
|
45
|
+
// Return an iterator that yields the contents of the memtable.
|
|
46
|
+
//
|
|
47
|
+
// The caller must ensure that the underlying MemTable remains live
|
|
48
|
+
// while the returned iterator is live. The keys returned by this
|
|
49
|
+
// iterator are internal keys encoded by AppendInternalKey in the
|
|
50
|
+
// db/format.{h,cc} module.
|
|
51
|
+
Iterator* NewIterator();
|
|
52
|
+
|
|
53
|
+
// Add an entry into memtable that maps key to value at the
|
|
54
|
+
// specified sequence number and with the specified type.
|
|
55
|
+
// Typically value will be empty if type==kTypeDeletion.
|
|
56
|
+
void Add(SequenceNumber seq, ValueType type,
|
|
57
|
+
const Slice& key,
|
|
58
|
+
const Slice& value);
|
|
59
|
+
|
|
60
|
+
private:
|
|
61
|
+
~MemTable(); // Private since only Unref() should be used to delete it
|
|
62
|
+
|
|
63
|
+
struct KeyComparator {
|
|
64
|
+
const InternalKeyComparator comparator;
|
|
65
|
+
explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
|
|
66
|
+
int operator()(const char* a, const char* b) const;
|
|
67
|
+
};
|
|
68
|
+
friend class MemTableIterator;
|
|
69
|
+
friend class MemTableBackwardIterator;
|
|
70
|
+
|
|
71
|
+
typedef SkipList<const char*, KeyComparator> Table;
|
|
72
|
+
|
|
73
|
+
KeyComparator comparator_;
|
|
74
|
+
int refs_;
|
|
75
|
+
Arena arena_;
|
|
76
|
+
Table table_;
|
|
77
|
+
|
|
78
|
+
// No copying allowed
|
|
79
|
+
MemTable(const MemTable&);
|
|
80
|
+
void operator=(const MemTable&);
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
#endif // STORAGE_LEVELDB_DB_MEMTABLE_H_
|
|
@@ -0,0 +1,384 @@
|
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
|
4
|
+
//
|
|
5
|
+
// We recover the contents of the descriptor from the other files we find.
|
|
6
|
+
// (1) Any log files are first converted to tables
|
|
7
|
+
// (2) We scan every table to compute
|
|
8
|
+
// (a) smallest/largest for the table
|
|
9
|
+
// (b) largest sequence number in the table
|
|
10
|
+
// (3) We generate descriptor contents:
|
|
11
|
+
// - log number is set to zero
|
|
12
|
+
// - next-file-number is set to 1 + largest file number we found
|
|
13
|
+
// - last-sequence-number is set to largest sequence# found across
|
|
14
|
+
// all tables (see 2b)
|
|
15
|
+
// - compaction pointers are cleared
|
|
16
|
+
// - every table file is added at level 0
|
|
17
|
+
//
|
|
18
|
+
// Possible optimization 1:
|
|
19
|
+
// (a) Compute total size and use to pick appropriate max-level M
|
|
20
|
+
// (b) Sort tables by largest sequence# in the table
|
|
21
|
+
// (c) For each table: if it overlaps earlier table, place in level-0,
|
|
22
|
+
// else place in level-M.
|
|
23
|
+
// Possible optimization 2:
|
|
24
|
+
// Store per-table metadata (smallest, largest, largest-seq#, ...)
|
|
25
|
+
// in the table's meta section to speed up ScanTable.
|
|
26
|
+
|
|
27
|
+
#include "db/builder.h"
|
|
28
|
+
#include "db/db_impl.h"
|
|
29
|
+
#include "db/dbformat.h"
|
|
30
|
+
#include "db/filename.h"
|
|
31
|
+
#include "db/log_reader.h"
|
|
32
|
+
#include "db/log_writer.h"
|
|
33
|
+
#include "db/memtable.h"
|
|
34
|
+
#include "db/table_cache.h"
|
|
35
|
+
#include "db/version_edit.h"
|
|
36
|
+
#include "db/write_batch_internal.h"
|
|
37
|
+
#include "leveldb/comparator.h"
|
|
38
|
+
#include "leveldb/db.h"
|
|
39
|
+
#include "leveldb/env.h"
|
|
40
|
+
|
|
41
|
+
namespace leveldb {
|
|
42
|
+
|
|
43
|
+
namespace {
|
|
44
|
+
|
|
45
|
+
class Repairer {
|
|
46
|
+
public:
|
|
47
|
+
Repairer(const std::string& dbname, const Options& options)
|
|
48
|
+
: dbname_(dbname),
|
|
49
|
+
env_(options.env),
|
|
50
|
+
icmp_(options.comparator),
|
|
51
|
+
options_(SanitizeOptions(dbname, &icmp_, options)),
|
|
52
|
+
owns_info_log_(options_.info_log != options.info_log),
|
|
53
|
+
next_file_number_(1) {
|
|
54
|
+
// TableCache can be small since we expect each table to be opened once.
|
|
55
|
+
table_cache_ = new TableCache(dbname_, &options_, 10);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
~Repairer() {
|
|
59
|
+
delete table_cache_;
|
|
60
|
+
if (owns_info_log_) {
|
|
61
|
+
delete options_.info_log;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
Status Run() {
|
|
66
|
+
Status status = FindFiles();
|
|
67
|
+
if (status.ok()) {
|
|
68
|
+
ConvertLogFilesToTables();
|
|
69
|
+
ExtractMetaData();
|
|
70
|
+
status = WriteDescriptor();
|
|
71
|
+
}
|
|
72
|
+
if (status.ok()) {
|
|
73
|
+
unsigned long long bytes = 0;
|
|
74
|
+
for (size_t i = 0; i < tables_.size(); i++) {
|
|
75
|
+
bytes += tables_[i].meta.file_size;
|
|
76
|
+
}
|
|
77
|
+
Log(env_, options_.info_log,
|
|
78
|
+
"**** Repaired leveldb %s; "
|
|
79
|
+
"recovered %d files; %llu bytes. "
|
|
80
|
+
"Some data may have been lost. "
|
|
81
|
+
"****",
|
|
82
|
+
dbname_.c_str(),
|
|
83
|
+
static_cast<int>(tables_.size()),
|
|
84
|
+
bytes);
|
|
85
|
+
}
|
|
86
|
+
return status;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
private:
|
|
90
|
+
struct TableInfo {
|
|
91
|
+
FileMetaData meta;
|
|
92
|
+
SequenceNumber max_sequence;
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
std::string const dbname_;
|
|
96
|
+
Env* const env_;
|
|
97
|
+
InternalKeyComparator const icmp_;
|
|
98
|
+
Options const options_;
|
|
99
|
+
bool owns_info_log_;
|
|
100
|
+
TableCache* table_cache_;
|
|
101
|
+
VersionEdit edit_;
|
|
102
|
+
|
|
103
|
+
std::vector<std::string> manifests_;
|
|
104
|
+
std::vector<uint64_t> table_numbers_;
|
|
105
|
+
std::vector<uint64_t> logs_;
|
|
106
|
+
std::vector<TableInfo> tables_;
|
|
107
|
+
uint64_t next_file_number_;
|
|
108
|
+
|
|
109
|
+
Status FindFiles() {
|
|
110
|
+
std::vector<std::string> filenames;
|
|
111
|
+
Status status = env_->GetChildren(dbname_, &filenames);
|
|
112
|
+
if (!status.ok()) {
|
|
113
|
+
return status;
|
|
114
|
+
}
|
|
115
|
+
if (filenames.empty()) {
|
|
116
|
+
return Status::IOError(dbname_, "repair found no files");
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
uint64_t number;
|
|
120
|
+
FileType type;
|
|
121
|
+
for (size_t i = 0; i < filenames.size(); i++) {
|
|
122
|
+
if (ParseFileName(filenames[i], &number, &type)) {
|
|
123
|
+
if (type == kDescriptorFile) {
|
|
124
|
+
manifests_.push_back(filenames[i]);
|
|
125
|
+
} else {
|
|
126
|
+
if (number + 1 > next_file_number_) {
|
|
127
|
+
next_file_number_ = number + 1;
|
|
128
|
+
}
|
|
129
|
+
if (type == kLogFile) {
|
|
130
|
+
logs_.push_back(number);
|
|
131
|
+
} else if (type == kTableFile) {
|
|
132
|
+
table_numbers_.push_back(number);
|
|
133
|
+
} else {
|
|
134
|
+
// Ignore other files
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
return status;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
void ConvertLogFilesToTables() {
|
|
143
|
+
for (size_t i = 0; i < logs_.size(); i++) {
|
|
144
|
+
std::string logname = LogFileName(dbname_, logs_[i]);
|
|
145
|
+
Status status = ConvertLogToTable(logs_[i]);
|
|
146
|
+
if (!status.ok()) {
|
|
147
|
+
Log(env_, options_.info_log, "Log #%llu: ignoring conversion error: %s",
|
|
148
|
+
(unsigned long long) logs_[i],
|
|
149
|
+
status.ToString().c_str());
|
|
150
|
+
}
|
|
151
|
+
ArchiveFile(logname);
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
Status ConvertLogToTable(uint64_t log) {
|
|
156
|
+
struct LogReporter : public log::Reader::Reporter {
|
|
157
|
+
Env* env;
|
|
158
|
+
WritableFile* info_log;
|
|
159
|
+
uint64_t lognum;
|
|
160
|
+
virtual void Corruption(size_t bytes, const Status& s) {
|
|
161
|
+
// We print error messages for corruption, but continue repairing.
|
|
162
|
+
Log(env, info_log, "Log #%llu: dropping %d bytes; %s",
|
|
163
|
+
(unsigned long long) lognum,
|
|
164
|
+
static_cast<int>(bytes),
|
|
165
|
+
s.ToString().c_str());
|
|
166
|
+
}
|
|
167
|
+
};
|
|
168
|
+
|
|
169
|
+
// Open the log file
|
|
170
|
+
std::string logname = LogFileName(dbname_, log);
|
|
171
|
+
SequentialFile* lfile;
|
|
172
|
+
Status status = env_->NewSequentialFile(logname, &lfile);
|
|
173
|
+
if (!status.ok()) {
|
|
174
|
+
return status;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
// Create the log reader.
|
|
178
|
+
LogReporter reporter;
|
|
179
|
+
reporter.env = env_;
|
|
180
|
+
reporter.info_log = options_.info_log;
|
|
181
|
+
reporter.lognum = log;
|
|
182
|
+
// We intentially make log::Reader do checksumming so that
|
|
183
|
+
// corruptions cause entire commits to be skipped instead of
|
|
184
|
+
// propagating bad information (like overly large sequence
|
|
185
|
+
// numbers).
|
|
186
|
+
log::Reader reader(lfile, &reporter, false/*do not checksum*/,
|
|
187
|
+
0/*initial_offset*/);
|
|
188
|
+
|
|
189
|
+
// Read all the records and add to a memtable
|
|
190
|
+
std::string scratch;
|
|
191
|
+
Slice record;
|
|
192
|
+
WriteBatch batch;
|
|
193
|
+
MemTable* mem = new MemTable(icmp_);
|
|
194
|
+
mem->Ref();
|
|
195
|
+
int counter = 0;
|
|
196
|
+
while (reader.ReadRecord(&record, &scratch)) {
|
|
197
|
+
if (record.size() < 12) {
|
|
198
|
+
reporter.Corruption(
|
|
199
|
+
record.size(), Status::Corruption("log record too small"));
|
|
200
|
+
continue;
|
|
201
|
+
}
|
|
202
|
+
WriteBatchInternal::SetContents(&batch, record);
|
|
203
|
+
status = WriteBatchInternal::InsertInto(&batch, mem);
|
|
204
|
+
if (status.ok()) {
|
|
205
|
+
counter += WriteBatchInternal::Count(&batch);
|
|
206
|
+
} else {
|
|
207
|
+
Log(env_, options_.info_log, "Log #%llu: ignoring %s",
|
|
208
|
+
(unsigned long long) log,
|
|
209
|
+
status.ToString().c_str());
|
|
210
|
+
status = Status::OK(); // Keep going with rest of file
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
delete lfile;
|
|
214
|
+
|
|
215
|
+
// We ignore any version edits generated by the conversion to a Table
|
|
216
|
+
// since ExtractMetaData() will also generate edits.
|
|
217
|
+
VersionEdit skipped;
|
|
218
|
+
FileMetaData meta;
|
|
219
|
+
meta.number = next_file_number_++;
|
|
220
|
+
Iterator* iter = mem->NewIterator();
|
|
221
|
+
status = BuildTable(dbname_, env_, options_, table_cache_, iter,
|
|
222
|
+
&meta, &skipped);
|
|
223
|
+
delete iter;
|
|
224
|
+
mem->Unref();
|
|
225
|
+
mem = NULL;
|
|
226
|
+
if (status.ok()) {
|
|
227
|
+
if (meta.file_size > 0) {
|
|
228
|
+
table_numbers_.push_back(meta.number);
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
Log(env_, options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
|
|
232
|
+
(unsigned long long) log,
|
|
233
|
+
counter,
|
|
234
|
+
(unsigned long long) meta.number,
|
|
235
|
+
status.ToString().c_str());
|
|
236
|
+
return status;
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
void ExtractMetaData() {
|
|
240
|
+
std::vector<TableInfo> kept;
|
|
241
|
+
for (size_t i = 0; i < table_numbers_.size(); i++) {
|
|
242
|
+
TableInfo t;
|
|
243
|
+
t.meta.number = table_numbers_[i];
|
|
244
|
+
Status status = ScanTable(&t);
|
|
245
|
+
if (!status.ok()) {
|
|
246
|
+
std::string fname = TableFileName(dbname_, table_numbers_[i]);
|
|
247
|
+
Log(env_, options_.info_log, "Table #%llu: ignoring %s",
|
|
248
|
+
(unsigned long long) table_numbers_[i],
|
|
249
|
+
status.ToString().c_str());
|
|
250
|
+
ArchiveFile(fname);
|
|
251
|
+
} else {
|
|
252
|
+
tables_.push_back(t);
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
Status ScanTable(TableInfo* t) {
|
|
258
|
+
std::string fname = TableFileName(dbname_, t->meta.number);
|
|
259
|
+
int counter = 0;
|
|
260
|
+
Status status = env_->GetFileSize(fname, &t->meta.file_size);
|
|
261
|
+
if (status.ok()) {
|
|
262
|
+
Iterator* iter = table_cache_->NewIterator(
|
|
263
|
+
ReadOptions(), t->meta.number, t->meta.file_size);
|
|
264
|
+
bool empty = true;
|
|
265
|
+
ParsedInternalKey parsed;
|
|
266
|
+
t->max_sequence = 0;
|
|
267
|
+
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
|
|
268
|
+
Slice key = iter->key();
|
|
269
|
+
if (!ParseInternalKey(key, &parsed)) {
|
|
270
|
+
Log(env_, options_.info_log, "Table #%llu: unparsable key %s",
|
|
271
|
+
(unsigned long long) t->meta.number,
|
|
272
|
+
EscapeString(key).c_str());
|
|
273
|
+
continue;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
counter++;
|
|
277
|
+
if (empty) {
|
|
278
|
+
empty = false;
|
|
279
|
+
t->meta.smallest.DecodeFrom(key);
|
|
280
|
+
}
|
|
281
|
+
t->meta.largest.DecodeFrom(key);
|
|
282
|
+
if (parsed.sequence > t->max_sequence) {
|
|
283
|
+
t->max_sequence = parsed.sequence;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
if (!iter->status().ok()) {
|
|
287
|
+
status = iter->status();
|
|
288
|
+
}
|
|
289
|
+
delete iter;
|
|
290
|
+
}
|
|
291
|
+
Log(env_, options_.info_log, "Table #%llu: %d entries %s",
|
|
292
|
+
(unsigned long long) t->meta.number,
|
|
293
|
+
counter,
|
|
294
|
+
status.ToString().c_str());
|
|
295
|
+
return status;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
Status WriteDescriptor() {
|
|
299
|
+
std::string tmp = TempFileName(dbname_, 1);
|
|
300
|
+
WritableFile* file;
|
|
301
|
+
Status status = env_->NewWritableFile(tmp, &file);
|
|
302
|
+
if (!status.ok()) {
|
|
303
|
+
return status;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
SequenceNumber max_sequence = 0;
|
|
307
|
+
for (size_t i = 0; i < tables_.size(); i++) {
|
|
308
|
+
if (max_sequence < tables_[i].max_sequence) {
|
|
309
|
+
max_sequence = tables_[i].max_sequence;
|
|
310
|
+
}
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
edit_.SetComparatorName(icmp_.user_comparator()->Name());
|
|
314
|
+
edit_.SetLogNumber(0);
|
|
315
|
+
edit_.SetNextFile(next_file_number_);
|
|
316
|
+
edit_.SetLastSequence(max_sequence);
|
|
317
|
+
|
|
318
|
+
for (size_t i = 0; i < tables_.size(); i++) {
|
|
319
|
+
// TODO(opt): separate out into multiple levels
|
|
320
|
+
const TableInfo& t = tables_[i];
|
|
321
|
+
edit_.AddFile(0, t.meta.number, t.meta.file_size,
|
|
322
|
+
t.meta.smallest, t.meta.largest);
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
//fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
|
|
326
|
+
{
|
|
327
|
+
log::Writer log(file);
|
|
328
|
+
std::string record;
|
|
329
|
+
edit_.EncodeTo(&record);
|
|
330
|
+
status = log.AddRecord(record);
|
|
331
|
+
}
|
|
332
|
+
if (status.ok()) {
|
|
333
|
+
status = file->Close();
|
|
334
|
+
}
|
|
335
|
+
delete file;
|
|
336
|
+
file = NULL;
|
|
337
|
+
|
|
338
|
+
if (!status.ok()) {
|
|
339
|
+
env_->DeleteFile(tmp);
|
|
340
|
+
} else {
|
|
341
|
+
// Discard older manifests
|
|
342
|
+
for (size_t i = 0; i < manifests_.size(); i++) {
|
|
343
|
+
ArchiveFile(dbname_ + "/" + manifests_[i]);
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
// Install new manifest
|
|
347
|
+
status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1));
|
|
348
|
+
if (status.ok()) {
|
|
349
|
+
status = SetCurrentFile(env_, dbname_, 1);
|
|
350
|
+
} else {
|
|
351
|
+
env_->DeleteFile(tmp);
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
return status;
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
void ArchiveFile(const std::string& fname) {
|
|
358
|
+
// Move into another directory. E.g., for
|
|
359
|
+
// dir/foo
|
|
360
|
+
// rename to
|
|
361
|
+
// dir/lost/foo
|
|
362
|
+
const char* slash = strrchr(fname.c_str(), '/');
|
|
363
|
+
std::string new_dir;
|
|
364
|
+
if (slash != NULL) {
|
|
365
|
+
new_dir.assign(fname.data(), slash - fname.data());
|
|
366
|
+
}
|
|
367
|
+
new_dir.append("/lost");
|
|
368
|
+
env_->CreateDir(new_dir); // Ignore error
|
|
369
|
+
std::string new_file = new_dir;
|
|
370
|
+
new_file.append("/");
|
|
371
|
+
new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
|
|
372
|
+
Status s = env_->RenameFile(fname, new_file);
|
|
373
|
+
Log(env_, options_.info_log, "Archiving %s: %s\n",
|
|
374
|
+
fname.c_str(), s.ToString().c_str());
|
|
375
|
+
}
|
|
376
|
+
};
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
// Attempts to repair the database named "dbname" using a Repairer built
// from "options", and returns the status of that repair run.
Status RepairDB(const std::string& dbname, const Options& options) {
  Repairer repairer(dbname, options);
  const Status result = repairer.Run();
  return result;
}
|
|
383
|
+
|
|
384
|
+
}
|