filiptepper-leveldb-ruby 0.14
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +24 -0
- data/README +72 -0
- data/ext/leveldb/extconf.rb +14 -0
- data/ext/leveldb/leveldb.cc +530 -0
- data/ext/leveldb/platform.rb +83 -0
- data/leveldb/Makefile +191 -0
- data/leveldb/build_detect_platform +160 -0
- data/leveldb/db/builder.cc +88 -0
- data/leveldb/db/builder.h +34 -0
- data/leveldb/db/c.cc +581 -0
- data/leveldb/db/corruption_test.cc +359 -0
- data/leveldb/db/db_bench.cc +970 -0
- data/leveldb/db/db_impl.cc +1448 -0
- data/leveldb/db/db_impl.h +194 -0
- data/leveldb/db/db_iter.cc +299 -0
- data/leveldb/db/db_iter.h +26 -0
- data/leveldb/db/db_test.cc +1901 -0
- data/leveldb/db/dbformat.cc +140 -0
- data/leveldb/db/dbformat.h +227 -0
- data/leveldb/db/dbformat_test.cc +112 -0
- data/leveldb/db/filename.cc +139 -0
- data/leveldb/db/filename.h +80 -0
- data/leveldb/db/filename_test.cc +122 -0
- data/leveldb/db/log_format.h +35 -0
- data/leveldb/db/log_reader.cc +259 -0
- data/leveldb/db/log_reader.h +108 -0
- data/leveldb/db/log_test.cc +500 -0
- data/leveldb/db/log_writer.cc +103 -0
- data/leveldb/db/log_writer.h +48 -0
- data/leveldb/db/memtable.cc +145 -0
- data/leveldb/db/memtable.h +91 -0
- data/leveldb/db/repair.cc +389 -0
- data/leveldb/db/skiplist.h +379 -0
- data/leveldb/db/skiplist_test.cc +378 -0
- data/leveldb/db/snapshot.h +66 -0
- data/leveldb/db/table_cache.cc +121 -0
- data/leveldb/db/table_cache.h +61 -0
- data/leveldb/db/version_edit.cc +266 -0
- data/leveldb/db/version_edit.h +107 -0
- data/leveldb/db/version_edit_test.cc +46 -0
- data/leveldb/db/version_set.cc +1402 -0
- data/leveldb/db/version_set.h +370 -0
- data/leveldb/db/version_set_test.cc +179 -0
- data/leveldb/db/write_batch.cc +147 -0
- data/leveldb/db/write_batch_internal.h +49 -0
- data/leveldb/db/write_batch_test.cc +120 -0
- data/leveldb/helpers/memenv/memenv.cc +374 -0
- data/leveldb/helpers/memenv/memenv.h +20 -0
- data/leveldb/helpers/memenv/memenv_test.cc +232 -0
- data/leveldb/include/leveldb/c.h +275 -0
- data/leveldb/include/leveldb/cache.h +99 -0
- data/leveldb/include/leveldb/comparator.h +63 -0
- data/leveldb/include/leveldb/db.h +161 -0
- data/leveldb/include/leveldb/env.h +323 -0
- data/leveldb/include/leveldb/filter_policy.h +70 -0
- data/leveldb/include/leveldb/iterator.h +100 -0
- data/leveldb/include/leveldb/options.h +195 -0
- data/leveldb/include/leveldb/slice.h +109 -0
- data/leveldb/include/leveldb/status.h +106 -0
- data/leveldb/include/leveldb/table.h +85 -0
- data/leveldb/include/leveldb/table_builder.h +92 -0
- data/leveldb/include/leveldb/write_batch.h +64 -0
- data/leveldb/port/atomic_pointer.h +144 -0
- data/leveldb/port/port.h +21 -0
- data/leveldb/port/port_android.cc +64 -0
- data/leveldb/port/port_android.h +159 -0
- data/leveldb/port/port_example.h +125 -0
- data/leveldb/port/port_posix.cc +50 -0
- data/leveldb/port/port_posix.h +129 -0
- data/leveldb/port/win/stdint.h +24 -0
- data/leveldb/table/block.cc +267 -0
- data/leveldb/table/block.h +44 -0
- data/leveldb/table/block_builder.cc +109 -0
- data/leveldb/table/block_builder.h +57 -0
- data/leveldb/table/filter_block.cc +111 -0
- data/leveldb/table/filter_block.h +68 -0
- data/leveldb/table/filter_block_test.cc +128 -0
- data/leveldb/table/format.cc +145 -0
- data/leveldb/table/format.h +108 -0
- data/leveldb/table/iterator.cc +67 -0
- data/leveldb/table/iterator_wrapper.h +63 -0
- data/leveldb/table/merger.cc +197 -0
- data/leveldb/table/merger.h +26 -0
- data/leveldb/table/table.cc +276 -0
- data/leveldb/table/table_builder.cc +270 -0
- data/leveldb/table/table_test.cc +838 -0
- data/leveldb/table/two_level_iterator.cc +182 -0
- data/leveldb/table/two_level_iterator.h +34 -0
- data/leveldb/util/arena.cc +68 -0
- data/leveldb/util/arena.h +68 -0
- data/leveldb/util/arena_test.cc +68 -0
- data/leveldb/util/bloom.cc +95 -0
- data/leveldb/util/bloom_test.cc +159 -0
- data/leveldb/util/cache.cc +328 -0
- data/leveldb/util/cache_test.cc +186 -0
- data/leveldb/util/coding.cc +194 -0
- data/leveldb/util/coding.h +104 -0
- data/leveldb/util/coding_test.cc +173 -0
- data/leveldb/util/comparator.cc +76 -0
- data/leveldb/util/crc32c.cc +332 -0
- data/leveldb/util/crc32c.h +45 -0
- data/leveldb/util/crc32c_test.cc +72 -0
- data/leveldb/util/env.cc +96 -0
- data/leveldb/util/env_posix.cc +609 -0
- data/leveldb/util/env_test.cc +104 -0
- data/leveldb/util/filter_policy.cc +11 -0
- data/leveldb/util/hash.cc +45 -0
- data/leveldb/util/hash.h +19 -0
- data/leveldb/util/histogram.cc +139 -0
- data/leveldb/util/histogram.h +42 -0
- data/leveldb/util/logging.cc +81 -0
- data/leveldb/util/logging.h +47 -0
- data/leveldb/util/mutexlock.h +39 -0
- data/leveldb/util/options.cc +29 -0
- data/leveldb/util/posix_logger.h +98 -0
- data/leveldb/util/random.h +59 -0
- data/leveldb/util/status.cc +75 -0
- data/leveldb/util/testharness.cc +77 -0
- data/leveldb/util/testharness.h +138 -0
- data/leveldb/util/testutil.cc +51 -0
- data/leveldb/util/testutil.h +53 -0
- data/lib/leveldb.rb +76 -0
- metadata +175 -0
@@ -0,0 +1,103 @@
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
4
|
+
|
5
|
+
#include "db/log_writer.h"
|
6
|
+
|
7
|
+
#include <stdint.h>
|
8
|
+
#include "leveldb/env.h"
|
9
|
+
#include "util/coding.h"
|
10
|
+
#include "util/crc32c.h"
|
11
|
+
|
12
|
+
namespace leveldb {
|
13
|
+
namespace log {
|
14
|
+
|
15
|
+
// Builds a writer that appends to "*dest", starting at offset zero within
// the current block.  The crc32c of every one-byte record type tag in
// [0, kMaxRecordType] is computed once here and stored in type_crc_.
Writer::Writer(WritableFile* dest) : dest_(dest), block_offset_(0) {
  int type = 0;
  while (type <= kMaxRecordType) {
    const char tag = static_cast<char>(type);
    type_crc_[type] = crc32c::Value(&tag, 1);
    ++type;
  }
}
|
23
|
+
|
24
|
+
// The destructor body is empty: it does not touch the destination file.
Writer::~Writer() {}
|
26
|
+
|
27
|
+
// Appends "slice" to the log as one logical record.
//
// The payload is cut into physical fragments so that no fragment (header
// plus data) extends past the end of the current kBlockSize block.  A
// fragment is tagged kFullType when it carries the whole record, otherwise
// kFirstType / kMiddleType / kLastType according to its position.  An empty
// slice still produces exactly one zero-length record.
//
// Returns the first non-ok Status reported by the destination file; no
// further data is written once a write has failed.
Status Writer::AddRecord(const Slice& slice) {
  const char* ptr = slice.data();
  size_t left = slice.size();

  // Fragment the record if necessary and emit it.  Note that if slice
  // is empty, we still want to iterate once to emit a single
  // zero-length record
  Status s;
  bool begin = true;
  do {
    const int leftover = kBlockSize - block_offset_;
    assert(leftover >= 0);
    if (leftover < kHeaderSize) {
      // Switch to a new block
      if (leftover > 0) {
        // Fill the trailer (literal below relies on kHeaderSize being 7)
        assert(kHeaderSize == 7);
        s = dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
        if (!s.ok()) {
          // The padding did not reach the file, so report the failure
          // instead of continuing as if the block had been completed.
          // block_offset_ is left unchanged.
          return s;
        }
      }
      block_offset_ = 0;
    }

    // Invariant: we never leave < kHeaderSize bytes in a block.
    assert(kBlockSize - block_offset_ - kHeaderSize >= 0);

    const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
    const size_t fragment_length = (left < avail) ? left : avail;

    RecordType type;
    const bool end = (left == fragment_length);
    if (begin && end) {
      type = kFullType;
    } else if (begin) {
      type = kFirstType;
    } else if (end) {
      type = kLastType;
    } else {
      type = kMiddleType;
    }

    s = EmitPhysicalRecord(type, ptr, fragment_length);
    ptr += fragment_length;
    left -= fragment_length;
    begin = false;
  } while (s.ok() && left > 0);
  return s;
}
|
74
|
+
|
75
|
+
// Writes one physical record: a kHeaderSize-byte header followed by the "n"
// payload bytes at "ptr", then flushes the destination file.
//
// Header layout as written below:
//   bytes 0..3  masked crc32c covering the record type and the payload
//   bytes 4..5  payload length, low byte first
//   byte  6     record type
//
// block_offset_ is advanced by the full record size whether or not the
// writes succeeded.  Returns the status of the first failing file operation,
// or OK.
Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
  assert(n <= 0xffff);  // Must fit in two bytes
  assert(block_offset_ + kHeaderSize + n <= kBlockSize);

  char header[kHeaderSize];

  // Checksum of type + payload, masked for storage.  The type's own crc
  // was precomputed in the constructor.
  const uint32_t stored_crc =
      crc32c::Mask(crc32c::Extend(type_crc_[t], ptr, n));
  EncodeFixed32(header, stored_crc);

  // Length and type follow the checksum.
  header[4] = static_cast<char>(n & 0xff);
  header[5] = static_cast<char>(n >> 8);
  header[6] = static_cast<char>(t);

  // Header, then payload, then flush; each step runs only if the previous
  // one succeeded.
  Status result = dest_->Append(Slice(header, kHeaderSize));
  if (result.ok()) {
    result = dest_->Append(Slice(ptr, n));
  }
  if (result.ok()) {
    result = dest_->Flush();
  }

  block_offset_ += kHeaderSize + n;
  return result;
}
|
101
|
+
|
102
|
+
} // namespace log
|
103
|
+
} // namespace leveldb
|
@@ -0,0 +1,48 @@
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
4
|
+
|
5
|
+
#ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_
|
6
|
+
#define STORAGE_LEVELDB_DB_LOG_WRITER_H_
|
7
|
+
|
8
|
+
#include <stdint.h>
|
9
|
+
#include "db/log_format.h"
|
10
|
+
#include "leveldb/slice.h"
|
11
|
+
#include "leveldb/status.h"
|
12
|
+
|
13
|
+
namespace leveldb {
|
14
|
+
|
15
|
+
class WritableFile;
|
16
|
+
|
17
|
+
namespace log {
|
18
|
+
|
19
|
+
class Writer {
|
20
|
+
public:
|
21
|
+
// Create a writer that will append data to "*dest".
|
22
|
+
// "*dest" must be initially empty.
|
23
|
+
// "*dest" must remain live while this Writer is in use.
|
24
|
+
explicit Writer(WritableFile* dest);
|
25
|
+
~Writer();
|
26
|
+
|
27
|
+
Status AddRecord(const Slice& slice);
|
28
|
+
|
29
|
+
private:
|
30
|
+
WritableFile* dest_;
|
31
|
+
int block_offset_; // Current offset in block
|
32
|
+
|
33
|
+
// crc32c values for all supported record types. These are
|
34
|
+
// pre-computed to reduce the overhead of computing the crc of the
|
35
|
+
// record type stored in the header.
|
36
|
+
uint32_t type_crc_[kMaxRecordType + 1];
|
37
|
+
|
38
|
+
Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
|
39
|
+
|
40
|
+
// No copying allowed
|
41
|
+
Writer(const Writer&);
|
42
|
+
void operator=(const Writer&);
|
43
|
+
};
|
44
|
+
|
45
|
+
} // namespace log
|
46
|
+
} // namespace leveldb
|
47
|
+
|
48
|
+
#endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_
|
@@ -0,0 +1,145 @@
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
4
|
+
|
5
|
+
#include "db/memtable.h"
|
6
|
+
#include "db/dbformat.h"
|
7
|
+
#include "leveldb/comparator.h"
|
8
|
+
#include "leveldb/env.h"
|
9
|
+
#include "leveldb/iterator.h"
|
10
|
+
#include "util/coding.h"
|
11
|
+
|
12
|
+
namespace leveldb {
|
13
|
+
|
14
|
+
// Decodes a varint32 length at "data" and returns a Slice covering the
// "length" bytes that follow it.  No bounds are known here, so the decoder
// is given a limit of data + 5 and the input is assumed not to be corrupted.
static Slice GetLengthPrefixedSlice(const char* data) {
  uint32_t length;
  const char* payload = GetVarint32Ptr(data, data + 5, &length);
  return Slice(payload, length);
}
|
20
|
+
|
21
|
+
// Creates an empty memtable ordered by "cmp".  The reference count starts
// at zero, and the table is wired to this object's own comparator and arena.
MemTable::MemTable(const InternalKeyComparator& cmp)
    : comparator_(cmp), refs_(0), table_(comparator_, &arena_) {}
|
26
|
+
|
27
|
+
// Destruction is only expected once every reference has been dropped.
MemTable::~MemTable() { assert(refs_ == 0); }
|
30
|
+
|
31
|
+
// Reports the memory usage figure maintained by the memtable's arena.
size_t MemTable::ApproximateMemoryUsage() {
  return arena_.MemoryUsage();
}
|
32
|
+
|
33
|
+
int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
|
34
|
+
const {
|
35
|
+
// Internal keys are encoded as length-prefixed strings.
|
36
|
+
Slice a = GetLengthPrefixedSlice(aptr);
|
37
|
+
Slice b = GetLengthPrefixedSlice(bptr);
|
38
|
+
return comparator.Compare(a, b);
|
39
|
+
}
|
40
|
+
|
41
|
+
// Encode a suitable internal key target for "target" and return it.
|
42
|
+
// Uses *scratch as scratch space, and the returned pointer will point
|
43
|
+
// into this scratch space.
|
44
|
+
static const char* EncodeKey(std::string* scratch, const Slice& target) {
|
45
|
+
scratch->clear();
|
46
|
+
PutVarint32(scratch, target.size());
|
47
|
+
scratch->append(target.data(), target.size());
|
48
|
+
return scratch->data();
|
49
|
+
}
|
50
|
+
|
51
|
+
class MemTableIterator: public Iterator {
|
52
|
+
public:
|
53
|
+
explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }
|
54
|
+
|
55
|
+
virtual bool Valid() const { return iter_.Valid(); }
|
56
|
+
virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
|
57
|
+
virtual void SeekToFirst() { iter_.SeekToFirst(); }
|
58
|
+
virtual void SeekToLast() { iter_.SeekToLast(); }
|
59
|
+
virtual void Next() { iter_.Next(); }
|
60
|
+
virtual void Prev() { iter_.Prev(); }
|
61
|
+
virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
|
62
|
+
virtual Slice value() const {
|
63
|
+
Slice key_slice = GetLengthPrefixedSlice(iter_.key());
|
64
|
+
return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
|
65
|
+
}
|
66
|
+
|
67
|
+
virtual Status status() const { return Status::OK(); }
|
68
|
+
|
69
|
+
private:
|
70
|
+
MemTable::Table::Iterator iter_;
|
71
|
+
std::string tmp_; // For passing to EncodeKey
|
72
|
+
|
73
|
+
// No copying allowed
|
74
|
+
MemTableIterator(const MemTableIterator&);
|
75
|
+
void operator=(const MemTableIterator&);
|
76
|
+
};
|
77
|
+
|
78
|
+
// Returns a newly allocated iterator over this memtable's skiplist.
Iterator* MemTable::NewIterator() {
  Iterator* const result = new MemTableIterator(&table_);
  return result;
}
|
81
|
+
|
82
|
+
// Stores one entry in the memtable.  The entry is laid out in a single
// arena allocation as the concatenation of:
//   key_size     : varint32 of internal_key.size()
//   key bytes    : char[internal_key.size()]
//   value_size   : varint32 of value.size()
//   value bytes  : char[value.size()]
// where the internal key is the user key followed by an 8-byte tag holding
// (s << 8) | type.
void MemTable::Add(SequenceNumber s, ValueType type,
                   const Slice& key,
                   const Slice& value) {
  const size_t user_key_len = key.size();
  const size_t value_len = value.size();
  const size_t ikey_len = user_key_len + 8;  // user key + 8-byte tag
  const size_t total_len = VarintLength(ikey_len) + ikey_len +
                           VarintLength(value_len) + value_len;

  char* const buf = arena_.Allocate(total_len);

  // Internal key: length prefix, user key bytes, then the fixed-width tag.
  char* cursor = EncodeVarint32(buf, ikey_len);
  memcpy(cursor, key.data(), user_key_len);
  cursor += user_key_len;
  EncodeFixed64(cursor, (s << 8) | type);
  cursor += 8;

  // Value: length prefix, then the value bytes.
  cursor = EncodeVarint32(cursor, value_len);
  memcpy(cursor, value.data(), value_len);

  // Everything written must exactly fill the allocation.
  assert(cursor + value_len == buf + total_len);
  table_.Insert(buf);
}
|
107
|
+
|
108
|
+
bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
|
109
|
+
Slice memkey = key.memtable_key();
|
110
|
+
Table::Iterator iter(&table_);
|
111
|
+
iter.Seek(memkey.data());
|
112
|
+
if (iter.Valid()) {
|
113
|
+
// entry format is:
|
114
|
+
// klength varint32
|
115
|
+
// userkey char[klength]
|
116
|
+
// tag uint64
|
117
|
+
// vlength varint32
|
118
|
+
// value char[vlength]
|
119
|
+
// Check that it belongs to same user key. We do not check the
|
120
|
+
// sequence number since the Seek() call above should have skipped
|
121
|
+
// all entries with overly large sequence numbers.
|
122
|
+
const char* entry = iter.key();
|
123
|
+
uint32_t key_length;
|
124
|
+
const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
|
125
|
+
if (comparator_.comparator.user_comparator()->Compare(
|
126
|
+
Slice(key_ptr, key_length - 8),
|
127
|
+
key.user_key()) == 0) {
|
128
|
+
// Correct user key
|
129
|
+
const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
|
130
|
+
switch (static_cast<ValueType>(tag & 0xff)) {
|
131
|
+
case kTypeValue: {
|
132
|
+
Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
|
133
|
+
value->assign(v.data(), v.size());
|
134
|
+
return true;
|
135
|
+
}
|
136
|
+
case kTypeDeletion:
|
137
|
+
*s = Status::NotFound(Slice());
|
138
|
+
return true;
|
139
|
+
}
|
140
|
+
}
|
141
|
+
}
|
142
|
+
return false;
|
143
|
+
}
|
144
|
+
|
145
|
+
} // namespace leveldb
|
@@ -0,0 +1,91 @@
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
4
|
+
|
5
|
+
#ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_
|
6
|
+
#define STORAGE_LEVELDB_DB_MEMTABLE_H_
|
7
|
+
|
8
|
+
#include <string>
|
9
|
+
#include "leveldb/db.h"
|
10
|
+
#include "db/dbformat.h"
|
11
|
+
#include "db/skiplist.h"
|
12
|
+
#include "util/arena.h"
|
13
|
+
|
14
|
+
namespace leveldb {
|
15
|
+
|
16
|
+
class InternalKeyComparator;
|
17
|
+
class Mutex;
|
18
|
+
class MemTableIterator;
|
19
|
+
|
20
|
+
class MemTable {
|
21
|
+
public:
|
22
|
+
// MemTables are reference counted. The initial reference count
|
23
|
+
// is zero and the caller must call Ref() at least once.
|
24
|
+
explicit MemTable(const InternalKeyComparator& comparator);
|
25
|
+
|
26
|
+
// Increase reference count.
|
27
|
+
void Ref() { ++refs_; }
|
28
|
+
|
29
|
+
// Drop reference count. Delete if no more references exist.
|
30
|
+
void Unref() {
|
31
|
+
--refs_;
|
32
|
+
assert(refs_ >= 0);
|
33
|
+
if (refs_ <= 0) {
|
34
|
+
delete this;
|
35
|
+
}
|
36
|
+
}
|
37
|
+
|
38
|
+
// Returns an estimate of the number of bytes of data in use by this
|
39
|
+
// data structure.
|
40
|
+
//
|
41
|
+
// REQUIRES: external synchronization to prevent simultaneous
|
42
|
+
// operations on the same MemTable.
|
43
|
+
size_t ApproximateMemoryUsage();
|
44
|
+
|
45
|
+
// Return an iterator that yields the contents of the memtable.
|
46
|
+
//
|
47
|
+
// The caller must ensure that the underlying MemTable remains live
|
48
|
+
// while the returned iterator is live. The keys returned by this
|
49
|
+
// iterator are internal keys encoded by AppendInternalKey in the
|
50
|
+
// db/format.{h,cc} module.
|
51
|
+
Iterator* NewIterator();
|
52
|
+
|
53
|
+
// Add an entry into memtable that maps key to value at the
|
54
|
+
// specified sequence number and with the specified type.
|
55
|
+
// Typically value will be empty if type==kTypeDeletion.
|
56
|
+
void Add(SequenceNumber seq, ValueType type,
|
57
|
+
const Slice& key,
|
58
|
+
const Slice& value);
|
59
|
+
|
60
|
+
// If memtable contains a value for key, store it in *value and return true.
|
61
|
+
// If memtable contains a deletion for key, store a NotFound() error
|
62
|
+
// in *status and return true.
|
63
|
+
// Else, return false.
|
64
|
+
bool Get(const LookupKey& key, std::string* value, Status* s);
|
65
|
+
|
66
|
+
private:
|
67
|
+
~MemTable(); // Private since only Unref() should be used to delete it
|
68
|
+
|
69
|
+
struct KeyComparator {
|
70
|
+
const InternalKeyComparator comparator;
|
71
|
+
explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
|
72
|
+
int operator()(const char* a, const char* b) const;
|
73
|
+
};
|
74
|
+
friend class MemTableIterator;
|
75
|
+
friend class MemTableBackwardIterator;
|
76
|
+
|
77
|
+
typedef SkipList<const char*, KeyComparator> Table;
|
78
|
+
|
79
|
+
KeyComparator comparator_;
|
80
|
+
int refs_;
|
81
|
+
Arena arena_;
|
82
|
+
Table table_;
|
83
|
+
|
84
|
+
// No copying allowed
|
85
|
+
MemTable(const MemTable&);
|
86
|
+
void operator=(const MemTable&);
|
87
|
+
};
|
88
|
+
|
89
|
+
} // namespace leveldb
|
90
|
+
|
91
|
+
#endif // STORAGE_LEVELDB_DB_MEMTABLE_H_
|
@@ -0,0 +1,389 @@
|
|
1
|
+
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
|
2
|
+
// Use of this source code is governed by a BSD-style license that can be
|
3
|
+
// found in the LICENSE file. See the AUTHORS file for names of contributors.
|
4
|
+
//
|
5
|
+
// We recover the contents of the descriptor from the other files we find.
|
6
|
+
// (1) Any log files are first converted to tables
|
7
|
+
// (2) We scan every table to compute
|
8
|
+
// (a) smallest/largest for the table
|
9
|
+
// (b) largest sequence number in the table
|
10
|
+
// (3) We generate descriptor contents:
|
11
|
+
// - log number is set to zero
|
12
|
+
// - next-file-number is set to 1 + largest file number we found
|
13
|
+
// - last-sequence-number is set to largest sequence# found across
|
14
|
+
//      all tables (see 2b)
|
15
|
+
// - compaction pointers are cleared
|
16
|
+
// - every table file is added at level 0
|
17
|
+
//
|
18
|
+
// Possible optimization 1:
|
19
|
+
// (a) Compute total size and use to pick appropriate max-level M
|
20
|
+
// (b) Sort tables by largest sequence# in the table
|
21
|
+
// (c) For each table: if it overlaps earlier table, place in level-0,
|
22
|
+
// else place in level-M.
|
23
|
+
// Possible optimization 2:
|
24
|
+
// Store per-table metadata (smallest, largest, largest-seq#, ...)
|
25
|
+
// in the table's meta section to speed up ScanTable.
|
26
|
+
|
27
|
+
#include "db/builder.h"
|
28
|
+
#include "db/db_impl.h"
|
29
|
+
#include "db/dbformat.h"
|
30
|
+
#include "db/filename.h"
|
31
|
+
#include "db/log_reader.h"
|
32
|
+
#include "db/log_writer.h"
|
33
|
+
#include "db/memtable.h"
|
34
|
+
#include "db/table_cache.h"
|
35
|
+
#include "db/version_edit.h"
|
36
|
+
#include "db/write_batch_internal.h"
|
37
|
+
#include "leveldb/comparator.h"
|
38
|
+
#include "leveldb/db.h"
|
39
|
+
#include "leveldb/env.h"
|
40
|
+
|
41
|
+
namespace leveldb {
|
42
|
+
|
43
|
+
namespace {
|
44
|
+
|
45
|
+
class Repairer {
|
46
|
+
public:
|
47
|
+
Repairer(const std::string& dbname, const Options& options)
|
48
|
+
: dbname_(dbname),
|
49
|
+
env_(options.env),
|
50
|
+
icmp_(options.comparator),
|
51
|
+
ipolicy_(options.filter_policy),
|
52
|
+
options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)),
|
53
|
+
owns_info_log_(options_.info_log != options.info_log),
|
54
|
+
owns_cache_(options_.block_cache != options.block_cache),
|
55
|
+
next_file_number_(1) {
|
56
|
+
// TableCache can be small since we expect each table to be opened once.
|
57
|
+
table_cache_ = new TableCache(dbname_, &options_, 10);
|
58
|
+
}
|
59
|
+
|
60
|
+
// Releases the table cache, then the info log and block cache, but only
// those the constructor flagged as owned by this object.
~Repairer() {
  delete table_cache_;
  if (owns_info_log_) delete options_.info_log;
  if (owns_cache_) delete options_.block_cache;
}
|
69
|
+
|
70
|
+
// Drives the repair: find the files, convert logs to tables, scan the
// tables for metadata, then write a fresh descriptor.  On success a summary
// line with the number of recovered tables and their total size is logged.
// Returns the status of FindFiles() if that failed, otherwise the status of
// WriteDescriptor().
Status Run() {
  Status status = FindFiles();
  if (!status.ok()) {
    return status;
  }

  ConvertLogFilesToTables();
  ExtractMetaData();
  status = WriteDescriptor();
  if (!status.ok()) {
    return status;
  }

  unsigned long long bytes = 0;
  const size_t table_count = tables_.size();
  for (size_t idx = 0; idx < table_count; ++idx) {
    bytes += tables_[idx].meta.file_size;
  }
  Log(options_.info_log,
      "**** Repaired leveldb %s; "
      "recovered %d files; %llu bytes. "
      "Some data may have been lost. "
      "****",
      dbname_.c_str(),
      static_cast<int>(tables_.size()),
      bytes);
  return status;
}
|
93
|
+
|
94
|
+
private:
|
95
|
+
struct TableInfo {
|
96
|
+
FileMetaData meta;
|
97
|
+
SequenceNumber max_sequence;
|
98
|
+
};
|
99
|
+
|
100
|
+
std::string const dbname_;
|
101
|
+
Env* const env_;
|
102
|
+
InternalKeyComparator const icmp_;
|
103
|
+
InternalFilterPolicy const ipolicy_;
|
104
|
+
Options const options_;
|
105
|
+
bool owns_info_log_;
|
106
|
+
bool owns_cache_;
|
107
|
+
TableCache* table_cache_;
|
108
|
+
VersionEdit edit_;
|
109
|
+
|
110
|
+
std::vector<std::string> manifests_;
|
111
|
+
std::vector<uint64_t> table_numbers_;
|
112
|
+
std::vector<uint64_t> logs_;
|
113
|
+
std::vector<TableInfo> tables_;
|
114
|
+
uint64_t next_file_number_;
|
115
|
+
|
116
|
+
// Lists the database directory and sorts the recognised file names into
// manifests_, logs_ and table_numbers_.  next_file_number_ is raised above
// the number of every recognised file other than descriptor files.
// Fails if the directory cannot be listed or contains no entries.
Status FindFiles() {
  std::vector<std::string> filenames;
  Status status = env_->GetChildren(dbname_, &filenames);
  if (!status.ok()) {
    return status;
  }
  if (filenames.empty()) {
    return Status::IOError(dbname_, "repair found no files");
  }

  uint64_t number;
  FileType type;
  for (size_t idx = 0; idx < filenames.size(); ++idx) {
    const std::string& name = filenames[idx];
    if (!ParseFileName(name, &number, &type)) {
      continue;  // Not a file name we recognise
    }
    if (type == kDescriptorFile) {
      manifests_.push_back(name);
      continue;
    }

    if (number + 1 > next_file_number_) {
      next_file_number_ = number + 1;
    }
    switch (type) {
      case kLogFile:
        logs_.push_back(number);
        break;
      case kTableFile:
        table_numbers_.push_back(number);
        break;
      default:
        // Ignore other files
        break;
    }
  }
  return status;
}
|
148
|
+
|
149
|
+
void ConvertLogFilesToTables() {
|
150
|
+
for (size_t i = 0; i < logs_.size(); i++) {
|
151
|
+
std::string logname = LogFileName(dbname_, logs_[i]);
|
152
|
+
Status status = ConvertLogToTable(logs_[i]);
|
153
|
+
if (!status.ok()) {
|
154
|
+
Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
|
155
|
+
(unsigned long long) logs_[i],
|
156
|
+
status.ToString().c_str());
|
157
|
+
}
|
158
|
+
ArchiveFile(logname);
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
// Replays log file number "log" into a fresh memtable and writes that
// memtable out as a new table file.
//
// Records shorter than 12 bytes are reported as corruption and skipped.
// Batches that fail to apply are logged and skipped, and replay continues
// with the rest of the file.  When the table is built successfully and is
// non-empty, its number is appended to table_numbers_ so that
// ExtractMetaData() picks it up.
//
// Returns the error from opening the log file, otherwise the status of
// BuildTable().
Status ConvertLogToTable(uint64_t log) {
  struct LogReporter : public log::Reader::Reporter {
    Env* env;
    Logger* info_log;
    uint64_t lognum;
    virtual void Corruption(size_t bytes, const Status& s) {
      // We print error messages for corruption, but continue repairing.
      Log(info_log, "Log #%llu: dropping %d bytes; %s",
          (unsigned long long) lognum,
          static_cast<int>(bytes),
          s.ToString().c_str());
    }
  };

  // Open the log file
  std::string logname = LogFileName(dbname_, log);
  SequentialFile* lfile;
  Status status = env_->NewSequentialFile(logname, &lfile);
  if (!status.ok()) {
    return status;
  }

  // Create the log reader.
  LogReporter reporter;
  reporter.env = env_;
  reporter.info_log = options_.info_log;
  reporter.lognum = log;
  // We intentionally make log::Reader do checksumming so that
  // corruptions cause entire commits to be skipped instead of
  // propagating bad information (like overly large sequence
  // numbers).  The flag was previously passed as false, which
  // contradicted this stated intent.
  log::Reader reader(lfile, &reporter, true/*checksum*/,
                     0/*initial_offset*/);

  // Read all the records and add to a memtable
  std::string scratch;
  Slice record;
  WriteBatch batch;
  MemTable* mem = new MemTable(icmp_);
  mem->Ref();
  int counter = 0;
  while (reader.ReadRecord(&record, &scratch)) {
    if (record.size() < 12) {
      reporter.Corruption(
          record.size(), Status::Corruption("log record too small"));
      continue;
    }
    WriteBatchInternal::SetContents(&batch, record);
    status = WriteBatchInternal::InsertInto(&batch, mem);
    if (status.ok()) {
      counter += WriteBatchInternal::Count(&batch);
    } else {
      Log(options_.info_log, "Log #%llu: ignoring %s",
          (unsigned long long) log,
          status.ToString().c_str());
      status = Status::OK();  // Keep going with rest of file
    }
  }
  delete lfile;

  // Do not record a version edit for this conversion to a Table
  // since ExtractMetaData() will also generate edits.
  FileMetaData meta;
  meta.number = next_file_number_++;
  Iterator* iter = mem->NewIterator();
  status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
  delete iter;
  mem->Unref();
  mem = NULL;
  if (status.ok()) {
    if (meta.file_size > 0) {
      table_numbers_.push_back(meta.number);
    }
  }
  Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
      (unsigned long long) log,
      counter,
      (unsigned long long) meta.number,
      status.ToString().c_str());
  return status;
}
|
243
|
+
|
244
|
+
void ExtractMetaData() {
|
245
|
+
std::vector<TableInfo> kept;
|
246
|
+
for (size_t i = 0; i < table_numbers_.size(); i++) {
|
247
|
+
TableInfo t;
|
248
|
+
t.meta.number = table_numbers_[i];
|
249
|
+
Status status = ScanTable(&t);
|
250
|
+
if (!status.ok()) {
|
251
|
+
std::string fname = TableFileName(dbname_, table_numbers_[i]);
|
252
|
+
Log(options_.info_log, "Table #%llu: ignoring %s",
|
253
|
+
(unsigned long long) table_numbers_[i],
|
254
|
+
status.ToString().c_str());
|
255
|
+
ArchiveFile(fname);
|
256
|
+
} else {
|
257
|
+
tables_.push_back(t);
|
258
|
+
}
|
259
|
+
}
|
260
|
+
}
|
261
|
+
|
262
|
+
// Fills in *t for the table file numbered t->meta.number: its file size,
// the first and last parsable internal keys met while iterating, and the
// largest sequence number among the parsable keys.  Keys that fail to parse
// are logged and skipped.  Returns the GetFileSize() error if there was one,
// otherwise the iterator's final status.
Status ScanTable(TableInfo* t) {
  const std::string fname = TableFileName(dbname_, t->meta.number);
  int counter = 0;
  Status status = env_->GetFileSize(fname, &t->meta.file_size);
  if (status.ok()) {
    Iterator* iter = table_cache_->NewIterator(
        ReadOptions(), t->meta.number, t->meta.file_size);
    bool seen_key = false;
    ParsedInternalKey parsed;
    t->max_sequence = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      const Slice key = iter->key();
      if (ParseInternalKey(key, &parsed)) {
        ++counter;
        if (!seen_key) {
          // First parsable key becomes the smallest.
          seen_key = true;
          t->meta.smallest.DecodeFrom(key);
        }
        // Every parsable key overwrites the largest so far.
        t->meta.largest.DecodeFrom(key);
        if (parsed.sequence > t->max_sequence) {
          t->max_sequence = parsed.sequence;
        }
      } else {
        Log(options_.info_log, "Table #%llu: unparsable key %s",
            (unsigned long long) t->meta.number,
            EscapeString(key).c_str());
      }
    }
    if (!iter->status().ok()) {
      status = iter->status();
    }
    delete iter;
  }
  Log(options_.info_log, "Table #%llu: %d entries %s",
      (unsigned long long) t->meta.number,
      counter,
      status.ToString().c_str());
  return status;
}
|
302
|
+
|
303
|
+
// Writes a new descriptor describing the recovered tables and installs it.
//
// The version edit records the comparator name, log number 0, the next file
// number, the largest sequence number across all tables, and every table as
// a level-0 file.  It is written to a temporary file first.  On success the
// old manifests are archived, the temporary file is renamed to descriptor
// number 1, and CURRENT is pointed at it.  On failure the temporary file is
// deleted and the error returned.
Status WriteDescriptor() {
  const std::string tmp = TempFileName(dbname_, 1);
  WritableFile* file;
  Status status = env_->NewWritableFile(tmp, &file);
  if (!status.ok()) {
    return status;
  }

  const size_t table_count = tables_.size();

  SequenceNumber max_sequence = 0;
  for (size_t idx = 0; idx < table_count; ++idx) {
    const SequenceNumber candidate = tables_[idx].max_sequence;
    if (max_sequence < candidate) {
      max_sequence = candidate;
    }
  }

  edit_.SetComparatorName(icmp_.user_comparator()->Name());
  edit_.SetLogNumber(0);
  edit_.SetNextFile(next_file_number_);
  edit_.SetLastSequence(max_sequence);

  for (size_t idx = 0; idx < table_count; ++idx) {
    // TODO(opt): separate out into multiple levels
    const FileMetaData& meta = tables_[idx].meta;
    edit_.AddFile(0, meta.number, meta.file_size,
                  meta.smallest, meta.largest);
  }

  //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
  {
    // The writer only lives for this scope, so it is gone before the file
    // is closed and deleted below.
    log::Writer log(file);
    std::string record;
    edit_.EncodeTo(&record);
    status = log.AddRecord(record);
  }
  if (status.ok()) {
    status = file->Close();
  }
  delete file;
  file = NULL;

  if (!status.ok()) {
    env_->DeleteFile(tmp);
    return status;
  }

  // Discard older manifests
  for (size_t idx = 0; idx < manifests_.size(); ++idx) {
    ArchiveFile(dbname_ + "/" + manifests_[idx]);
  }

  // Install new manifest
  status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1));
  if (status.ok()) {
    status = SetCurrentFile(env_, dbname_, 1);
  } else {
    env_->DeleteFile(tmp);
  }
  return status;
}
|
361
|
+
|
362
|
+
// Moves "fname" into a "lost" subdirectory next to it.  E.g., for
//    dir/foo
// rename to
//    dir/lost/foo
// A name without any '/' is archived to the relative path lost/<name>;
// previously that case targeted the absolute directory "/lost".
// The directory is created on a best-effort basis and the outcome of the
// rename is only logged.
void ArchiveFile(const std::string& fname) {
  const std::string::size_type slash = fname.rfind('/');
  std::string new_dir;
  std::string base_name;
  if (slash == std::string::npos) {
    // No directory component: stay relative instead of jumping to the root.
    new_dir = "lost";
    base_name = fname;
  } else {
    new_dir.assign(fname, 0, slash);
    new_dir.append("/lost");
    base_name.assign(fname, slash + 1, std::string::npos);
  }
  env_->CreateDir(new_dir);  // Ignore error
  std::string new_file = new_dir;
  new_file.append("/");
  new_file.append(base_name);
  Status s = env_->RenameFile(fname, new_file);
  Log(options_.info_log, "Archiving %s: %s\n",
      fname.c_str(), s.ToString().c_str());
}
|
381
|
+
};
|
382
|
+
} // namespace
|
383
|
+
|
384
|
+
// Runs a Repairer over the database "dbname" using "options" and returns
// the status of that run.
Status RepairDB(const std::string& dbname, const Options& options) {
  return Repairer(dbname, options).Run();
}
|
388
|
+
|
389
|
+
} // namespace leveldb
|