filiptepper-leveldb-ruby 0.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. data/LICENSE +24 -0
  2. data/README +72 -0
  3. data/ext/leveldb/extconf.rb +14 -0
  4. data/ext/leveldb/leveldb.cc +530 -0
  5. data/ext/leveldb/platform.rb +83 -0
  6. data/leveldb/Makefile +191 -0
  7. data/leveldb/build_detect_platform +160 -0
  8. data/leveldb/db/builder.cc +88 -0
  9. data/leveldb/db/builder.h +34 -0
  10. data/leveldb/db/c.cc +581 -0
  11. data/leveldb/db/corruption_test.cc +359 -0
  12. data/leveldb/db/db_bench.cc +970 -0
  13. data/leveldb/db/db_impl.cc +1448 -0
  14. data/leveldb/db/db_impl.h +194 -0
  15. data/leveldb/db/db_iter.cc +299 -0
  16. data/leveldb/db/db_iter.h +26 -0
  17. data/leveldb/db/db_test.cc +1901 -0
  18. data/leveldb/db/dbformat.cc +140 -0
  19. data/leveldb/db/dbformat.h +227 -0
  20. data/leveldb/db/dbformat_test.cc +112 -0
  21. data/leveldb/db/filename.cc +139 -0
  22. data/leveldb/db/filename.h +80 -0
  23. data/leveldb/db/filename_test.cc +122 -0
  24. data/leveldb/db/log_format.h +35 -0
  25. data/leveldb/db/log_reader.cc +259 -0
  26. data/leveldb/db/log_reader.h +108 -0
  27. data/leveldb/db/log_test.cc +500 -0
  28. data/leveldb/db/log_writer.cc +103 -0
  29. data/leveldb/db/log_writer.h +48 -0
  30. data/leveldb/db/memtable.cc +145 -0
  31. data/leveldb/db/memtable.h +91 -0
  32. data/leveldb/db/repair.cc +389 -0
  33. data/leveldb/db/skiplist.h +379 -0
  34. data/leveldb/db/skiplist_test.cc +378 -0
  35. data/leveldb/db/snapshot.h +66 -0
  36. data/leveldb/db/table_cache.cc +121 -0
  37. data/leveldb/db/table_cache.h +61 -0
  38. data/leveldb/db/version_edit.cc +266 -0
  39. data/leveldb/db/version_edit.h +107 -0
  40. data/leveldb/db/version_edit_test.cc +46 -0
  41. data/leveldb/db/version_set.cc +1402 -0
  42. data/leveldb/db/version_set.h +370 -0
  43. data/leveldb/db/version_set_test.cc +179 -0
  44. data/leveldb/db/write_batch.cc +147 -0
  45. data/leveldb/db/write_batch_internal.h +49 -0
  46. data/leveldb/db/write_batch_test.cc +120 -0
  47. data/leveldb/helpers/memenv/memenv.cc +374 -0
  48. data/leveldb/helpers/memenv/memenv.h +20 -0
  49. data/leveldb/helpers/memenv/memenv_test.cc +232 -0
  50. data/leveldb/include/leveldb/c.h +275 -0
  51. data/leveldb/include/leveldb/cache.h +99 -0
  52. data/leveldb/include/leveldb/comparator.h +63 -0
  53. data/leveldb/include/leveldb/db.h +161 -0
  54. data/leveldb/include/leveldb/env.h +323 -0
  55. data/leveldb/include/leveldb/filter_policy.h +70 -0
  56. data/leveldb/include/leveldb/iterator.h +100 -0
  57. data/leveldb/include/leveldb/options.h +195 -0
  58. data/leveldb/include/leveldb/slice.h +109 -0
  59. data/leveldb/include/leveldb/status.h +106 -0
  60. data/leveldb/include/leveldb/table.h +85 -0
  61. data/leveldb/include/leveldb/table_builder.h +92 -0
  62. data/leveldb/include/leveldb/write_batch.h +64 -0
  63. data/leveldb/port/atomic_pointer.h +144 -0
  64. data/leveldb/port/port.h +21 -0
  65. data/leveldb/port/port_android.cc +64 -0
  66. data/leveldb/port/port_android.h +159 -0
  67. data/leveldb/port/port_example.h +125 -0
  68. data/leveldb/port/port_posix.cc +50 -0
  69. data/leveldb/port/port_posix.h +129 -0
  70. data/leveldb/port/win/stdint.h +24 -0
  71. data/leveldb/table/block.cc +267 -0
  72. data/leveldb/table/block.h +44 -0
  73. data/leveldb/table/block_builder.cc +109 -0
  74. data/leveldb/table/block_builder.h +57 -0
  75. data/leveldb/table/filter_block.cc +111 -0
  76. data/leveldb/table/filter_block.h +68 -0
  77. data/leveldb/table/filter_block_test.cc +128 -0
  78. data/leveldb/table/format.cc +145 -0
  79. data/leveldb/table/format.h +108 -0
  80. data/leveldb/table/iterator.cc +67 -0
  81. data/leveldb/table/iterator_wrapper.h +63 -0
  82. data/leveldb/table/merger.cc +197 -0
  83. data/leveldb/table/merger.h +26 -0
  84. data/leveldb/table/table.cc +276 -0
  85. data/leveldb/table/table_builder.cc +270 -0
  86. data/leveldb/table/table_test.cc +838 -0
  87. data/leveldb/table/two_level_iterator.cc +182 -0
  88. data/leveldb/table/two_level_iterator.h +34 -0
  89. data/leveldb/util/arena.cc +68 -0
  90. data/leveldb/util/arena.h +68 -0
  91. data/leveldb/util/arena_test.cc +68 -0
  92. data/leveldb/util/bloom.cc +95 -0
  93. data/leveldb/util/bloom_test.cc +159 -0
  94. data/leveldb/util/cache.cc +328 -0
  95. data/leveldb/util/cache_test.cc +186 -0
  96. data/leveldb/util/coding.cc +194 -0
  97. data/leveldb/util/coding.h +104 -0
  98. data/leveldb/util/coding_test.cc +173 -0
  99. data/leveldb/util/comparator.cc +76 -0
  100. data/leveldb/util/crc32c.cc +332 -0
  101. data/leveldb/util/crc32c.h +45 -0
  102. data/leveldb/util/crc32c_test.cc +72 -0
  103. data/leveldb/util/env.cc +96 -0
  104. data/leveldb/util/env_posix.cc +609 -0
  105. data/leveldb/util/env_test.cc +104 -0
  106. data/leveldb/util/filter_policy.cc +11 -0
  107. data/leveldb/util/hash.cc +45 -0
  108. data/leveldb/util/hash.h +19 -0
  109. data/leveldb/util/histogram.cc +139 -0
  110. data/leveldb/util/histogram.h +42 -0
  111. data/leveldb/util/logging.cc +81 -0
  112. data/leveldb/util/logging.h +47 -0
  113. data/leveldb/util/mutexlock.h +39 -0
  114. data/leveldb/util/options.cc +29 -0
  115. data/leveldb/util/posix_logger.h +98 -0
  116. data/leveldb/util/random.h +59 -0
  117. data/leveldb/util/status.cc +75 -0
  118. data/leveldb/util/testharness.cc +77 -0
  119. data/leveldb/util/testharness.h +138 -0
  120. data/leveldb/util/testutil.cc +51 -0
  121. data/leveldb/util/testutil.h +53 -0
  122. data/lib/leveldb.rb +76 -0
  123. metadata +175 -0
@@ -0,0 +1,103 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "db/log_writer.h"
6
+
7
+ #include <stdint.h>
8
+ #include "leveldb/env.h"
9
+ #include "util/coding.h"
10
+ #include "util/crc32c.h"
11
+
12
+ namespace leveldb {
13
+ namespace log {
14
+
15
+ Writer::Writer(WritableFile* dest)
16
+ : dest_(dest),
17
+ block_offset_(0) {
18
+ for (int i = 0; i <= kMaxRecordType; i++) {
19
+ char t = static_cast<char>(i);
20
+ type_crc_[i] = crc32c::Value(&t, 1);
21
+ }
22
+ }
23
+
24
+ Writer::~Writer() {
25
+ }
26
+
27
+ Status Writer::AddRecord(const Slice& slice) {
28
+ const char* ptr = slice.data();
29
+ size_t left = slice.size();
30
+
31
+ // Fragment the record if necessary and emit it. Note that if slice
32
+ // is empty, we still want to iterate once to emit a single
33
+ // zero-length record
34
+ Status s;
35
+ bool begin = true;
36
+ do {
37
+ const int leftover = kBlockSize - block_offset_;
38
+ assert(leftover >= 0);
39
+ if (leftover < kHeaderSize) {
40
+ // Switch to a new block
41
+ if (leftover > 0) {
42
+ // Fill the trailer (literal below relies on kHeaderSize being 7)
43
+ assert(kHeaderSize == 7);
44
+ dest_->Append(Slice("\x00\x00\x00\x00\x00\x00", leftover));
45
+ }
46
+ block_offset_ = 0;
47
+ }
48
+
49
+ // Invariant: we never leave < kHeaderSize bytes in a block.
50
+ assert(kBlockSize - block_offset_ - kHeaderSize >= 0);
51
+
52
+ const size_t avail = kBlockSize - block_offset_ - kHeaderSize;
53
+ const size_t fragment_length = (left < avail) ? left : avail;
54
+
55
+ RecordType type;
56
+ const bool end = (left == fragment_length);
57
+ if (begin && end) {
58
+ type = kFullType;
59
+ } else if (begin) {
60
+ type = kFirstType;
61
+ } else if (end) {
62
+ type = kLastType;
63
+ } else {
64
+ type = kMiddleType;
65
+ }
66
+
67
+ s = EmitPhysicalRecord(type, ptr, fragment_length);
68
+ ptr += fragment_length;
69
+ left -= fragment_length;
70
+ begin = false;
71
+ } while (s.ok() && left > 0);
72
+ return s;
73
+ }
74
+
75
+ Status Writer::EmitPhysicalRecord(RecordType t, const char* ptr, size_t n) {
76
+ assert(n <= 0xffff); // Must fit in two bytes
77
+ assert(block_offset_ + kHeaderSize + n <= kBlockSize);
78
+
79
+ // Format the header
80
+ char buf[kHeaderSize];
81
+ buf[4] = static_cast<char>(n & 0xff);
82
+ buf[5] = static_cast<char>(n >> 8);
83
+ buf[6] = static_cast<char>(t);
84
+
85
+ // Compute the crc of the record type and the payload.
86
+ uint32_t crc = crc32c::Extend(type_crc_[t], ptr, n);
87
+ crc = crc32c::Mask(crc); // Adjust for storage
88
+ EncodeFixed32(buf, crc);
89
+
90
+ // Write the header and the payload
91
+ Status s = dest_->Append(Slice(buf, kHeaderSize));
92
+ if (s.ok()) {
93
+ s = dest_->Append(Slice(ptr, n));
94
+ if (s.ok()) {
95
+ s = dest_->Flush();
96
+ }
97
+ }
98
+ block_offset_ += kHeaderSize + n;
99
+ return s;
100
+ }
101
+
102
+ } // namespace log
103
+ } // namespace leveldb
@@ -0,0 +1,48 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #ifndef STORAGE_LEVELDB_DB_LOG_WRITER_H_
6
+ #define STORAGE_LEVELDB_DB_LOG_WRITER_H_
7
+
8
+ #include <stdint.h>
9
+ #include "db/log_format.h"
10
+ #include "leveldb/slice.h"
11
+ #include "leveldb/status.h"
12
+
13
+ namespace leveldb {
14
+
15
+ class WritableFile;
16
+
17
+ namespace log {
18
+
19
+ class Writer {
20
+ public:
21
+ // Create a writer that will append data to "*dest".
22
+ // "*dest" must be initially empty.
23
+ // "*dest" must remain live while this Writer is in use.
24
+ explicit Writer(WritableFile* dest);
25
+ ~Writer();
26
+
27
+ Status AddRecord(const Slice& slice);
28
+
29
+ private:
30
+ WritableFile* dest_;
31
+ int block_offset_; // Current offset in block
32
+
33
+ // crc32c values for all supported record types. These are
34
+ // pre-computed to reduce the overhead of computing the crc of the
35
+ // record type stored in the header.
36
+ uint32_t type_crc_[kMaxRecordType + 1];
37
+
38
+ Status EmitPhysicalRecord(RecordType type, const char* ptr, size_t length);
39
+
40
+ // No copying allowed
41
+ Writer(const Writer&);
42
+ void operator=(const Writer&);
43
+ };
44
+
45
+ } // namespace log
46
+ } // namespace leveldb
47
+
48
+ #endif // STORAGE_LEVELDB_DB_LOG_WRITER_H_
@@ -0,0 +1,145 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "db/memtable.h"
6
+ #include "db/dbformat.h"
7
+ #include "leveldb/comparator.h"
8
+ #include "leveldb/env.h"
9
+ #include "leveldb/iterator.h"
10
+ #include "util/coding.h"
11
+
12
+ namespace leveldb {
13
+
14
+ static Slice GetLengthPrefixedSlice(const char* data) {
15
+ uint32_t len;
16
+ const char* p = data;
17
+ p = GetVarint32Ptr(p, p + 5, &len); // +5: we assume "p" is not corrupted
18
+ return Slice(p, len);
19
+ }
20
+
21
+ MemTable::MemTable(const InternalKeyComparator& cmp)
22
+ : comparator_(cmp),
23
+ refs_(0),
24
+ table_(comparator_, &arena_) {
25
+ }
26
+
27
+ MemTable::~MemTable() {
28
+ assert(refs_ == 0);
29
+ }
30
+
31
+ size_t MemTable::ApproximateMemoryUsage() { return arena_.MemoryUsage(); }
32
+
33
+ int MemTable::KeyComparator::operator()(const char* aptr, const char* bptr)
34
+ const {
35
+ // Internal keys are encoded as length-prefixed strings.
36
+ Slice a = GetLengthPrefixedSlice(aptr);
37
+ Slice b = GetLengthPrefixedSlice(bptr);
38
+ return comparator.Compare(a, b);
39
+ }
40
+
41
+ // Encode a suitable internal key target for "target" and return it.
42
+ // Uses *scratch as scratch space, and the returned pointer will point
43
+ // into this scratch space.
44
+ static const char* EncodeKey(std::string* scratch, const Slice& target) {
45
+ scratch->clear();
46
+ PutVarint32(scratch, target.size());
47
+ scratch->append(target.data(), target.size());
48
+ return scratch->data();
49
+ }
50
+
51
+ class MemTableIterator: public Iterator {
52
+ public:
53
+ explicit MemTableIterator(MemTable::Table* table) : iter_(table) { }
54
+
55
+ virtual bool Valid() const { return iter_.Valid(); }
56
+ virtual void Seek(const Slice& k) { iter_.Seek(EncodeKey(&tmp_, k)); }
57
+ virtual void SeekToFirst() { iter_.SeekToFirst(); }
58
+ virtual void SeekToLast() { iter_.SeekToLast(); }
59
+ virtual void Next() { iter_.Next(); }
60
+ virtual void Prev() { iter_.Prev(); }
61
+ virtual Slice key() const { return GetLengthPrefixedSlice(iter_.key()); }
62
+ virtual Slice value() const {
63
+ Slice key_slice = GetLengthPrefixedSlice(iter_.key());
64
+ return GetLengthPrefixedSlice(key_slice.data() + key_slice.size());
65
+ }
66
+
67
+ virtual Status status() const { return Status::OK(); }
68
+
69
+ private:
70
+ MemTable::Table::Iterator iter_;
71
+ std::string tmp_; // For passing to EncodeKey
72
+
73
+ // No copying allowed
74
+ MemTableIterator(const MemTableIterator&);
75
+ void operator=(const MemTableIterator&);
76
+ };
77
+
78
+ Iterator* MemTable::NewIterator() {
79
+ return new MemTableIterator(&table_);
80
+ }
81
+
82
+ void MemTable::Add(SequenceNumber s, ValueType type,
83
+ const Slice& key,
84
+ const Slice& value) {
85
+ // Format of an entry is concatenation of:
86
+ // key_size : varint32 of internal_key.size()
87
+ // key bytes : char[internal_key.size()]
88
+ // value_size : varint32 of value.size()
89
+ // value bytes : char[value.size()]
90
+ size_t key_size = key.size();
91
+ size_t val_size = value.size();
92
+ size_t internal_key_size = key_size + 8;
93
+ const size_t encoded_len =
94
+ VarintLength(internal_key_size) + internal_key_size +
95
+ VarintLength(val_size) + val_size;
96
+ char* buf = arena_.Allocate(encoded_len);
97
+ char* p = EncodeVarint32(buf, internal_key_size);
98
+ memcpy(p, key.data(), key_size);
99
+ p += key_size;
100
+ EncodeFixed64(p, (s << 8) | type);
101
+ p += 8;
102
+ p = EncodeVarint32(p, val_size);
103
+ memcpy(p, value.data(), val_size);
104
+ assert((p + val_size) - buf == encoded_len);
105
+ table_.Insert(buf);
106
+ }
107
+
108
+ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
109
+ Slice memkey = key.memtable_key();
110
+ Table::Iterator iter(&table_);
111
+ iter.Seek(memkey.data());
112
+ if (iter.Valid()) {
113
+ // entry format is:
114
+ // klength varint32
115
+ // userkey char[klength]
116
+ // tag uint64
117
+ // vlength varint32
118
+ // value char[vlength]
119
+ // Check that it belongs to same user key. We do not check the
120
+ // sequence number since the Seek() call above should have skipped
121
+ // all entries with overly large sequence numbers.
122
+ const char* entry = iter.key();
123
+ uint32_t key_length;
124
+ const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
125
+ if (comparator_.comparator.user_comparator()->Compare(
126
+ Slice(key_ptr, key_length - 8),
127
+ key.user_key()) == 0) {
128
+ // Correct user key
129
+ const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
130
+ switch (static_cast<ValueType>(tag & 0xff)) {
131
+ case kTypeValue: {
132
+ Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
133
+ value->assign(v.data(), v.size());
134
+ return true;
135
+ }
136
+ case kTypeDeletion:
137
+ *s = Status::NotFound(Slice());
138
+ return true;
139
+ }
140
+ }
141
+ }
142
+ return false;
143
+ }
144
+
145
+ } // namespace leveldb
@@ -0,0 +1,91 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #ifndef STORAGE_LEVELDB_DB_MEMTABLE_H_
6
+ #define STORAGE_LEVELDB_DB_MEMTABLE_H_
7
+
8
+ #include <string>
9
+ #include "leveldb/db.h"
10
+ #include "db/dbformat.h"
11
+ #include "db/skiplist.h"
12
+ #include "util/arena.h"
13
+
14
+ namespace leveldb {
15
+
16
+ class InternalKeyComparator;
17
+ class Mutex;
18
+ class MemTableIterator;
19
+
20
+ class MemTable {
21
+ public:
22
+ // MemTables are reference counted. The initial reference count
23
+ // is zero and the caller must call Ref() at least once.
24
+ explicit MemTable(const InternalKeyComparator& comparator);
25
+
26
+ // Increase reference count.
27
+ void Ref() { ++refs_; }
28
+
29
+ // Drop reference count. Delete if no more references exist.
30
+ void Unref() {
31
+ --refs_;
32
+ assert(refs_ >= 0);
33
+ if (refs_ <= 0) {
34
+ delete this;
35
+ }
36
+ }
37
+
38
+ // Returns an estimate of the number of bytes of data in use by this
39
+ // data structure.
40
+ //
41
+ // REQUIRES: external synchronization to prevent simultaneous
42
+ // operations on the same MemTable.
43
+ size_t ApproximateMemoryUsage();
44
+
45
+ // Return an iterator that yields the contents of the memtable.
46
+ //
47
+ // The caller must ensure that the underlying MemTable remains live
48
+ // while the returned iterator is live. The keys returned by this
49
+ // iterator are internal keys encoded by AppendInternalKey in the
50
+ // db/format.{h,cc} module.
51
+ Iterator* NewIterator();
52
+
53
+ // Add an entry into memtable that maps key to value at the
54
+ // specified sequence number and with the specified type.
55
+ // Typically value will be empty if type==kTypeDeletion.
56
+ void Add(SequenceNumber seq, ValueType type,
57
+ const Slice& key,
58
+ const Slice& value);
59
+
60
+ // If memtable contains a value for key, store it in *value and return true.
61
+ // If memtable contains a deletion for key, store a NotFound() error
62
+ // in *status and return true.
63
+ // Else, return false.
64
+ bool Get(const LookupKey& key, std::string* value, Status* s);
65
+
66
+ private:
67
+ ~MemTable(); // Private since only Unref() should be used to delete it
68
+
69
+ struct KeyComparator {
70
+ const InternalKeyComparator comparator;
71
+ explicit KeyComparator(const InternalKeyComparator& c) : comparator(c) { }
72
+ int operator()(const char* a, const char* b) const;
73
+ };
74
+ friend class MemTableIterator;
75
+ friend class MemTableBackwardIterator;
76
+
77
+ typedef SkipList<const char*, KeyComparator> Table;
78
+
79
+ KeyComparator comparator_;
80
+ int refs_;
81
+ Arena arena_;
82
+ Table table_;
83
+
84
+ // No copying allowed
85
+ MemTable(const MemTable&);
86
+ void operator=(const MemTable&);
87
+ };
88
+
89
+ } // namespace leveldb
90
+
91
+ #endif // STORAGE_LEVELDB_DB_MEMTABLE_H_
@@ -0,0 +1,389 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+ //
5
+ // We recover the contents of the descriptor from the other files we find.
6
+ // (1) Any log files are first converted to tables
7
+ // (2) We scan every table to compute
8
+ // (a) smallest/largest for the table
9
+ // (b) largest sequence number in the table
10
+ // (3) We generate descriptor contents:
11
+ // - log number is set to zero
12
+ // - next-file-number is set to 1 + largest file number we found
13
+ // - last-sequence-number is set to largest sequence# found across
14
+ // all tables (see 2b)
15
+ // - compaction pointers are cleared
16
+ // - every table file is added at level 0
17
+ //
18
+ // Possible optimization 1:
19
+ // (a) Compute total size and use to pick appropriate max-level M
20
+ // (b) Sort tables by largest sequence# in the table
21
+ // (c) For each table: if it overlaps earlier table, place in level-0,
22
+ // else place in level-M.
23
+ // Possible optimization 2:
24
+ // Store per-table metadata (smallest, largest, largest-seq#, ...)
25
+ // in the table's meta section to speed up ScanTable.
26
+
27
+ #include "db/builder.h"
28
+ #include "db/db_impl.h"
29
+ #include "db/dbformat.h"
30
+ #include "db/filename.h"
31
+ #include "db/log_reader.h"
32
+ #include "db/log_writer.h"
33
+ #include "db/memtable.h"
34
+ #include "db/table_cache.h"
35
+ #include "db/version_edit.h"
36
+ #include "db/write_batch_internal.h"
37
+ #include "leveldb/comparator.h"
38
+ #include "leveldb/db.h"
39
+ #include "leveldb/env.h"
40
+
41
+ namespace leveldb {
42
+
43
+ namespace {
44
+
45
+ class Repairer {
46
+ public:
47
+ Repairer(const std::string& dbname, const Options& options)
48
+ : dbname_(dbname),
49
+ env_(options.env),
50
+ icmp_(options.comparator),
51
+ ipolicy_(options.filter_policy),
52
+ options_(SanitizeOptions(dbname, &icmp_, &ipolicy_, options)),
53
+ owns_info_log_(options_.info_log != options.info_log),
54
+ owns_cache_(options_.block_cache != options.block_cache),
55
+ next_file_number_(1) {
56
+ // TableCache can be small since we expect each table to be opened once.
57
+ table_cache_ = new TableCache(dbname_, &options_, 10);
58
+ }
59
+
60
+ ~Repairer() {
61
+ delete table_cache_;
62
+ if (owns_info_log_) {
63
+ delete options_.info_log;
64
+ }
65
+ if (owns_cache_) {
66
+ delete options_.block_cache;
67
+ }
68
+ }
69
+
70
+ Status Run() {
71
+ Status status = FindFiles();
72
+ if (status.ok()) {
73
+ ConvertLogFilesToTables();
74
+ ExtractMetaData();
75
+ status = WriteDescriptor();
76
+ }
77
+ if (status.ok()) {
78
+ unsigned long long bytes = 0;
79
+ for (size_t i = 0; i < tables_.size(); i++) {
80
+ bytes += tables_[i].meta.file_size;
81
+ }
82
+ Log(options_.info_log,
83
+ "**** Repaired leveldb %s; "
84
+ "recovered %d files; %llu bytes. "
85
+ "Some data may have been lost. "
86
+ "****",
87
+ dbname_.c_str(),
88
+ static_cast<int>(tables_.size()),
89
+ bytes);
90
+ }
91
+ return status;
92
+ }
93
+
94
+ private:
95
+ struct TableInfo {
96
+ FileMetaData meta;
97
+ SequenceNumber max_sequence;
98
+ };
99
+
100
+ std::string const dbname_;
101
+ Env* const env_;
102
+ InternalKeyComparator const icmp_;
103
+ InternalFilterPolicy const ipolicy_;
104
+ Options const options_;
105
+ bool owns_info_log_;
106
+ bool owns_cache_;
107
+ TableCache* table_cache_;
108
+ VersionEdit edit_;
109
+
110
+ std::vector<std::string> manifests_;
111
+ std::vector<uint64_t> table_numbers_;
112
+ std::vector<uint64_t> logs_;
113
+ std::vector<TableInfo> tables_;
114
+ uint64_t next_file_number_;
115
+
116
+ Status FindFiles() {
117
+ std::vector<std::string> filenames;
118
+ Status status = env_->GetChildren(dbname_, &filenames);
119
+ if (!status.ok()) {
120
+ return status;
121
+ }
122
+ if (filenames.empty()) {
123
+ return Status::IOError(dbname_, "repair found no files");
124
+ }
125
+
126
+ uint64_t number;
127
+ FileType type;
128
+ for (size_t i = 0; i < filenames.size(); i++) {
129
+ if (ParseFileName(filenames[i], &number, &type)) {
130
+ if (type == kDescriptorFile) {
131
+ manifests_.push_back(filenames[i]);
132
+ } else {
133
+ if (number + 1 > next_file_number_) {
134
+ next_file_number_ = number + 1;
135
+ }
136
+ if (type == kLogFile) {
137
+ logs_.push_back(number);
138
+ } else if (type == kTableFile) {
139
+ table_numbers_.push_back(number);
140
+ } else {
141
+ // Ignore other files
142
+ }
143
+ }
144
+ }
145
+ }
146
+ return status;
147
+ }
148
+
149
+ void ConvertLogFilesToTables() {
150
+ for (size_t i = 0; i < logs_.size(); i++) {
151
+ std::string logname = LogFileName(dbname_, logs_[i]);
152
+ Status status = ConvertLogToTable(logs_[i]);
153
+ if (!status.ok()) {
154
+ Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
155
+ (unsigned long long) logs_[i],
156
+ status.ToString().c_str());
157
+ }
158
+ ArchiveFile(logname);
159
+ }
160
+ }
161
+
162
+ Status ConvertLogToTable(uint64_t log) {
163
+ struct LogReporter : public log::Reader::Reporter {
164
+ Env* env;
165
+ Logger* info_log;
166
+ uint64_t lognum;
167
+ virtual void Corruption(size_t bytes, const Status& s) {
168
+ // We print error messages for corruption, but continue repairing.
169
+ Log(info_log, "Log #%llu: dropping %d bytes; %s",
170
+ (unsigned long long) lognum,
171
+ static_cast<int>(bytes),
172
+ s.ToString().c_str());
173
+ }
174
+ };
175
+
176
+ // Open the log file
177
+ std::string logname = LogFileName(dbname_, log);
178
+ SequentialFile* lfile;
179
+ Status status = env_->NewSequentialFile(logname, &lfile);
180
+ if (!status.ok()) {
181
+ return status;
182
+ }
183
+
184
+ // Create the log reader.
185
+ LogReporter reporter;
186
+ reporter.env = env_;
187
+ reporter.info_log = options_.info_log;
188
+ reporter.lognum = log;
189
+ // We intentially make log::Reader do checksumming so that
190
+ // corruptions cause entire commits to be skipped instead of
191
+ // propagating bad information (like overly large sequence
192
+ // numbers).
193
+ log::Reader reader(lfile, &reporter, false/*do not checksum*/,
194
+ 0/*initial_offset*/);
195
+
196
+ // Read all the records and add to a memtable
197
+ std::string scratch;
198
+ Slice record;
199
+ WriteBatch batch;
200
+ MemTable* mem = new MemTable(icmp_);
201
+ mem->Ref();
202
+ int counter = 0;
203
+ while (reader.ReadRecord(&record, &scratch)) {
204
+ if (record.size() < 12) {
205
+ reporter.Corruption(
206
+ record.size(), Status::Corruption("log record too small"));
207
+ continue;
208
+ }
209
+ WriteBatchInternal::SetContents(&batch, record);
210
+ status = WriteBatchInternal::InsertInto(&batch, mem);
211
+ if (status.ok()) {
212
+ counter += WriteBatchInternal::Count(&batch);
213
+ } else {
214
+ Log(options_.info_log, "Log #%llu: ignoring %s",
215
+ (unsigned long long) log,
216
+ status.ToString().c_str());
217
+ status = Status::OK(); // Keep going with rest of file
218
+ }
219
+ }
220
+ delete lfile;
221
+
222
+ // Do not record a version edit for this conversion to a Table
223
+ // since ExtractMetaData() will also generate edits.
224
+ FileMetaData meta;
225
+ meta.number = next_file_number_++;
226
+ Iterator* iter = mem->NewIterator();
227
+ status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
228
+ delete iter;
229
+ mem->Unref();
230
+ mem = NULL;
231
+ if (status.ok()) {
232
+ if (meta.file_size > 0) {
233
+ table_numbers_.push_back(meta.number);
234
+ }
235
+ }
236
+ Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
237
+ (unsigned long long) log,
238
+ counter,
239
+ (unsigned long long) meta.number,
240
+ status.ToString().c_str());
241
+ return status;
242
+ }
243
+
244
+ void ExtractMetaData() {
245
+ std::vector<TableInfo> kept;
246
+ for (size_t i = 0; i < table_numbers_.size(); i++) {
247
+ TableInfo t;
248
+ t.meta.number = table_numbers_[i];
249
+ Status status = ScanTable(&t);
250
+ if (!status.ok()) {
251
+ std::string fname = TableFileName(dbname_, table_numbers_[i]);
252
+ Log(options_.info_log, "Table #%llu: ignoring %s",
253
+ (unsigned long long) table_numbers_[i],
254
+ status.ToString().c_str());
255
+ ArchiveFile(fname);
256
+ } else {
257
+ tables_.push_back(t);
258
+ }
259
+ }
260
+ }
261
+
262
+ Status ScanTable(TableInfo* t) {
263
+ std::string fname = TableFileName(dbname_, t->meta.number);
264
+ int counter = 0;
265
+ Status status = env_->GetFileSize(fname, &t->meta.file_size);
266
+ if (status.ok()) {
267
+ Iterator* iter = table_cache_->NewIterator(
268
+ ReadOptions(), t->meta.number, t->meta.file_size);
269
+ bool empty = true;
270
+ ParsedInternalKey parsed;
271
+ t->max_sequence = 0;
272
+ for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
273
+ Slice key = iter->key();
274
+ if (!ParseInternalKey(key, &parsed)) {
275
+ Log(options_.info_log, "Table #%llu: unparsable key %s",
276
+ (unsigned long long) t->meta.number,
277
+ EscapeString(key).c_str());
278
+ continue;
279
+ }
280
+
281
+ counter++;
282
+ if (empty) {
283
+ empty = false;
284
+ t->meta.smallest.DecodeFrom(key);
285
+ }
286
+ t->meta.largest.DecodeFrom(key);
287
+ if (parsed.sequence > t->max_sequence) {
288
+ t->max_sequence = parsed.sequence;
289
+ }
290
+ }
291
+ if (!iter->status().ok()) {
292
+ status = iter->status();
293
+ }
294
+ delete iter;
295
+ }
296
+ Log(options_.info_log, "Table #%llu: %d entries %s",
297
+ (unsigned long long) t->meta.number,
298
+ counter,
299
+ status.ToString().c_str());
300
+ return status;
301
+ }
302
+
303
+ Status WriteDescriptor() {
304
+ std::string tmp = TempFileName(dbname_, 1);
305
+ WritableFile* file;
306
+ Status status = env_->NewWritableFile(tmp, &file);
307
+ if (!status.ok()) {
308
+ return status;
309
+ }
310
+
311
+ SequenceNumber max_sequence = 0;
312
+ for (size_t i = 0; i < tables_.size(); i++) {
313
+ if (max_sequence < tables_[i].max_sequence) {
314
+ max_sequence = tables_[i].max_sequence;
315
+ }
316
+ }
317
+
318
+ edit_.SetComparatorName(icmp_.user_comparator()->Name());
319
+ edit_.SetLogNumber(0);
320
+ edit_.SetNextFile(next_file_number_);
321
+ edit_.SetLastSequence(max_sequence);
322
+
323
+ for (size_t i = 0; i < tables_.size(); i++) {
324
+ // TODO(opt): separate out into multiple levels
325
+ const TableInfo& t = tables_[i];
326
+ edit_.AddFile(0, t.meta.number, t.meta.file_size,
327
+ t.meta.smallest, t.meta.largest);
328
+ }
329
+
330
+ //fprintf(stderr, "NewDescriptor:\n%s\n", edit_.DebugString().c_str());
331
+ {
332
+ log::Writer log(file);
333
+ std::string record;
334
+ edit_.EncodeTo(&record);
335
+ status = log.AddRecord(record);
336
+ }
337
+ if (status.ok()) {
338
+ status = file->Close();
339
+ }
340
+ delete file;
341
+ file = NULL;
342
+
343
+ if (!status.ok()) {
344
+ env_->DeleteFile(tmp);
345
+ } else {
346
+ // Discard older manifests
347
+ for (size_t i = 0; i < manifests_.size(); i++) {
348
+ ArchiveFile(dbname_ + "/" + manifests_[i]);
349
+ }
350
+
351
+ // Install new manifest
352
+ status = env_->RenameFile(tmp, DescriptorFileName(dbname_, 1));
353
+ if (status.ok()) {
354
+ status = SetCurrentFile(env_, dbname_, 1);
355
+ } else {
356
+ env_->DeleteFile(tmp);
357
+ }
358
+ }
359
+ return status;
360
+ }
361
+
362
+ void ArchiveFile(const std::string& fname) {
363
+ // Move into another directory. E.g., for
364
+ // dir/foo
365
+ // rename to
366
+ // dir/lost/foo
367
+ const char* slash = strrchr(fname.c_str(), '/');
368
+ std::string new_dir;
369
+ if (slash != NULL) {
370
+ new_dir.assign(fname.data(), slash - fname.data());
371
+ }
372
+ new_dir.append("/lost");
373
+ env_->CreateDir(new_dir); // Ignore error
374
+ std::string new_file = new_dir;
375
+ new_file.append("/");
376
+ new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
377
+ Status s = env_->RenameFile(fname, new_file);
378
+ Log(options_.info_log, "Archiving %s: %s\n",
379
+ fname.c_str(), s.ToString().c_str());
380
+ }
381
+ };
382
+ } // namespace
383
+
384
+ Status RepairDB(const std::string& dbname, const Options& options) {
385
+ Repairer repairer(dbname, options);
386
+ return repairer.Run();
387
+ }
388
+
389
+ } // namespace leveldb