leveldb-ruby 0.7 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/README +1 -1
  2. data/leveldb/Makefile +70 -29
  3. data/leveldb/build_detect_platform +74 -0
  4. data/leveldb/db/builder.cc +2 -4
  5. data/leveldb/db/builder.h +4 -6
  6. data/leveldb/db/c.cc +471 -0
  7. data/leveldb/db/corruption_test.cc +21 -16
  8. data/leveldb/db/db_bench.cc +400 -200
  9. data/leveldb/db/db_impl.cc +276 -131
  10. data/leveldb/db/db_impl.h +22 -10
  11. data/leveldb/db/db_iter.cc +2 -1
  12. data/leveldb/db/db_test.cc +391 -43
  13. data/leveldb/db/dbformat.cc +31 -0
  14. data/leveldb/db/dbformat.h +51 -1
  15. data/leveldb/db/filename.h +1 -1
  16. data/leveldb/db/log_format.h +1 -1
  17. data/leveldb/db/log_reader.cc +16 -11
  18. data/leveldb/db/memtable.cc +37 -0
  19. data/leveldb/db/memtable.h +6 -0
  20. data/leveldb/db/repair.cc +17 -14
  21. data/leveldb/db/skiplist_test.cc +2 -2
  22. data/leveldb/db/version_edit.cc +7 -9
  23. data/leveldb/db/version_edit.h +2 -1
  24. data/leveldb/db/version_set.cc +416 -104
  25. data/leveldb/db/version_set.h +78 -14
  26. data/leveldb/db/version_set_test.cc +179 -0
  27. data/leveldb/db/write_batch_internal.h +2 -0
  28. data/leveldb/include/leveldb/c.h +246 -0
  29. data/leveldb/include/leveldb/db.h +14 -2
  30. data/leveldb/include/leveldb/env.h +31 -10
  31. data/leveldb/include/leveldb/options.h +7 -18
  32. data/leveldb/include/leveldb/slice.h +2 -2
  33. data/leveldb/include/leveldb/status.h +1 -1
  34. data/leveldb/port/atomic_pointer.h +144 -0
  35. data/leveldb/port/port.h +0 -2
  36. data/leveldb/port/port_android.h +7 -1
  37. data/leveldb/port/port_example.h +11 -1
  38. data/leveldb/port/port_posix.h +56 -38
  39. data/leveldb/table/format.cc +12 -8
  40. data/leveldb/table/table_test.cc +16 -7
  41. data/leveldb/util/cache.cc +173 -100
  42. data/leveldb/util/cache_test.cc +28 -11
  43. data/leveldb/util/coding.h +4 -4
  44. data/leveldb/util/comparator.cc +1 -0
  45. data/leveldb/util/env.cc +10 -5
  46. data/leveldb/util/env_posix.cc +48 -87
  47. data/leveldb/util/histogram.cc +11 -0
  48. data/leveldb/util/histogram.h +1 -0
  49. data/leveldb/util/posix_logger.h +98 -0
  50. data/leveldb/util/testharness.cc +12 -0
  51. data/leveldb/util/testharness.h +10 -1
  52. data/lib/leveldb.rb +11 -3
  53. metadata +41 -22
@@ -31,6 +31,18 @@ std::string ParsedInternalKey::DebugString() const {
31
31
  return result;
32
32
  }
33
33
 
34
+ std::string InternalKey::DebugString() const {
35
+ std::string result;
36
+ ParsedInternalKey parsed;
37
+ if (ParseInternalKey(rep_, &parsed)) {
38
+ result = parsed.DebugString();
39
+ } else {
40
+ result = "(bad)";
41
+ result.append(EscapeString(rep_));
42
+ }
43
+ return result;
44
+ }
45
+
34
46
  const char* InternalKeyComparator::Name() const {
35
47
  return "leveldb.InternalKeyComparator";
36
48
  }
@@ -84,4 +96,23 @@ void InternalKeyComparator::FindShortSuccessor(std::string* key) const {
84
96
  }
85
97
  }
86
98
 
99
+ LookupKey::LookupKey(const Slice& user_key, SequenceNumber s) {
100
+ size_t usize = user_key.size();
101
+ size_t needed = usize + 13; // A conservative estimate
102
+ char* dst;
103
+ if (needed <= sizeof(space_)) {
104
+ dst = space_;
105
+ } else {
106
+ dst = new char[needed];
107
+ }
108
+ start_ = dst;
109
+ dst = EncodeVarint32(dst, usize + 8);
110
+ kstart_ = dst;
111
+ memcpy(dst, user_key.data(), usize);
112
+ dst += usize;
113
+ EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek));
114
+ dst += 8;
115
+ end_ = dst;
116
+ }
117
+
87
118
  }
@@ -29,6 +29,14 @@ static const int kL0_SlowdownWritesTrigger = 8;
29
29
  // Maximum number of level-0 files. We stop writes at this point.
30
30
  static const int kL0_StopWritesTrigger = 12;
31
31
 
32
+ // Maximum level to which a new compacted memtable is pushed if it
33
+ // does not create overlap. We try to push to level 2 to avoid the
34
+ // relatively expensive level 0=>1 compactions and to avoid some
35
+ // expensive manifest file operations. We do not push all the way to
36
+ // the largest level since that can generate a lot of wasted disk
37
+ // space if the same key space is being repeatedly overwritten.
38
+ static const int kMaxMemCompactLevel = 2;
39
+
32
40
  }
33
41
 
34
42
  class InternalKey;
@@ -38,7 +46,7 @@ class InternalKey;
38
46
  // data structures.
39
47
  enum ValueType {
40
48
  kTypeDeletion = 0x0,
41
- kTypeValue = 0x1,
49
+ kTypeValue = 0x1
42
50
  };
43
51
  // kValueTypeForSeek defines the ValueType that should be passed when
44
52
  // constructing a ParsedInternalKey object for seeking to a particular
@@ -141,6 +149,8 @@ class InternalKey {
141
149
  }
142
150
 
143
151
  void Clear() { rep_.clear(); }
152
+
153
+ std::string DebugString() const;
144
154
  };
145
155
 
146
156
  inline int InternalKeyComparator::Compare(
@@ -160,6 +170,46 @@ inline bool ParseInternalKey(const Slice& internal_key,
160
170
  return (c <= static_cast<unsigned char>(kTypeValue));
161
171
  }
162
172
 
173
+ // A helper class useful for DBImpl::Get()
174
+ class LookupKey {
175
+ public:
176
+ // Initialize *this for looking up user_key at a snapshot with
177
+ // the specified sequence number.
178
+ LookupKey(const Slice& user_key, SequenceNumber sequence);
179
+
180
+ ~LookupKey();
181
+
182
+ // Return a key suitable for lookup in a MemTable.
183
+ Slice memtable_key() const { return Slice(start_, end_ - start_); }
184
+
185
+ // Return an internal key (suitable for passing to an internal iterator)
186
+ Slice internal_key() const { return Slice(kstart_, end_ - kstart_); }
187
+
188
+ // Return the user key
189
+ Slice user_key() const { return Slice(kstart_, end_ - kstart_ - 8); }
190
+
191
+ private:
192
+ // We construct a char array of the form:
193
+ // klength varint32 <-- start_
194
+ // userkey char[klength] <-- kstart_
195
+ // tag uint64
196
+ // <-- end_
197
+ // The array is a suitable MemTable key.
198
+ // The suffix starting with "userkey" can be used as an InternalKey.
199
+ const char* start_;
200
+ const char* kstart_;
201
+ const char* end_;
202
+ char space_[200]; // Avoid allocation for short keys
203
+
204
+ // No copying allowed
205
+ LookupKey(const LookupKey&);
206
+ void operator=(const LookupKey&);
207
+ };
208
+
209
+ inline LookupKey::~LookupKey() {
210
+ if (start_ != space_) delete[] start_;
211
+ }
212
+
163
213
  }
164
214
 
165
215
  #endif // STORAGE_LEVELDB_DB_FORMAT_H_
@@ -24,7 +24,7 @@ enum FileType {
24
24
  kDescriptorFile,
25
25
  kCurrentFile,
26
26
  kTempFile,
27
- kInfoLogFile, // Either the current one, or an old one
27
+ kInfoLogFile // Either the current one, or an old one
28
28
  };
29
29
 
30
30
  // Return the name of the log file with the specified number
@@ -20,7 +20,7 @@ enum RecordType {
20
20
  // For fragments
21
21
  kFirstType = 2,
22
22
  kMiddleType = 3,
23
- kLastType = 4,
23
+ kLastType = 4
24
24
  };
25
25
  static const int kMaxRecordType = kLastType;
26
26
 
@@ -4,6 +4,7 @@
4
4
 
5
5
  #include "db/log_reader.h"
6
6
 
7
+ #include <stdio.h>
7
8
  #include "leveldb/env.h"
8
9
  #include "util/coding.h"
9
10
  #include "util/crc32c.h"
@@ -72,7 +73,8 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
72
73
  Slice fragment;
73
74
  while (true) {
74
75
  uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
75
- switch (ReadPhysicalRecord(&fragment)) {
76
+ const unsigned int record_type = ReadPhysicalRecord(&fragment);
77
+ switch (record_type) {
76
78
  case kFullType:
77
79
  if (in_fragmented_record) {
78
80
  // Handle bug in earlier versions of log::Writer where
@@ -144,13 +146,16 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
144
146
  }
145
147
  break;
146
148
 
147
- default:
149
+ default: {
150
+ char buf[40];
151
+ snprintf(buf, sizeof(buf), "unknown record type %u", record_type);
148
152
  ReportCorruption(
149
153
  (fragment.size() + (in_fragmented_record ? scratch->size() : 0)),
150
- "unknown record type");
154
+ buf);
151
155
  in_fragmented_record = false;
152
156
  scratch->clear();
153
157
  break;
158
+ }
154
159
  }
155
160
  }
156
161
  return false;
@@ -212,16 +217,16 @@ unsigned int Reader::ReadPhysicalRecord(Slice* result) {
212
217
  return kBadRecord;
213
218
  }
214
219
 
220
+ if (type == kZeroType && length == 0) {
221
+ // Skip zero length record without reporting any drops since
222
+ // such records are produced by the mmap based writing code in
223
+ // env_posix.cc that preallocates file regions.
224
+ buffer_.clear();
225
+ return kBadRecord;
226
+ }
227
+
215
228
  // Check crc
216
229
  if (checksum_) {
217
- if (type == kZeroType && length == 0) {
218
- // Skip zero length record without reporting any drops since
219
- // such records are produced by the mmap based writing code in
220
- // env_posix.cc that preallocates file regions.
221
- buffer_.clear();
222
- return kBadRecord;
223
- }
224
-
225
230
  uint32_t expected_crc = crc32c::Unmask(DecodeFixed32(header));
226
231
  uint32_t actual_crc = crc32c::Value(header + 6, 1 + length);
227
232
  if (actual_crc != expected_crc) {
@@ -105,4 +105,41 @@ void MemTable::Add(SequenceNumber s, ValueType type,
105
105
  table_.Insert(buf);
106
106
  }
107
107
 
108
+ bool MemTable::Get(const LookupKey& key, std::string* value, Status* s) {
109
+ Slice memkey = key.memtable_key();
110
+ Table::Iterator iter(&table_);
111
+ iter.Seek(memkey.data());
112
+ if (iter.Valid()) {
113
+ // entry format is:
114
+ // klength varint32
115
+ // userkey char[klength]
116
+ // tag uint64
117
+ // vlength varint32
118
+ // value char[vlength]
119
+ // Check that it belongs to same user key. We do not check the
120
+ // sequence number since the Seek() call above should have skipped
121
+ // all entries with overly large sequence numbers.
122
+ const char* entry = iter.key();
123
+ uint32_t key_length;
124
+ const char* key_ptr = GetVarint32Ptr(entry, entry+5, &key_length);
125
+ if (comparator_.comparator.user_comparator()->Compare(
126
+ Slice(key_ptr, key_length - 8),
127
+ key.user_key()) == 0) {
128
+ // Correct user key
129
+ const uint64_t tag = DecodeFixed64(key_ptr + key_length - 8);
130
+ switch (static_cast<ValueType>(tag & 0xff)) {
131
+ case kTypeValue: {
132
+ Slice v = GetLengthPrefixedSlice(key_ptr + key_length);
133
+ value->assign(v.data(), v.size());
134
+ return true;
135
+ }
136
+ case kTypeDeletion:
137
+ *s = Status::NotFound(Slice());
138
+ return true;
139
+ }
140
+ }
141
+ }
142
+ return false;
143
+ }
144
+
108
145
  }
@@ -57,6 +57,12 @@ class MemTable {
57
57
  const Slice& key,
58
58
  const Slice& value);
59
59
 
60
+ // If memtable contains a value for key, store it in *value and return true.
61
+ // If memtable contains a deletion for key, store a NotFound() error
62
+ // in *s and return true.
63
+ // Else, return false.
64
+ bool Get(const LookupKey& key, std::string* value, Status* s);
65
+
60
66
  private:
61
67
  ~MemTable(); // Private since only Unref() should be used to delete it
62
68
 
data/leveldb/db/repair.cc CHANGED
@@ -50,6 +50,7 @@ class Repairer {
50
50
  icmp_(options.comparator),
51
51
  options_(SanitizeOptions(dbname, &icmp_, options)),
52
52
  owns_info_log_(options_.info_log != options.info_log),
53
+ owns_cache_(options_.block_cache != options.block_cache),
53
54
  next_file_number_(1) {
54
55
  // TableCache can be small since we expect each table to be opened once.
55
56
  table_cache_ = new TableCache(dbname_, &options_, 10);
@@ -60,6 +61,9 @@ class Repairer {
60
61
  if (owns_info_log_) {
61
62
  delete options_.info_log;
62
63
  }
64
+ if (owns_cache_) {
65
+ delete options_.block_cache;
66
+ }
63
67
  }
64
68
 
65
69
  Status Run() {
@@ -74,7 +78,7 @@ class Repairer {
74
78
  for (size_t i = 0; i < tables_.size(); i++) {
75
79
  bytes += tables_[i].meta.file_size;
76
80
  }
77
- Log(env_, options_.info_log,
81
+ Log(options_.info_log,
78
82
  "**** Repaired leveldb %s; "
79
83
  "recovered %d files; %llu bytes. "
80
84
  "Some data may have been lost. "
@@ -97,6 +101,7 @@ class Repairer {
97
101
  InternalKeyComparator const icmp_;
98
102
  Options const options_;
99
103
  bool owns_info_log_;
104
+ bool owns_cache_;
100
105
  TableCache* table_cache_;
101
106
  VersionEdit edit_;
102
107
 
@@ -144,7 +149,7 @@ class Repairer {
144
149
  std::string logname = LogFileName(dbname_, logs_[i]);
145
150
  Status status = ConvertLogToTable(logs_[i]);
146
151
  if (!status.ok()) {
147
- Log(env_, options_.info_log, "Log #%llu: ignoring conversion error: %s",
152
+ Log(options_.info_log, "Log #%llu: ignoring conversion error: %s",
148
153
  (unsigned long long) logs_[i],
149
154
  status.ToString().c_str());
150
155
  }
@@ -155,11 +160,11 @@ class Repairer {
155
160
  Status ConvertLogToTable(uint64_t log) {
156
161
  struct LogReporter : public log::Reader::Reporter {
157
162
  Env* env;
158
- WritableFile* info_log;
163
+ Logger* info_log;
159
164
  uint64_t lognum;
160
165
  virtual void Corruption(size_t bytes, const Status& s) {
161
166
  // We print error messages for corruption, but continue repairing.
162
- Log(env, info_log, "Log #%llu: dropping %d bytes; %s",
167
+ Log(info_log, "Log #%llu: dropping %d bytes; %s",
163
168
  (unsigned long long) lognum,
164
169
  static_cast<int>(bytes),
165
170
  s.ToString().c_str());
@@ -204,7 +209,7 @@ class Repairer {
204
209
  if (status.ok()) {
205
210
  counter += WriteBatchInternal::Count(&batch);
206
211
  } else {
207
- Log(env_, options_.info_log, "Log #%llu: ignoring %s",
212
+ Log(options_.info_log, "Log #%llu: ignoring %s",
208
213
  (unsigned long long) log,
209
214
  status.ToString().c_str());
210
215
  status = Status::OK(); // Keep going with rest of file
@@ -212,14 +217,12 @@ class Repairer {
212
217
  }
213
218
  delete lfile;
214
219
 
215
- // We ignore any version edits generated by the conversion to a Table
220
+ // Do not record a version edit for this conversion to a Table
216
221
  // since ExtractMetaData() will also generate edits.
217
- VersionEdit skipped;
218
222
  FileMetaData meta;
219
223
  meta.number = next_file_number_++;
220
224
  Iterator* iter = mem->NewIterator();
221
- status = BuildTable(dbname_, env_, options_, table_cache_, iter,
222
- &meta, &skipped);
225
+ status = BuildTable(dbname_, env_, options_, table_cache_, iter, &meta);
223
226
  delete iter;
224
227
  mem->Unref();
225
228
  mem = NULL;
@@ -228,7 +231,7 @@ class Repairer {
228
231
  table_numbers_.push_back(meta.number);
229
232
  }
230
233
  }
231
- Log(env_, options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
234
+ Log(options_.info_log, "Log #%llu: %d ops saved to Table #%llu %s",
232
235
  (unsigned long long) log,
233
236
  counter,
234
237
  (unsigned long long) meta.number,
@@ -244,7 +247,7 @@ class Repairer {
244
247
  Status status = ScanTable(&t);
245
248
  if (!status.ok()) {
246
249
  std::string fname = TableFileName(dbname_, table_numbers_[i]);
247
- Log(env_, options_.info_log, "Table #%llu: ignoring %s",
250
+ Log(options_.info_log, "Table #%llu: ignoring %s",
248
251
  (unsigned long long) table_numbers_[i],
249
252
  status.ToString().c_str());
250
253
  ArchiveFile(fname);
@@ -267,7 +270,7 @@ class Repairer {
267
270
  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
268
271
  Slice key = iter->key();
269
272
  if (!ParseInternalKey(key, &parsed)) {
270
- Log(env_, options_.info_log, "Table #%llu: unparsable key %s",
273
+ Log(options_.info_log, "Table #%llu: unparsable key %s",
271
274
  (unsigned long long) t->meta.number,
272
275
  EscapeString(key).c_str());
273
276
  continue;
@@ -288,7 +291,7 @@ class Repairer {
288
291
  }
289
292
  delete iter;
290
293
  }
291
- Log(env_, options_.info_log, "Table #%llu: %d entries %s",
294
+ Log(options_.info_log, "Table #%llu: %d entries %s",
292
295
  (unsigned long long) t->meta.number,
293
296
  counter,
294
297
  status.ToString().c_str());
@@ -370,7 +373,7 @@ class Repairer {
370
373
  new_file.append("/");
371
374
  new_file.append((slash == NULL) ? fname.c_str() : slash + 1);
372
375
  Status s = env_->RenameFile(fname, new_file);
373
- Log(env_, options_.info_log, "Archiving %s: %s\n",
376
+ Log(options_.info_log, "Archiving %s: %s\n",
374
377
  fname.c_str(), s.ToString().c_str());
375
378
  }
376
379
  };
@@ -238,14 +238,14 @@ class ConcurrentTest {
238
238
  current = MakeKey(K, 0);
239
239
  } else {
240
240
  current = iter.key();
241
- ASSERT_TRUE(IsValidKey(current)) << std::hex << current;
241
+ ASSERT_TRUE(IsValidKey(current)) << current;
242
242
  }
243
243
  ASSERT_LE(pos, current) << "should not go backwards";
244
244
 
245
245
  // Verify that everything in [pos,current) was not present in
246
246
  // initial_state.
247
247
  while (pos < current) {
248
- ASSERT_LT(key(pos), K) << std::hex << pos;
248
+ ASSERT_LT(key(pos), K) << pos;
249
249
 
250
250
  // Note that generation 0 is never inserted, so it is ok if
251
251
  // <*,0,*> is missing.
@@ -20,7 +20,7 @@ enum Tag {
20
20
  kDeletedFile = 6,
21
21
  kNewFile = 7,
22
22
  // 8 was used for large value refs
23
- kPrevLogNumber = 9,
23
+ kPrevLogNumber = 9
24
24
  };
25
25
 
26
26
  void VersionEdit::Clear() {
@@ -235,9 +235,8 @@ std::string VersionEdit::DebugString() const {
235
235
  for (size_t i = 0; i < compact_pointers_.size(); i++) {
236
236
  r.append("\n CompactPointer: ");
237
237
  AppendNumberTo(&r, compact_pointers_[i].first);
238
- r.append(" '");
239
- AppendEscapedStringTo(&r, compact_pointers_[i].second.Encode());
240
- r.append("'");
238
+ r.append(" ");
239
+ r.append(compact_pointers_[i].second.DebugString());
241
240
  }
242
241
  for (DeletedFileSet::const_iterator iter = deleted_files_.begin();
243
242
  iter != deleted_files_.end();
@@ -255,11 +254,10 @@ std::string VersionEdit::DebugString() const {
255
254
  AppendNumberTo(&r, f.number);
256
255
  r.append(" ");
257
256
  AppendNumberTo(&r, f.file_size);
258
- r.append(" '");
259
- AppendEscapedStringTo(&r, f.smallest.Encode());
260
- r.append("' .. '");
261
- AppendEscapedStringTo(&r, f.largest.Encode());
262
- r.append("'");
257
+ r.append(" ");
258
+ r.append(f.smallest.DebugString());
259
+ r.append(" .. ");
260
+ r.append(f.largest.DebugString());
263
261
  }
264
262
  r.append("\n}\n");
265
263
  return r;
@@ -16,12 +16,13 @@ class VersionSet;
16
16
 
17
17
  struct FileMetaData {
18
18
  int refs;
19
+ int allowed_seeks; // Seeks allowed until compaction
19
20
  uint64_t number;
20
21
  uint64_t file_size; // File size in bytes
21
22
  InternalKey smallest; // Smallest internal key served by table
22
23
  InternalKey largest; // Largest internal key served by table
23
24
 
24
- FileMetaData() : refs(0), file_size(0) { }
25
+ FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { }
25
26
  };
26
27
 
27
28
  class VersionEdit {