leveldb-ruby 0.7 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. data/README +1 -1
  2. data/leveldb/Makefile +70 -29
  3. data/leveldb/build_detect_platform +74 -0
  4. data/leveldb/db/builder.cc +2 -4
  5. data/leveldb/db/builder.h +4 -6
  6. data/leveldb/db/c.cc +471 -0
  7. data/leveldb/db/corruption_test.cc +21 -16
  8. data/leveldb/db/db_bench.cc +400 -200
  9. data/leveldb/db/db_impl.cc +276 -131
  10. data/leveldb/db/db_impl.h +22 -10
  11. data/leveldb/db/db_iter.cc +2 -1
  12. data/leveldb/db/db_test.cc +391 -43
  13. data/leveldb/db/dbformat.cc +31 -0
  14. data/leveldb/db/dbformat.h +51 -1
  15. data/leveldb/db/filename.h +1 -1
  16. data/leveldb/db/log_format.h +1 -1
  17. data/leveldb/db/log_reader.cc +16 -11
  18. data/leveldb/db/memtable.cc +37 -0
  19. data/leveldb/db/memtable.h +6 -0
  20. data/leveldb/db/repair.cc +17 -14
  21. data/leveldb/db/skiplist_test.cc +2 -2
  22. data/leveldb/db/version_edit.cc +7 -9
  23. data/leveldb/db/version_edit.h +2 -1
  24. data/leveldb/db/version_set.cc +416 -104
  25. data/leveldb/db/version_set.h +78 -14
  26. data/leveldb/db/version_set_test.cc +179 -0
  27. data/leveldb/db/write_batch_internal.h +2 -0
  28. data/leveldb/include/leveldb/c.h +246 -0
  29. data/leveldb/include/leveldb/db.h +14 -2
  30. data/leveldb/include/leveldb/env.h +31 -10
  31. data/leveldb/include/leveldb/options.h +7 -18
  32. data/leveldb/include/leveldb/slice.h +2 -2
  33. data/leveldb/include/leveldb/status.h +1 -1
  34. data/leveldb/port/atomic_pointer.h +144 -0
  35. data/leveldb/port/port.h +0 -2
  36. data/leveldb/port/port_android.h +7 -1
  37. data/leveldb/port/port_example.h +11 -1
  38. data/leveldb/port/port_posix.h +56 -38
  39. data/leveldb/table/format.cc +12 -8
  40. data/leveldb/table/table_test.cc +16 -7
  41. data/leveldb/util/cache.cc +173 -100
  42. data/leveldb/util/cache_test.cc +28 -11
  43. data/leveldb/util/coding.h +4 -4
  44. data/leveldb/util/comparator.cc +1 -0
  45. data/leveldb/util/env.cc +10 -5
  46. data/leveldb/util/env_posix.cc +48 -87
  47. data/leveldb/util/histogram.cc +11 -0
  48. data/leveldb/util/histogram.h +1 -0
  49. data/leveldb/util/posix_logger.h +98 -0
  50. data/leveldb/util/testharness.cc +12 -0
  51. data/leveldb/util/testharness.h +10 -1
  52. data/lib/leveldb.rb +11 -3
  53. metadata +41 -22
@@ -107,16 +107,20 @@ Status ReadBlock(RandomAccessFile* file,
107
107
  // Ok
108
108
  break;
109
109
  case kSnappyCompression: {
110
- std::string decompressed;
111
- if (!port::Snappy_Uncompress(data, n, &decompressed)) {
110
+ size_t ulength = 0;
111
+ if (!port::Snappy_GetUncompressedLength(data, n, &ulength)) {
112
112
  delete[] buf;
113
- s = Status::Corruption("corrupted compressed block contents");
114
- return s;
113
+ return Status::Corruption("corrupted compressed block contents");
115
114
  }
116
- delete[] buf; // Done with uncompressed data
117
- buf = new char[decompressed.size()];
118
- memcpy(buf, decompressed.data(), decompressed.size());
119
- n = decompressed.size();
115
+ char* ubuf = new char[ulength];
116
+ if (!port::Snappy_Uncompress(data, n, ubuf)) {
117
+ delete[] buf;
118
+ delete[] ubuf;
119
+ return Status::Corruption("corrupted compressed block contents");
120
+ }
121
+ delete[] buf;
122
+ buf = ubuf;
123
+ n = ulength;
120
124
  break;
121
125
  }
122
126
  default:
@@ -5,6 +5,7 @@
5
5
  #include "leveldb/table.h"
6
6
 
7
7
  #include <map>
8
+ #include <string>
8
9
  #include "db/dbformat.h"
9
10
  #include "db/memtable.h"
10
11
  #include "db/write_batch_internal.h"
@@ -25,7 +26,11 @@ namespace leveldb {
25
26
  // Used to test non-lexicographic comparators.
26
27
  static std::string Reverse(const Slice& key) {
27
28
  std::string str(key.ToString());
28
- std::string rev(str.rbegin(), str.rend());
29
+ std::string rev("");
30
+ for (std::string::reverse_iterator rit = str.rbegin();
31
+ rit != str.rend(); ++rit) {
32
+ rev.push_back(*rit);
33
+ }
29
34
  return rev;
30
35
  }
31
36
 
@@ -411,7 +416,7 @@ enum TestType {
411
416
  TABLE_TEST,
412
417
  BLOCK_TEST,
413
418
  MEMTABLE_TEST,
414
- DB_TEST,
419
+ DB_TEST
415
420
  };
416
421
 
417
422
  struct TestArgs {
@@ -727,11 +732,15 @@ TEST(Harness, RandomizedLongDB) {
727
732
  Test(&rnd);
728
733
 
729
734
  // We must have created enough data to force merging
730
- std::string l0_files, l1_files;
731
- ASSERT_TRUE(db()->GetProperty("leveldb.num-files-at-level0", &l0_files));
732
- ASSERT_TRUE(db()->GetProperty("leveldb.num-files-at-level1", &l1_files));
733
- ASSERT_GT(atoi(l0_files.c_str()) + atoi(l1_files.c_str()), 0);
734
-
735
+ int files = 0;
736
+ for (int level = 0; level < config::kNumLevels; level++) {
737
+ std::string value;
738
+ char name[100];
739
+ snprintf(name, sizeof(name), "leveldb.num-files-at-level%d", level);
740
+ ASSERT_TRUE(db()->GetProperty(name, &value));
741
+ files += atoi(value.c_str());
742
+ }
743
+ ASSERT_GT(files, 0);
735
744
  }
736
745
 
737
746
  class MemTableTest { };
@@ -2,17 +2,9 @@
2
2
  // Use of this source code is governed by a BSD-style license that can be
3
3
  // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
4
 
5
- #if defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_ANDROID)
6
- #include <unordered_set>
7
- #elif defined(LEVELDB_PLATFORM_OSX)
8
- #include <ext/hash_set>
9
- #elif defined(LEVELDB_PLATFORM_CHROMIUM)
10
- #include "base/hash_tables.h"
11
- #else
12
- #include <hash_set> // TODO(sanjay): Switch to unordered_set when possible.
13
- #endif
14
-
15
5
  #include <assert.h>
6
+ #include <stdio.h>
7
+ #include <stdlib.h>
16
8
 
17
9
  #include "leveldb/cache.h"
18
10
  #include "port/port.h"
@@ -33,11 +25,13 @@ namespace {
33
25
  struct LRUHandle {
34
26
  void* value;
35
27
  void (*deleter)(const Slice&, void* value);
28
+ LRUHandle* next_hash;
36
29
  LRUHandle* next;
37
30
  LRUHandle* prev;
38
31
  size_t charge; // TODO(opt): Only allow uint32_t?
39
32
  size_t key_length;
40
- size_t refs; // TODO(opt): Pack with "key_length"?
33
+ uint32_t refs;
34
+ uint32_t hash; // Hash of key(); used for fast sharding and comparisons
41
35
  char key_data[1]; // Beginning of key
42
36
 
43
37
  Slice key() const {
@@ -51,64 +45,117 @@ struct LRUHandle {
51
45
  }
52
46
  };
53
47
 
54
- // Pick a platform specific hash_set instantiation
55
- #if defined(LEVELDB_PLATFORM_CHROMIUM) && defined(OS_WIN)
56
- // Microsoft's hash_set deviates from the standard. See
57
- // http://msdn.microsoft.com/en-us/library/1t4xas78(v=vs.80).aspx
58
- // for details. Basically the 2 param () operator is a less than and
59
- // the 1 param () operator is a hash function.
60
- struct HandleHashCompare : public stdext::hash_compare<LRUHandle*> {
61
- size_t operator() (LRUHandle* h) const {
62
- Slice k = h->key();
63
- return Hash(k.data(), k.size(), 0);
48
+ // We provide our own simple hash table since it removes a whole bunch
49
+ // of porting hacks and is also faster than some of the built-in hash
50
+ // table implementations in some of the compiler/runtime combinations
51
+ // we have tested. E.g., readrandom speeds up by ~5% over the g++
52
+ // 4.4.3's builtin hashtable.
53
+ class HandleTable {
54
+ public:
55
+ HandleTable() : length_(0), elems_(0), list_(NULL) { Resize(); }
56
+ ~HandleTable() { delete[] list_; }
57
+
58
+ LRUHandle* Lookup(const Slice& key, uint32_t hash) {
59
+ return *FindPointer(key, hash);
60
+ }
61
+
62
+ LRUHandle* Insert(LRUHandle* h) {
63
+ LRUHandle** ptr = FindPointer(h->key(), h->hash);
64
+ LRUHandle* old = *ptr;
65
+ h->next_hash = (old == NULL ? NULL : old->next_hash);
66
+ *ptr = h;
67
+ if (old == NULL) {
68
+ ++elems_;
69
+ if (elems_ > length_) {
70
+ // Since each cache entry is fairly large, we aim for a small
71
+ // average linked list length (<= 1).
72
+ Resize();
73
+ }
64
74
  }
65
- bool operator() (LRUHandle* a, LRUHandle* b) const {
66
- return a->key().compare(b->key()) < 0;
75
+ return old;
76
+ }
77
+
78
+ LRUHandle* Remove(const Slice& key, uint32_t hash) {
79
+ LRUHandle** ptr = FindPointer(key, hash);
80
+ LRUHandle* result = *ptr;
81
+ if (result != NULL) {
82
+ *ptr = result->next_hash;
83
+ --elems_;
67
84
  }
68
- };
69
- typedef base::hash_set<LRUHandle*, HandleHashCompare> HandleTable;
70
- #else
71
- struct HandleHash {
72
- inline size_t operator()(LRUHandle* h) const {
73
- Slice k = h->key();
74
- return Hash(k.data(), k.size(), 0);
85
+ return result;
86
+ }
87
+
88
+ private:
89
+ // The table consists of an array of buckets where each bucket is
90
+ // a linked list of cache entries that hash into the bucket.
91
+ uint32_t length_;
92
+ uint32_t elems_;
93
+ LRUHandle** list_;
94
+
95
+ // Return a pointer to slot that points to a cache entry that
96
+ // matches key/hash. If there is no such cache entry, return a
97
+ // pointer to the trailing slot in the corresponding linked list.
98
+ LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
99
+ LRUHandle** ptr = &list_[hash & (length_ - 1)];
100
+ while (*ptr != NULL &&
101
+ ((*ptr)->hash != hash || key != (*ptr)->key())) {
102
+ ptr = &(*ptr)->next_hash;
75
103
  }
76
- };
104
+ return ptr;
105
+ }
77
106
 
78
- struct HandleEq {
79
- inline bool operator()(LRUHandle* a, LRUHandle* b) const {
80
- return a->key() == b->key();
107
+ void Resize() {
108
+ uint32_t new_length = 4;
109
+ while (new_length < elems_) {
110
+ new_length *= 2;
81
111
  }
82
- };
83
- # if defined(LEVELDB_PLATFORM_CHROMIUM)
84
- typedef base::hash_set<LRUHandle*, HandleHash, HandleEq> HandleTable;
85
- # elif defined(LEVELDB_PLATFORM_POSIX) || defined(LEVELDB_PLATFORM_ANDROID)
86
- typedef std::unordered_set<LRUHandle*, HandleHash, HandleEq> HandleTable;
87
- # else
88
- typedef __gnu_cxx::hash_set<LRUHandle*, HandleHash, HandleEq> HandleTable;
89
- # endif
90
- #endif
91
-
92
- class LRUCache : public Cache {
112
+ LRUHandle** new_list = new LRUHandle*[new_length];
113
+ memset(new_list, 0, sizeof(new_list[0]) * new_length);
114
+ uint32_t count = 0;
115
+ for (uint32_t i = 0; i < length_; i++) {
116
+ LRUHandle* h = list_[i];
117
+ while (h != NULL) {
118
+ LRUHandle* next = h->next_hash;
119
+ Slice key = h->key();
120
+ uint32_t hash = h->hash;
121
+ LRUHandle** ptr = &new_list[hash & (new_length - 1)];
122
+ h->next_hash = *ptr;
123
+ *ptr = h;
124
+ h = next;
125
+ count++;
126
+ }
127
+ }
128
+ assert(elems_ == count);
129
+ delete[] list_;
130
+ list_ = new_list;
131
+ length_ = new_length;
132
+ }
133
+ };
134
+
135
+ // A single shard of sharded cache.
136
+ class LRUCache {
93
137
  public:
94
- explicit LRUCache(size_t capacity);
95
- virtual ~LRUCache();
138
+ LRUCache();
139
+ ~LRUCache();
96
140
 
97
- virtual Handle* Insert(const Slice& key, void* value, size_t charge,
98
- void (*deleter)(const Slice& key, void* value));
99
- virtual Handle* Lookup(const Slice& key);
100
- virtual void Release(Handle* handle);
101
- virtual void* Value(Handle* handle);
102
- virtual void Erase(const Slice& key);
103
- virtual uint64_t NewId();
141
+ // Separate from constructor so caller can easily make an array of LRUCache
142
+ void SetCapacity(size_t capacity) { capacity_ = capacity; }
143
+
144
+ // Like Cache methods, but with an extra "hash" parameter.
145
+ Cache::Handle* Insert(const Slice& key, uint32_t hash,
146
+ void* value, size_t charge,
147
+ void (*deleter)(const Slice& key, void* value));
148
+ Cache::Handle* Lookup(const Slice& key, uint32_t hash);
149
+ void Release(Cache::Handle* handle);
150
+ void Erase(const Slice& key, uint32_t hash);
104
151
 
105
152
  private:
106
153
  void LRU_Remove(LRUHandle* e);
107
154
  void LRU_Append(LRUHandle* e);
108
155
  void Unref(LRUHandle* e);
109
156
 
110
- // Constructor parameters
111
- const size_t capacity_;
157
+ // Initialized before use.
158
+ size_t capacity_;
112
159
 
113
160
  // mutex_ protects the following state.
114
161
  port::Mutex mutex_;
@@ -122,9 +169,8 @@ class LRUCache : public Cache {
122
169
  HandleTable table_;
123
170
  };
124
171
 
125
- LRUCache::LRUCache(size_t capacity)
126
- : capacity_(capacity),
127
- usage_(0),
172
+ LRUCache::LRUCache()
173
+ : usage_(0),
128
174
  last_id_(0) {
129
175
  // Make empty circular linked list
130
176
  lru_.next = &lru_;
@@ -132,7 +178,6 @@ LRUCache::LRUCache(size_t capacity)
132
178
  }
133
179
 
134
180
  LRUCache::~LRUCache() {
135
- table_.clear();
136
181
  for (LRUHandle* e = lru_.next; e != &lru_; ) {
137
182
  LRUHandle* next = e->next;
138
183
  assert(e->refs == 1); // Error if caller has an unreleased handle
@@ -164,35 +209,25 @@ void LRUCache::LRU_Append(LRUHandle* e) {
164
209
  e->next->prev = e;
165
210
  }
166
211
 
167
- Cache::Handle* LRUCache::Lookup(const Slice& key) {
212
+ Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
168
213
  MutexLock l(&mutex_);
169
-
170
- LRUHandle dummy;
171
- dummy.next = &dummy;
172
- dummy.value = const_cast<Slice*>(&key);
173
- HandleTable::iterator iter = table_.find(&dummy);
174
- if (iter == table_.end()) {
175
- return NULL;
176
- } else {
177
- LRUHandle* e = const_cast<LRUHandle*>(*iter);
214
+ LRUHandle* e = table_.Lookup(key, hash);
215
+ if (e != NULL) {
178
216
  e->refs++;
179
217
  LRU_Remove(e);
180
218
  LRU_Append(e);
181
- return reinterpret_cast<Handle*>(e);
182
219
  }
220
+ return reinterpret_cast<Cache::Handle*>(e);
183
221
  }
184
222
 
185
- void* LRUCache::Value(Handle* handle) {
186
- return reinterpret_cast<LRUHandle*>(handle)->value;
187
- }
188
-
189
- void LRUCache::Release(Handle* handle) {
223
+ void LRUCache::Release(Cache::Handle* handle) {
190
224
  MutexLock l(&mutex_);
191
225
  Unref(reinterpret_cast<LRUHandle*>(handle));
192
226
  }
193
227
 
194
- Cache::Handle* LRUCache::Insert(const Slice& key, void* value, size_t charge,
195
- void (*deleter)(const Slice& key, void* value)) {
228
+ Cache::Handle* LRUCache::Insert(
229
+ const Slice& key, uint32_t hash, void* value, size_t charge,
230
+ void (*deleter)(const Slice& key, void* value)) {
196
231
  MutexLock l(&mutex_);
197
232
 
198
233
  LRUHandle* e = reinterpret_cast<LRUHandle*>(
@@ -201,55 +236,93 @@ Cache::Handle* LRUCache::Insert(const Slice& key, void* value, size_t charge,
201
236
  e->deleter = deleter;
202
237
  e->charge = charge;
203
238
  e->key_length = key.size();
239
+ e->hash = hash;
204
240
  e->refs = 2; // One from LRUCache, one for the returned handle
205
241
  memcpy(e->key_data, key.data(), key.size());
206
242
  LRU_Append(e);
207
243
  usage_ += charge;
208
244
 
209
- std::pair<HandleTable::iterator,bool> p = table_.insert(e);
210
- if (!p.second) {
211
- // Kill existing entry
212
- LRUHandle* old = const_cast<LRUHandle*>(*(p.first));
245
+ LRUHandle* old = table_.Insert(e);
246
+ if (old != NULL) {
213
247
  LRU_Remove(old);
214
- table_.erase(p.first);
215
- table_.insert(e);
216
248
  Unref(old);
217
249
  }
218
250
 
219
251
  while (usage_ > capacity_ && lru_.next != &lru_) {
220
252
  LRUHandle* old = lru_.next;
221
253
  LRU_Remove(old);
222
- table_.erase(old);
254
+ table_.Remove(old->key(), old->hash);
223
255
  Unref(old);
224
256
  }
225
257
 
226
- return reinterpret_cast<Handle*>(e);
258
+ return reinterpret_cast<Cache::Handle*>(e);
227
259
  }
228
260
 
229
- void LRUCache::Erase(const Slice& key) {
261
+ void LRUCache::Erase(const Slice& key, uint32_t hash) {
230
262
  MutexLock l(&mutex_);
231
-
232
- LRUHandle dummy;
233
- dummy.next = &dummy;
234
- dummy.value = const_cast<Slice*>(&key);
235
- HandleTable::iterator iter = table_.find(&dummy);
236
- if (iter != table_.end()) {
237
- LRUHandle* e = const_cast<LRUHandle*>(*iter);
263
+ LRUHandle* e = table_.Remove(key, hash);
264
+ if (e != NULL) {
238
265
  LRU_Remove(e);
239
- table_.erase(iter);
240
266
  Unref(e);
241
267
  }
242
268
  }
243
269
 
244
- uint64_t LRUCache::NewId() {
245
- MutexLock l(&mutex_);
246
- return ++(last_id_);
247
- }
270
+ static const int kNumShardBits = 4;
271
+ static const int kNumShards = 1 << kNumShardBits;
272
+
273
+ class ShardedLRUCache : public Cache {
274
+ private:
275
+ LRUCache shard_[kNumShards];
276
+ port::Mutex id_mutex_;
277
+ uint64_t last_id_;
278
+
279
+ static inline uint32_t HashSlice(const Slice& s) {
280
+ return Hash(s.data(), s.size(), 0);
281
+ }
282
+
283
+ static uint32_t Shard(uint32_t hash) {
284
+ return hash >> (32 - kNumShardBits);
285
+ }
286
+
287
+ public:
288
+ explicit ShardedLRUCache(size_t capacity)
289
+ : last_id_(0) {
290
+ const size_t per_shard = (capacity + (kNumShards - 1)) / kNumShards;
291
+ for (int s = 0; s < kNumShards; s++) {
292
+ shard_[s].SetCapacity(per_shard);
293
+ }
294
+ }
295
+ virtual ~ShardedLRUCache() { }
296
+ virtual Handle* Insert(const Slice& key, void* value, size_t charge,
297
+ void (*deleter)(const Slice& key, void* value)) {
298
+ const uint32_t hash = HashSlice(key);
299
+ return shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
300
+ }
301
+ virtual Handle* Lookup(const Slice& key) {
302
+ const uint32_t hash = HashSlice(key);
303
+ return shard_[Shard(hash)].Lookup(key, hash);
304
+ }
305
+ virtual void Release(Handle* handle) {
306
+ LRUHandle* h = reinterpret_cast<LRUHandle*>(handle);
307
+ shard_[Shard(h->hash)].Release(handle);
308
+ }
309
+ virtual void Erase(const Slice& key) {
310
+ const uint32_t hash = HashSlice(key);
311
+ shard_[Shard(hash)].Erase(key, hash);
312
+ }
313
+ virtual void* Value(Handle* handle) {
314
+ return reinterpret_cast<LRUHandle*>(handle)->value;
315
+ }
316
+ virtual uint64_t NewId() {
317
+ MutexLock l(&id_mutex_);
318
+ return ++(last_id_);
319
+ }
320
+ };
248
321
 
249
322
  } // end anonymous namespace
250
323
 
251
324
  Cache* NewLRUCache(size_t capacity) {
252
- return new LRUCache(capacity);
325
+ return new ShardedLRUCache(capacity);
253
326
  }
254
327
 
255
328
  }