leveldb 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (128) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +22 -0
  3. data/README.md +95 -0
  4. data/ext/Rakefile +11 -0
  5. data/ext/leveldb/LICENSE +27 -0
  6. data/ext/leveldb/Makefile +206 -0
  7. data/ext/leveldb/build_config.mk +13 -0
  8. data/ext/leveldb/db/builder.cc +88 -0
  9. data/ext/leveldb/db/builder.h +34 -0
  10. data/ext/leveldb/db/c.cc +595 -0
  11. data/ext/leveldb/db/c_test.c +390 -0
  12. data/ext/leveldb/db/corruption_test.cc +359 -0
  13. data/ext/leveldb/db/db_bench.cc +979 -0
  14. data/ext/leveldb/db/db_impl.cc +1485 -0
  15. data/ext/leveldb/db/db_impl.h +203 -0
  16. data/ext/leveldb/db/db_iter.cc +299 -0
  17. data/ext/leveldb/db/db_iter.h +26 -0
  18. data/ext/leveldb/db/db_test.cc +2092 -0
  19. data/ext/leveldb/db/dbformat.cc +140 -0
  20. data/ext/leveldb/db/dbformat.h +227 -0
  21. data/ext/leveldb/db/dbformat_test.cc +112 -0
  22. data/ext/leveldb/db/filename.cc +139 -0
  23. data/ext/leveldb/db/filename.h +80 -0
  24. data/ext/leveldb/db/filename_test.cc +122 -0
  25. data/ext/leveldb/db/leveldb_main.cc +238 -0
  26. data/ext/leveldb/db/log_format.h +35 -0
  27. data/ext/leveldb/db/log_reader.cc +259 -0
  28. data/ext/leveldb/db/log_reader.h +108 -0
  29. data/ext/leveldb/db/log_test.cc +500 -0
  30. data/ext/leveldb/db/log_writer.cc +103 -0
  31. data/ext/leveldb/db/log_writer.h +48 -0
  32. data/ext/leveldb/db/memtable.cc +145 -0
  33. data/ext/leveldb/db/memtable.h +91 -0
  34. data/ext/leveldb/db/repair.cc +389 -0
  35. data/ext/leveldb/db/skiplist.h +379 -0
  36. data/ext/leveldb/db/skiplist_test.cc +378 -0
  37. data/ext/leveldb/db/snapshot.h +66 -0
  38. data/ext/leveldb/db/table_cache.cc +121 -0
  39. data/ext/leveldb/db/table_cache.h +61 -0
  40. data/ext/leveldb/db/version_edit.cc +266 -0
  41. data/ext/leveldb/db/version_edit.h +107 -0
  42. data/ext/leveldb/db/version_edit_test.cc +46 -0
  43. data/ext/leveldb/db/version_set.cc +1443 -0
  44. data/ext/leveldb/db/version_set.h +383 -0
  45. data/ext/leveldb/db/version_set_test.cc +179 -0
  46. data/ext/leveldb/db/write_batch.cc +147 -0
  47. data/ext/leveldb/db/write_batch_internal.h +49 -0
  48. data/ext/leveldb/db/write_batch_test.cc +120 -0
  49. data/ext/leveldb/doc/bench/db_bench_sqlite3.cc +718 -0
  50. data/ext/leveldb/doc/bench/db_bench_tree_db.cc +528 -0
  51. data/ext/leveldb/helpers/memenv/memenv.cc +384 -0
  52. data/ext/leveldb/helpers/memenv/memenv.h +20 -0
  53. data/ext/leveldb/helpers/memenv/memenv_test.cc +232 -0
  54. data/ext/leveldb/include/leveldb/c.h +291 -0
  55. data/ext/leveldb/include/leveldb/cache.h +99 -0
  56. data/ext/leveldb/include/leveldb/comparator.h +63 -0
  57. data/ext/leveldb/include/leveldb/db.h +161 -0
  58. data/ext/leveldb/include/leveldb/env.h +333 -0
  59. data/ext/leveldb/include/leveldb/filter_policy.h +70 -0
  60. data/ext/leveldb/include/leveldb/iterator.h +100 -0
  61. data/ext/leveldb/include/leveldb/options.h +195 -0
  62. data/ext/leveldb/include/leveldb/slice.h +109 -0
  63. data/ext/leveldb/include/leveldb/status.h +106 -0
  64. data/ext/leveldb/include/leveldb/table.h +85 -0
  65. data/ext/leveldb/include/leveldb/table_builder.h +92 -0
  66. data/ext/leveldb/include/leveldb/write_batch.h +64 -0
  67. data/ext/leveldb/issues/issue178_test.cc +92 -0
  68. data/ext/leveldb/port/atomic_pointer.h +224 -0
  69. data/ext/leveldb/port/port.h +19 -0
  70. data/ext/leveldb/port/port_example.h +135 -0
  71. data/ext/leveldb/port/port_posix.cc +54 -0
  72. data/ext/leveldb/port/port_posix.h +157 -0
  73. data/ext/leveldb/port/thread_annotations.h +59 -0
  74. data/ext/leveldb/port/win/stdint.h +24 -0
  75. data/ext/leveldb/table/block.cc +268 -0
  76. data/ext/leveldb/table/block.h +44 -0
  77. data/ext/leveldb/table/block_builder.cc +109 -0
  78. data/ext/leveldb/table/block_builder.h +57 -0
  79. data/ext/leveldb/table/filter_block.cc +111 -0
  80. data/ext/leveldb/table/filter_block.h +68 -0
  81. data/ext/leveldb/table/filter_block_test.cc +128 -0
  82. data/ext/leveldb/table/format.cc +145 -0
  83. data/ext/leveldb/table/format.h +108 -0
  84. data/ext/leveldb/table/iterator.cc +67 -0
  85. data/ext/leveldb/table/iterator_wrapper.h +63 -0
  86. data/ext/leveldb/table/merger.cc +197 -0
  87. data/ext/leveldb/table/merger.h +26 -0
  88. data/ext/leveldb/table/table.cc +275 -0
  89. data/ext/leveldb/table/table_builder.cc +270 -0
  90. data/ext/leveldb/table/table_test.cc +868 -0
  91. data/ext/leveldb/table/two_level_iterator.cc +182 -0
  92. data/ext/leveldb/table/two_level_iterator.h +34 -0
  93. data/ext/leveldb/util/arena.cc +68 -0
  94. data/ext/leveldb/util/arena.h +68 -0
  95. data/ext/leveldb/util/arena_test.cc +68 -0
  96. data/ext/leveldb/util/bloom.cc +95 -0
  97. data/ext/leveldb/util/bloom_test.cc +160 -0
  98. data/ext/leveldb/util/cache.cc +325 -0
  99. data/ext/leveldb/util/cache_test.cc +186 -0
  100. data/ext/leveldb/util/coding.cc +194 -0
  101. data/ext/leveldb/util/coding.h +104 -0
  102. data/ext/leveldb/util/coding_test.cc +196 -0
  103. data/ext/leveldb/util/comparator.cc +81 -0
  104. data/ext/leveldb/util/crc32c.cc +332 -0
  105. data/ext/leveldb/util/crc32c.h +45 -0
  106. data/ext/leveldb/util/crc32c_test.cc +72 -0
  107. data/ext/leveldb/util/env.cc +96 -0
  108. data/ext/leveldb/util/env_posix.cc +698 -0
  109. data/ext/leveldb/util/env_test.cc +104 -0
  110. data/ext/leveldb/util/filter_policy.cc +11 -0
  111. data/ext/leveldb/util/hash.cc +52 -0
  112. data/ext/leveldb/util/hash.h +19 -0
  113. data/ext/leveldb/util/histogram.cc +139 -0
  114. data/ext/leveldb/util/histogram.h +42 -0
  115. data/ext/leveldb/util/logging.cc +81 -0
  116. data/ext/leveldb/util/logging.h +47 -0
  117. data/ext/leveldb/util/mutexlock.h +41 -0
  118. data/ext/leveldb/util/options.cc +29 -0
  119. data/ext/leveldb/util/posix_logger.h +98 -0
  120. data/ext/leveldb/util/random.h +59 -0
  121. data/ext/leveldb/util/status.cc +75 -0
  122. data/ext/leveldb/util/testharness.cc +77 -0
  123. data/ext/leveldb/util/testharness.h +138 -0
  124. data/ext/leveldb/util/testutil.cc +51 -0
  125. data/ext/leveldb/util/testutil.h +53 -0
  126. data/lib/leveldb/version.rb +3 -0
  127. data/lib/leveldb.rb +1006 -0
  128. metadata +228 -0
@@ -0,0 +1,182 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "table/two_level_iterator.h"
6
+
7
+ #include "leveldb/table.h"
8
+ #include "table/block.h"
9
+ #include "table/format.h"
10
+ #include "table/iterator_wrapper.h"
11
+
12
+ namespace leveldb {
13
+
14
+ namespace {
15
+
16
+ typedef Iterator* (*BlockFunction)(void*, const ReadOptions&, const Slice&);
17
+
18
+ class TwoLevelIterator: public Iterator {
19
+ public:
20
+ TwoLevelIterator(
21
+ Iterator* index_iter,
22
+ BlockFunction block_function,
23
+ void* arg,
24
+ const ReadOptions& options);
25
+
26
+ virtual ~TwoLevelIterator();
27
+
28
+ virtual void Seek(const Slice& target);
29
+ virtual void SeekToFirst();
30
+ virtual void SeekToLast();
31
+ virtual void Next();
32
+ virtual void Prev();
33
+
34
+ virtual bool Valid() const {
35
+ return data_iter_.Valid();
36
+ }
37
+ virtual Slice key() const {
38
+ assert(Valid());
39
+ return data_iter_.key();
40
+ }
41
+ virtual Slice value() const {
42
+ assert(Valid());
43
+ return data_iter_.value();
44
+ }
45
+ virtual Status status() const {
46
+ // It'd be nice if status() returned a const Status& instead of a Status
47
+ if (!index_iter_.status().ok()) {
48
+ return index_iter_.status();
49
+ } else if (data_iter_.iter() != NULL && !data_iter_.status().ok()) {
50
+ return data_iter_.status();
51
+ } else {
52
+ return status_;
53
+ }
54
+ }
55
+
56
+ private:
57
+ void SaveError(const Status& s) {
58
+ if (status_.ok() && !s.ok()) status_ = s;
59
+ }
60
+ void SkipEmptyDataBlocksForward();
61
+ void SkipEmptyDataBlocksBackward();
62
+ void SetDataIterator(Iterator* data_iter);
63
+ void InitDataBlock();
64
+
65
+ BlockFunction block_function_;
66
+ void* arg_;
67
+ const ReadOptions options_;
68
+ Status status_;
69
+ IteratorWrapper index_iter_;
70
+ IteratorWrapper data_iter_; // May be NULL
71
+ // If data_iter_ is non-NULL, then "data_block_handle_" holds the
72
+ // "index_value" passed to block_function_ to create the data_iter_.
73
+ std::string data_block_handle_;
74
+ };
75
+
76
+ TwoLevelIterator::TwoLevelIterator(
77
+ Iterator* index_iter,
78
+ BlockFunction block_function,
79
+ void* arg,
80
+ const ReadOptions& options)
81
+ : block_function_(block_function),
82
+ arg_(arg),
83
+ options_(options),
84
+ index_iter_(index_iter),
85
+ data_iter_(NULL) {
86
+ }
87
+
88
+ TwoLevelIterator::~TwoLevelIterator() {
89
+ }
90
+
91
+ void TwoLevelIterator::Seek(const Slice& target) {
92
+ index_iter_.Seek(target);
93
+ InitDataBlock();
94
+ if (data_iter_.iter() != NULL) data_iter_.Seek(target);
95
+ SkipEmptyDataBlocksForward();
96
+ }
97
+
98
+ void TwoLevelIterator::SeekToFirst() {
99
+ index_iter_.SeekToFirst();
100
+ InitDataBlock();
101
+ if (data_iter_.iter() != NULL) data_iter_.SeekToFirst();
102
+ SkipEmptyDataBlocksForward();
103
+ }
104
+
105
+ void TwoLevelIterator::SeekToLast() {
106
+ index_iter_.SeekToLast();
107
+ InitDataBlock();
108
+ if (data_iter_.iter() != NULL) data_iter_.SeekToLast();
109
+ SkipEmptyDataBlocksBackward();
110
+ }
111
+
112
+ void TwoLevelIterator::Next() {
113
+ assert(Valid());
114
+ data_iter_.Next();
115
+ SkipEmptyDataBlocksForward();
116
+ }
117
+
118
+ void TwoLevelIterator::Prev() {
119
+ assert(Valid());
120
+ data_iter_.Prev();
121
+ SkipEmptyDataBlocksBackward();
122
+ }
123
+
124
+
125
+ void TwoLevelIterator::SkipEmptyDataBlocksForward() {
126
+ while (data_iter_.iter() == NULL || !data_iter_.Valid()) {
127
+ // Move to next block
128
+ if (!index_iter_.Valid()) {
129
+ SetDataIterator(NULL);
130
+ return;
131
+ }
132
+ index_iter_.Next();
133
+ InitDataBlock();
134
+ if (data_iter_.iter() != NULL) data_iter_.SeekToFirst();
135
+ }
136
+ }
137
+
138
+ void TwoLevelIterator::SkipEmptyDataBlocksBackward() {
139
+ while (data_iter_.iter() == NULL || !data_iter_.Valid()) {
140
+ // Move to next block
141
+ if (!index_iter_.Valid()) {
142
+ SetDataIterator(NULL);
143
+ return;
144
+ }
145
+ index_iter_.Prev();
146
+ InitDataBlock();
147
+ if (data_iter_.iter() != NULL) data_iter_.SeekToLast();
148
+ }
149
+ }
150
+
151
+ void TwoLevelIterator::SetDataIterator(Iterator* data_iter) {
152
+ if (data_iter_.iter() != NULL) SaveError(data_iter_.status());
153
+ data_iter_.Set(data_iter);
154
+ }
155
+
156
+ void TwoLevelIterator::InitDataBlock() {
157
+ if (!index_iter_.Valid()) {
158
+ SetDataIterator(NULL);
159
+ } else {
160
+ Slice handle = index_iter_.value();
161
+ if (data_iter_.iter() != NULL && handle.compare(data_block_handle_) == 0) {
162
+ // data_iter_ is already constructed with this iterator, so
163
+ // no need to change anything
164
+ } else {
165
+ Iterator* iter = (*block_function_)(arg_, options_, handle);
166
+ data_block_handle_.assign(handle.data(), handle.size());
167
+ SetDataIterator(iter);
168
+ }
169
+ }
170
+ }
171
+
172
+ } // namespace
173
+
174
+ Iterator* NewTwoLevelIterator(
175
+ Iterator* index_iter,
176
+ BlockFunction block_function,
177
+ void* arg,
178
+ const ReadOptions& options) {
179
+ return new TwoLevelIterator(index_iter, block_function, arg, options);
180
+ }
181
+
182
+ } // namespace leveldb
@@ -0,0 +1,34 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #ifndef STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_
6
+ #define STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_
7
+
8
+ #include "leveldb/iterator.h"
9
+
10
+ namespace leveldb {
11
+
12
+ struct ReadOptions;
13
+
14
+ // Return a new two level iterator. A two-level iterator contains an
15
+ // index iterator whose values point to a sequence of blocks where
16
+ // each block is itself a sequence of key,value pairs. The returned
17
+ // two-level iterator yields the concatenation of all key/value pairs
18
+ // in the sequence of blocks. Takes ownership of "index_iter" and
19
+ // will delete it when no longer needed.
20
+ //
21
+ // Uses a supplied function to convert an index_iter value into
22
+ // an iterator over the contents of the corresponding block.
23
+ extern Iterator* NewTwoLevelIterator(
24
+ Iterator* index_iter,
25
+ Iterator* (*block_function)(
26
+ void* arg,
27
+ const ReadOptions& options,
28
+ const Slice& index_value),
29
+ void* arg,
30
+ const ReadOptions& options);
31
+
32
+ } // namespace leveldb
33
+
34
+ #endif // STORAGE_LEVELDB_TABLE_TWO_LEVEL_ITERATOR_H_
@@ -0,0 +1,68 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "util/arena.h"
6
+ #include <assert.h>
7
+
8
+ namespace leveldb {
9
+
10
// Size in bytes of the blocks the arena allocates for ordinary requests.
static const int kBlockSize = 4096;
11
+
12
+ Arena::Arena() {
13
+ blocks_memory_ = 0;
14
+ alloc_ptr_ = NULL; // First allocation will allocate a block
15
+ alloc_bytes_remaining_ = 0;
16
+ }
17
+
18
+ Arena::~Arena() {
19
+ for (size_t i = 0; i < blocks_.size(); i++) {
20
+ delete[] blocks_[i];
21
+ }
22
+ }
23
+
24
+ char* Arena::AllocateFallback(size_t bytes) {
25
+ if (bytes > kBlockSize / 4) {
26
+ // Object is more than a quarter of our block size. Allocate it separately
27
+ // to avoid wasting too much space in leftover bytes.
28
+ char* result = AllocateNewBlock(bytes);
29
+ return result;
30
+ }
31
+
32
+ // We waste the remaining space in the current block.
33
+ alloc_ptr_ = AllocateNewBlock(kBlockSize);
34
+ alloc_bytes_remaining_ = kBlockSize;
35
+
36
+ char* result = alloc_ptr_;
37
+ alloc_ptr_ += bytes;
38
+ alloc_bytes_remaining_ -= bytes;
39
+ return result;
40
+ }
41
+
42
+ char* Arena::AllocateAligned(size_t bytes) {
43
+ const int align = sizeof(void*); // We'll align to pointer size
44
+ assert((align & (align-1)) == 0); // Pointer size should be a power of 2
45
+ size_t current_mod = reinterpret_cast<uintptr_t>(alloc_ptr_) & (align-1);
46
+ size_t slop = (current_mod == 0 ? 0 : align - current_mod);
47
+ size_t needed = bytes + slop;
48
+ char* result;
49
+ if (needed <= alloc_bytes_remaining_) {
50
+ result = alloc_ptr_ + slop;
51
+ alloc_ptr_ += needed;
52
+ alloc_bytes_remaining_ -= needed;
53
+ } else {
54
+ // AllocateFallback always returned aligned memory
55
+ result = AllocateFallback(bytes);
56
+ }
57
+ assert((reinterpret_cast<uintptr_t>(result) & (align-1)) == 0);
58
+ return result;
59
+ }
60
+
61
+ char* Arena::AllocateNewBlock(size_t block_bytes) {
62
+ char* result = new char[block_bytes];
63
+ blocks_memory_ += block_bytes;
64
+ blocks_.push_back(result);
65
+ return result;
66
+ }
67
+
68
+ } // namespace leveldb
@@ -0,0 +1,68 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #ifndef STORAGE_LEVELDB_UTIL_ARENA_H_
6
+ #define STORAGE_LEVELDB_UTIL_ARENA_H_
7
+
8
+ #include <cstddef>
9
+ #include <vector>
10
+ #include <assert.h>
11
+ #include <stdint.h>
12
+
13
+ namespace leveldb {
14
+
15
// Allocator that hands out memory from blocks it owns.  Individual
// allocations are never freed; the blocks are released together when the
// Arena is destroyed.
class Arena {
 public:
  Arena();
  ~Arena();

  // Return a pointer to a newly allocated memory block of "bytes" bytes.
  // "bytes" must be greater than zero.
  char* Allocate(size_t bytes);

  // Allocate memory with the normal alignment guarantees provided by malloc
  char* AllocateAligned(size_t bytes);

  // Returns an estimate of the total memory usage of data allocated
  // by the arena (including space allocated but not yet used for user
  // allocations).  The estimate is the bytes held in blocks plus the
  // pointer storage reserved by the block list.
  size_t MemoryUsage() const {
    const size_t bookkeeping = blocks_.capacity() * sizeof(char*);
    return blocks_memory_ + bookkeeping;
  }

 private:
  // Slow path taken when the current block cannot satisfy a request.
  char* AllocateFallback(size_t bytes);
  // Obtains and records a new block of "block_bytes" bytes.
  char* AllocateNewBlock(size_t block_bytes);

  // Allocation state: next free byte in the current block and how many
  // bytes remain in it.
  char* alloc_ptr_;
  size_t alloc_bytes_remaining_;

  // Array of new[] allocated memory blocks
  std::vector<char*> blocks_;

  // Bytes of memory in blocks allocated so far
  size_t blocks_memory_;

  // No copying allowed
  Arena(const Arena&);
  void operator=(const Arena&);
};
51
+
52
+ inline char* Arena::Allocate(size_t bytes) {
53
+ // The semantics of what to return are a bit messy if we allow
54
+ // 0-byte allocations, so we disallow them here (we don't need
55
+ // them for our internal use).
56
+ assert(bytes > 0);
57
+ if (bytes <= alloc_bytes_remaining_) {
58
+ char* result = alloc_ptr_;
59
+ alloc_ptr_ += bytes;
60
+ alloc_bytes_remaining_ -= bytes;
61
+ return result;
62
+ }
63
+ return AllocateFallback(bytes);
64
+ }
65
+
66
+ } // namespace leveldb
67
+
68
+ #endif // STORAGE_LEVELDB_UTIL_ARENA_H_
@@ -0,0 +1,68 @@
1
+ // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "util/arena.h"
6
+
7
+ #include "util/random.h"
8
+ #include "util/testharness.h"
9
+
10
+ namespace leveldb {
11
+
12
+ class ArenaTest { };
13
+
14
+ TEST(ArenaTest, Empty) {
15
+ Arena arena;
16
+ }
17
+
18
+ TEST(ArenaTest, Simple) {
19
+ std::vector<std::pair<size_t, char*> > allocated;
20
+ Arena arena;
21
+ const int N = 100000;
22
+ size_t bytes = 0;
23
+ Random rnd(301);
24
+ for (int i = 0; i < N; i++) {
25
+ size_t s;
26
+ if (i % (N / 10) == 0) {
27
+ s = i;
28
+ } else {
29
+ s = rnd.OneIn(4000) ? rnd.Uniform(6000) :
30
+ (rnd.OneIn(10) ? rnd.Uniform(100) : rnd.Uniform(20));
31
+ }
32
+ if (s == 0) {
33
+ // Our arena disallows size 0 allocations.
34
+ s = 1;
35
+ }
36
+ char* r;
37
+ if (rnd.OneIn(10)) {
38
+ r = arena.AllocateAligned(s);
39
+ } else {
40
+ r = arena.Allocate(s);
41
+ }
42
+
43
+ for (int b = 0; b < s; b++) {
44
+ // Fill the "i"th allocation with a known bit pattern
45
+ r[b] = i % 256;
46
+ }
47
+ bytes += s;
48
+ allocated.push_back(std::make_pair(s, r));
49
+ ASSERT_GE(arena.MemoryUsage(), bytes);
50
+ if (i > N/10) {
51
+ ASSERT_LE(arena.MemoryUsage(), bytes * 1.10);
52
+ }
53
+ }
54
+ for (int i = 0; i < allocated.size(); i++) {
55
+ size_t num_bytes = allocated[i].first;
56
+ const char* p = allocated[i].second;
57
+ for (int b = 0; b < num_bytes; b++) {
58
+ // Check the "i"th allocation for the known bit pattern
59
+ ASSERT_EQ(int(p[b]) & 0xff, i % 256);
60
+ }
61
+ }
62
+ }
63
+
64
+ } // namespace leveldb
65
+
66
+ int main(int argc, char** argv) {
67
+ return leveldb::test::RunAllTests();
68
+ }
@@ -0,0 +1,95 @@
1
+ // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "leveldb/filter_policy.h"
6
+
7
+ #include "leveldb/slice.h"
8
+ #include "util/hash.h"
9
+
10
+ namespace leveldb {
11
+
12
+ namespace {
13
+ static uint32_t BloomHash(const Slice& key) {
14
+ return Hash(key.data(), key.size(), 0xbc9f1d34);
15
+ }
16
+
17
+ class BloomFilterPolicy : public FilterPolicy {
18
+ private:
19
+ size_t bits_per_key_;
20
+ size_t k_;
21
+
22
+ public:
23
+ explicit BloomFilterPolicy(int bits_per_key)
24
+ : bits_per_key_(bits_per_key) {
25
+ // We intentionally round down to reduce probing cost a little bit
26
+ k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
27
+ if (k_ < 1) k_ = 1;
28
+ if (k_ > 30) k_ = 30;
29
+ }
30
+
31
+ virtual const char* Name() const {
32
+ return "leveldb.BuiltinBloomFilter";
33
+ }
34
+
35
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
36
+ // Compute bloom filter size (in both bits and bytes)
37
+ size_t bits = n * bits_per_key_;
38
+
39
+ // For small n, we can see a very high false positive rate. Fix it
40
+ // by enforcing a minimum bloom filter length.
41
+ if (bits < 64) bits = 64;
42
+
43
+ size_t bytes = (bits + 7) / 8;
44
+ bits = bytes * 8;
45
+
46
+ const size_t init_size = dst->size();
47
+ dst->resize(init_size + bytes, 0);
48
+ dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
49
+ char* array = &(*dst)[init_size];
50
+ for (size_t i = 0; i < n; i++) {
51
+ // Use double-hashing to generate a sequence of hash values.
52
+ // See analysis in [Kirsch,Mitzenmacher 2006].
53
+ uint32_t h = BloomHash(keys[i]);
54
+ const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
55
+ for (size_t j = 0; j < k_; j++) {
56
+ const uint32_t bitpos = h % bits;
57
+ array[bitpos/8] |= (1 << (bitpos % 8));
58
+ h += delta;
59
+ }
60
+ }
61
+ }
62
+
63
+ virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
64
+ const size_t len = bloom_filter.size();
65
+ if (len < 2) return false;
66
+
67
+ const char* array = bloom_filter.data();
68
+ const size_t bits = (len - 1) * 8;
69
+
70
+ // Use the encoded k so that we can read filters generated by
71
+ // bloom filters created using different parameters.
72
+ const size_t k = array[len-1];
73
+ if (k > 30) {
74
+ // Reserved for potentially new encodings for short bloom filters.
75
+ // Consider it a match.
76
+ return true;
77
+ }
78
+
79
+ uint32_t h = BloomHash(key);
80
+ const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
81
+ for (size_t j = 0; j < k; j++) {
82
+ const uint32_t bitpos = h % bits;
83
+ if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
84
+ h += delta;
85
+ }
86
+ return true;
87
+ }
88
+ };
89
+ }
90
+
91
+ const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
92
+ return new BloomFilterPolicy(bits_per_key);
93
+ }
94
+
95
+ } // namespace leveldb
@@ -0,0 +1,160 @@
1
+ // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "leveldb/filter_policy.h"
6
+
7
+ #include "util/coding.h"
8
+ #include "util/logging.h"
9
+ #include "util/testharness.h"
10
+ #include "util/testutil.h"
11
+
12
+ namespace leveldb {
13
+
14
// Diagnostic output level: >= 1 prints per-length false positive rates and
// the final summary, >= 2 additionally dumps every filter that is built.
static const int kVerbose = 1;
15
+
16
+ static Slice Key(int i, char* buffer) {
17
+ EncodeFixed32(buffer, i);
18
+ return Slice(buffer, sizeof(uint32_t));
19
+ }
20
+
21
+ class BloomTest {
22
+ private:
23
+ const FilterPolicy* policy_;
24
+ std::string filter_;
25
+ std::vector<std::string> keys_;
26
+
27
+ public:
28
+ BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
29
+
30
+ ~BloomTest() {
31
+ delete policy_;
32
+ }
33
+
34
+ void Reset() {
35
+ keys_.clear();
36
+ filter_.clear();
37
+ }
38
+
39
+ void Add(const Slice& s) {
40
+ keys_.push_back(s.ToString());
41
+ }
42
+
43
+ void Build() {
44
+ std::vector<Slice> key_slices;
45
+ for (size_t i = 0; i < keys_.size(); i++) {
46
+ key_slices.push_back(Slice(keys_[i]));
47
+ }
48
+ filter_.clear();
49
+ policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
50
+ keys_.clear();
51
+ if (kVerbose >= 2) DumpFilter();
52
+ }
53
+
54
+ size_t FilterSize() const {
55
+ return filter_.size();
56
+ }
57
+
58
+ void DumpFilter() {
59
+ fprintf(stderr, "F(");
60
+ for (size_t i = 0; i+1 < filter_.size(); i++) {
61
+ const unsigned int c = static_cast<unsigned int>(filter_[i]);
62
+ for (int j = 0; j < 8; j++) {
63
+ fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
64
+ }
65
+ }
66
+ fprintf(stderr, ")\n");
67
+ }
68
+
69
+ bool Matches(const Slice& s) {
70
+ if (!keys_.empty()) {
71
+ Build();
72
+ }
73
+ return policy_->KeyMayMatch(s, filter_);
74
+ }
75
+
76
+ double FalsePositiveRate() {
77
+ char buffer[sizeof(int)];
78
+ int result = 0;
79
+ for (int i = 0; i < 10000; i++) {
80
+ if (Matches(Key(i + 1000000000, buffer))) {
81
+ result++;
82
+ }
83
+ }
84
+ return result / 10000.0;
85
+ }
86
+ };
87
+
88
+ TEST(BloomTest, EmptyFilter) {
89
+ ASSERT_TRUE(! Matches("hello"));
90
+ ASSERT_TRUE(! Matches("world"));
91
+ }
92
+
93
+ TEST(BloomTest, Small) {
94
+ Add("hello");
95
+ Add("world");
96
+ ASSERT_TRUE(Matches("hello"));
97
+ ASSERT_TRUE(Matches("world"));
98
+ ASSERT_TRUE(! Matches("x"));
99
+ ASSERT_TRUE(! Matches("foo"));
100
+ }
101
+
102
// Returns the next key count to test after "length".  The step depends on
// the magnitude: +1 below 10, +10 below 100, +100 below 1000, else +1000.
static int NextLength(int length) {
  int step = 1000;
  if (length < 10) {
    step = 1;
  } else if (length < 100) {
    step = 10;
  } else if (length < 1000) {
    step = 100;
  }
  return length + step;
}
114
+
115
+ TEST(BloomTest, VaryingLengths) {
116
+ char buffer[sizeof(int)];
117
+
118
+ // Count number of filters that significantly exceed the false positive rate
119
+ int mediocre_filters = 0;
120
+ int good_filters = 0;
121
+
122
+ for (int length = 1; length <= 10000; length = NextLength(length)) {
123
+ Reset();
124
+ for (int i = 0; i < length; i++) {
125
+ Add(Key(i, buffer));
126
+ }
127
+ Build();
128
+
129
+ ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
130
+
131
+ // All added keys must match
132
+ for (int i = 0; i < length; i++) {
133
+ ASSERT_TRUE(Matches(Key(i, buffer)))
134
+ << "Length " << length << "; key " << i;
135
+ }
136
+
137
+ // Check false positive rate
138
+ double rate = FalsePositiveRate();
139
+ if (kVerbose >= 1) {
140
+ fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
141
+ rate*100.0, length, static_cast<int>(FilterSize()));
142
+ }
143
+ ASSERT_LE(rate, 0.02); // Must not be over 2%
144
+ if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
145
+ else good_filters++;
146
+ }
147
+ if (kVerbose >= 1) {
148
+ fprintf(stderr, "Filters: %d good, %d mediocre\n",
149
+ good_filters, mediocre_filters);
150
+ }
151
+ ASSERT_LE(mediocre_filters, good_filters/5);
152
+ }
153
+
154
+ // Different bits-per-byte
155
+
156
+ } // namespace leveldb
157
+
158
+ int main(int argc, char** argv) {
159
+ return leveldb::test::RunAllTests();
160
+ }