leveldb-ruby 0.14 → 0.15

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/LICENSE +24 -0
  2. data/README +60 -16
  3. data/ext/leveldb/extconf.rb +1 -1
  4. data/ext/leveldb/leveldb.cc +187 -18
  5. data/leveldb/Makefile +82 -96
  6. data/leveldb/build_detect_platform +137 -51
  7. data/leveldb/db/c.cc +110 -0
  8. data/leveldb/db/db_bench.cc +105 -4
  9. data/leveldb/db/db_impl.cc +135 -45
  10. data/leveldb/db/db_impl.h +12 -10
  11. data/leveldb/db/db_test.cc +666 -431
  12. data/leveldb/db/dbformat.cc +20 -0
  13. data/leveldb/db/dbformat.h +12 -0
  14. data/leveldb/db/repair.cc +3 -1
  15. data/leveldb/db/skiplist.h +2 -1
  16. data/leveldb/db/table_cache.cc +42 -16
  17. data/leveldb/db/table_cache.h +11 -0
  18. data/leveldb/db/version_set.cc +46 -41
  19. data/leveldb/db/version_set.h +9 -0
  20. data/leveldb/db/write_batch.cc +13 -4
  21. data/leveldb/db/write_batch_internal.h +2 -0
  22. data/leveldb/db/write_batch_test.cc +31 -0
  23. data/leveldb/include/leveldb/c.h +29 -0
  24. data/leveldb/include/leveldb/db.h +2 -1
  25. data/leveldb/include/leveldb/filter_policy.h +70 -0
  26. data/leveldb/include/leveldb/options.h +8 -0
  27. data/leveldb/include/leveldb/status.h +6 -0
  28. data/leveldb/include/leveldb/table.h +15 -0
  29. data/leveldb/include/leveldb/table_builder.h +1 -0
  30. data/leveldb/port/atomic_pointer.h +13 -5
  31. data/leveldb/port/port.h +0 -2
  32. data/leveldb/port/port_example.h +10 -0
  33. data/leveldb/port/port_posix.cc +4 -0
  34. data/leveldb/port/port_posix.h +24 -9
  35. data/leveldb/table/block.cc +8 -4
  36. data/leveldb/table/block.h +3 -2
  37. data/leveldb/table/filter_block.cc +111 -0
  38. data/leveldb/table/filter_block.h +68 -0
  39. data/leveldb/table/filter_block_test.cc +128 -0
  40. data/leveldb/table/format.cc +17 -7
  41. data/leveldb/table/format.h +9 -4
  42. data/leveldb/table/table.cc +107 -6
  43. data/leveldb/table/table_builder.cc +49 -6
  44. data/leveldb/table/table_test.cc +8 -24
  45. data/leveldb/util/bloom.cc +95 -0
  46. data/leveldb/util/bloom_test.cc +159 -0
  47. data/leveldb/util/coding_test.cc +23 -0
  48. data/leveldb/util/comparator.cc +8 -3
  49. data/leveldb/util/env_posix.cc +46 -4
  50. data/leveldb/util/filter_policy.cc +11 -0
  51. data/leveldb/util/options.cc +2 -1
  52. data/lib/leveldb.rb +31 -5
  53. metadata +227 -109
  54. data/leveldb/port/port_android.cc +0 -64
  55. data/leveldb/port/port_android.h +0 -156
@@ -5,14 +5,15 @@
5
5
  #include "leveldb/table_builder.h"
6
6
 
7
7
  #include <assert.h>
8
- #include <stdio.h>
9
8
  #include "leveldb/comparator.h"
10
9
  #include "leveldb/env.h"
10
+ #include "leveldb/filter_policy.h"
11
+ #include "leveldb/options.h"
11
12
  #include "table/block_builder.h"
13
+ #include "table/filter_block.h"
12
14
  #include "table/format.h"
13
15
  #include "util/coding.h"
14
16
  #include "util/crc32c.h"
15
- #include "util/logging.h"
16
17
 
17
18
  namespace leveldb {
18
19
 
@@ -27,6 +28,7 @@ struct TableBuilder::Rep {
27
28
  std::string last_key;
28
29
  int64_t num_entries;
29
30
  bool closed; // Either Finish() or Abandon() has been called.
31
+ FilterBlockBuilder* filter_block;
30
32
 
31
33
  // We do not emit the index entry for a block until we have seen the
32
34
  // first key for the next data block. This allows us to use shorter
@@ -51,6 +53,8 @@ struct TableBuilder::Rep {
51
53
  index_block(&index_block_options),
52
54
  num_entries(0),
53
55
  closed(false),
56
+ filter_block(opt.filter_policy == NULL ? NULL
57
+ : new FilterBlockBuilder(opt.filter_policy)),
54
58
  pending_index_entry(false) {
55
59
  index_block_options.block_restart_interval = 1;
56
60
  }
@@ -58,10 +62,14 @@ struct TableBuilder::Rep {
58
62
 
59
63
  TableBuilder::TableBuilder(const Options& options, WritableFile* file)
60
64
  : rep_(new Rep(options, file)) {
65
+ if (rep_->filter_block != NULL) {
66
+ rep_->filter_block->StartBlock(0);
67
+ }
61
68
  }
62
69
 
63
70
  TableBuilder::~TableBuilder() {
64
71
  assert(rep_->closed); // Catch errors where caller forgot to call Finish()
72
+ delete rep_->filter_block;
65
73
  delete rep_;
66
74
  }
67
75
 
@@ -98,6 +106,10 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
98
106
  r->pending_index_entry = false;
99
107
  }
100
108
 
109
+ if (r->filter_block != NULL) {
110
+ r->filter_block->AddKey(key);
111
+ }
112
+
101
113
  r->last_key.assign(key.data(), key.size());
102
114
  r->num_entries++;
103
115
  r->data_block.Add(key, value);
@@ -119,6 +131,9 @@ void TableBuilder::Flush() {
119
131
  r->pending_index_entry = true;
120
132
  r->status = r->file->Flush();
121
133
  }
134
+ if (r->filter_block != NULL) {
135
+ r->filter_block->StartBlock(r->offset);
136
+ }
122
137
  }
123
138
 
124
139
  void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
@@ -152,6 +167,15 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
152
167
  break;
153
168
  }
154
169
  }
170
+ WriteRawBlock(block_contents, type, handle);
171
+ r->compressed_output.clear();
172
+ block->Reset();
173
+ }
174
+
175
+ void TableBuilder::WriteRawBlock(const Slice& block_contents,
176
+ CompressionType type,
177
+ BlockHandle* handle) {
178
+ Rep* r = rep_;
155
179
  handle->set_offset(r->offset);
156
180
  handle->set_size(block_contents.size());
157
181
  r->status = r->file->Append(block_contents);
@@ -166,8 +190,6 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
166
190
  r->offset += block_contents.size() + kBlockTrailerSize;
167
191
  }
168
192
  }
169
- r->compressed_output.clear();
170
- block->Reset();
171
193
  }
172
194
 
173
195
  Status TableBuilder::status() const {
@@ -179,13 +201,32 @@ Status TableBuilder::Finish() {
179
201
  Flush();
180
202
  assert(!r->closed);
181
203
  r->closed = true;
182
- BlockHandle metaindex_block_handle;
183
- BlockHandle index_block_handle;
204
+
205
+ BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
206
+
207
+ // Write filter block
208
+ if (ok() && r->filter_block != NULL) {
209
+ WriteRawBlock(r->filter_block->Finish(), kNoCompression,
210
+ &filter_block_handle);
211
+ }
212
+
213
+ // Write metaindex block
184
214
  if (ok()) {
185
215
  BlockBuilder meta_index_block(&r->options);
216
+ if (r->filter_block != NULL) {
217
+ // Add mapping from "filter.Name" to location of filter data
218
+ std::string key = "filter.";
219
+ key.append(r->options.filter_policy->Name());
220
+ std::string handle_encoding;
221
+ filter_block_handle.EncodeTo(&handle_encoding);
222
+ meta_index_block.Add(key, handle_encoding);
223
+ }
224
+
186
225
  // TODO(postrelease): Add stats and other meta blocks
187
226
  WriteBlock(&meta_index_block, &metaindex_block_handle);
188
227
  }
228
+
229
+ // Write index block
189
230
  if (ok()) {
190
231
  if (r->pending_index_entry) {
191
232
  r->options.comparator->FindShortSuccessor(&r->last_key);
@@ -196,6 +237,8 @@ Status TableBuilder::Finish() {
196
237
  }
197
238
  WriteBlock(&r->index_block, &index_block_handle);
198
239
  }
240
+
241
+ // Write footer
199
242
  if (ok()) {
200
243
  Footer footer;
201
244
  footer.set_metaindex_handle(metaindex_block_handle);
@@ -168,8 +168,6 @@ class Constructor {
168
168
  // Construct the data structure from the data in "data"
169
169
  virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
170
170
 
171
- virtual size_t NumBytes() const = 0;
172
-
173
171
  virtual Iterator* NewIterator() const = 0;
174
172
 
175
173
  virtual const KVMap& data() { return data_; }
@@ -185,7 +183,6 @@ class BlockConstructor: public Constructor {
185
183
  explicit BlockConstructor(const Comparator* cmp)
186
184
  : Constructor(cmp),
187
185
  comparator_(cmp),
188
- block_size_(-1),
189
186
  block_(NULL) { }
190
187
  ~BlockConstructor() {
191
188
  delete block_;
@@ -201,22 +198,21 @@ class BlockConstructor: public Constructor {
201
198
  builder.Add(it->first, it->second);
202
199
  }
203
200
  // Open the block
204
- Slice block_data = builder.Finish();
205
- block_size_ = block_data.size();
206
- char* block_data_copy = new char[block_size_];
207
- memcpy(block_data_copy, block_data.data(), block_size_);
208
- block_ = new Block(block_data_copy, block_size_);
201
+ data_ = builder.Finish().ToString();
202
+ BlockContents contents;
203
+ contents.data = data_;
204
+ contents.cachable = false;
205
+ contents.heap_allocated = false;
206
+ block_ = new Block(contents);
209
207
  return Status::OK();
210
208
  }
211
- virtual size_t NumBytes() const { return block_size_; }
212
-
213
209
  virtual Iterator* NewIterator() const {
214
210
  return block_->NewIterator(comparator_);
215
211
  }
216
212
 
217
213
  private:
218
214
  const Comparator* comparator_;
219
- int block_size_;
215
+ std::string data_;
220
216
  Block* block_;
221
217
 
222
218
  BlockConstructor();
@@ -253,7 +249,6 @@ class TableConstructor: public Constructor {
253
249
  table_options.comparator = options.comparator;
254
250
  return Table::Open(table_options, source_, sink.contents().size(), &table_);
255
251
  }
256
- virtual size_t NumBytes() const { return source_->Size(); }
257
252
 
258
253
  virtual Iterator* NewIterator() const {
259
254
  return table_->NewIterator(ReadOptions());
@@ -342,10 +337,6 @@ class MemTableConstructor: public Constructor {
342
337
  }
343
338
  return Status::OK();
344
339
  }
345
- virtual size_t NumBytes() const {
346
- return memtable_->ApproximateMemoryUsage();
347
- }
348
-
349
340
  virtual Iterator* NewIterator() const {
350
341
  return new KeyConvertingIterator(memtable_->NewIterator());
351
342
  }
@@ -379,13 +370,6 @@ class DBConstructor: public Constructor {
379
370
  }
380
371
  return Status::OK();
381
372
  }
382
- virtual size_t NumBytes() const {
383
- Range r("", "\xff\xff");
384
- uint64_t size;
385
- db_->GetApproximateSizes(&r, 1, &size);
386
- return size;
387
- }
388
-
389
373
  virtual Iterator* NewIterator() const {
390
374
  return db_->NewIterator(ReadOptions());
391
375
  }
@@ -809,7 +793,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
809
793
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
810
794
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
811
795
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
812
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 611000));
796
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
813
797
 
814
798
  }
815
799
 
@@ -0,0 +1,95 @@
1
+ // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "leveldb/filter_policy.h"
6
+
7
+ #include "leveldb/slice.h"
8
+ #include "util/hash.h"
9
+
10
+ namespace leveldb {
11
+
12
+ namespace {
13
+ static uint32_t BloomHash(const Slice& key) {
14
+ return Hash(key.data(), key.size(), 0xbc9f1d34);
15
+ }
16
+
17
+ class BloomFilterPolicy : public FilterPolicy {
18
+ private:
19
+ size_t bits_per_key_;
20
+ size_t k_;
21
+
22
+ public:
23
+ explicit BloomFilterPolicy(int bits_per_key)
24
+ : bits_per_key_(bits_per_key) {
25
+ // We intentionally round down to reduce probing cost a little bit
26
+ k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
27
+ if (k_ < 1) k_ = 1;
28
+ if (k_ > 30) k_ = 30;
29
+ }
30
+
31
+ virtual const char* Name() const {
32
+ return "leveldb.BuiltinBloomFilter";
33
+ }
34
+
35
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
36
+ // Compute bloom filter size (in both bits and bytes)
37
+ size_t bits = n * bits_per_key_;
38
+
39
+ // For small n, we can see a very high false positive rate. Fix it
40
+ // by enforcing a minimum bloom filter length.
41
+ if (bits < 64) bits = 64;
42
+
43
+ size_t bytes = (bits + 7) / 8;
44
+ bits = bytes * 8;
45
+
46
+ const size_t init_size = dst->size();
47
+ dst->resize(init_size + bytes, 0);
48
+ dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
49
+ char* array = &(*dst)[init_size];
50
+ for (size_t i = 0; i < n; i++) {
51
+ // Use double-hashing to generate a sequence of hash values.
52
+ // See analysis in [Kirsch,Mitzenmacher 2006].
53
+ uint32_t h = BloomHash(keys[i]);
54
+ const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
55
+ for (size_t j = 0; j < k_; j++) {
56
+ const uint32_t bitpos = h % bits;
57
+ array[bitpos/8] |= (1 << (bitpos % 8));
58
+ h += delta;
59
+ }
60
+ }
61
+ }
62
+
63
+ virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
64
+ const size_t len = bloom_filter.size();
65
+ if (len < 2) return false;
66
+
67
+ const char* array = bloom_filter.data();
68
+ const size_t bits = (len - 1) * 8;
69
+
70
+ // Use the encoded k so that we can read filters generated by
71
+ // bloom filters created using different parameters.
72
+ const size_t k = array[len-1];
73
+ if (k > 30) {
74
+ // Reserved for potentially new encodings for short bloom filters.
75
+ // Consider it a match.
76
+ return true;
77
+ }
78
+
79
+ uint32_t h = BloomHash(key);
80
+ const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
81
+ for (size_t j = 0; j < k; j++) {
82
+ const uint32_t bitpos = h % bits;
83
+ if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
84
+ h += delta;
85
+ }
86
+ return true;
87
+ }
88
+ };
89
+ }
90
+
91
+ const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
92
+ return new BloomFilterPolicy(bits_per_key);
93
+ }
94
+
95
+ } // namespace leveldb
@@ -0,0 +1,159 @@
1
+ // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "leveldb/filter_policy.h"
6
+
7
+ #include "util/logging.h"
8
+ #include "util/testharness.h"
9
+ #include "util/testutil.h"
10
+
11
+ namespace leveldb {
12
+
13
+ static const int kVerbose = 1;
14
+
15
+ static Slice Key(int i, char* buffer) {
16
+ memcpy(buffer, &i, sizeof(i));
17
+ return Slice(buffer, sizeof(i));
18
+ }
19
+
20
+ class BloomTest {
21
+ private:
22
+ const FilterPolicy* policy_;
23
+ std::string filter_;
24
+ std::vector<std::string> keys_;
25
+
26
+ public:
27
+ BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
28
+
29
+ ~BloomTest() {
30
+ delete policy_;
31
+ }
32
+
33
+ void Reset() {
34
+ keys_.clear();
35
+ filter_.clear();
36
+ }
37
+
38
+ void Add(const Slice& s) {
39
+ keys_.push_back(s.ToString());
40
+ }
41
+
42
+ void Build() {
43
+ std::vector<Slice> key_slices;
44
+ for (size_t i = 0; i < keys_.size(); i++) {
45
+ key_slices.push_back(Slice(keys_[i]));
46
+ }
47
+ filter_.clear();
48
+ policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
49
+ keys_.clear();
50
+ if (kVerbose >= 2) DumpFilter();
51
+ }
52
+
53
+ size_t FilterSize() const {
54
+ return filter_.size();
55
+ }
56
+
57
+ void DumpFilter() {
58
+ fprintf(stderr, "F(");
59
+ for (size_t i = 0; i+1 < filter_.size(); i++) {
60
+ const unsigned int c = static_cast<unsigned int>(filter_[i]);
61
+ for (int j = 0; j < 8; j++) {
62
+ fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
63
+ }
64
+ }
65
+ fprintf(stderr, ")\n");
66
+ }
67
+
68
+ bool Matches(const Slice& s) {
69
+ if (!keys_.empty()) {
70
+ Build();
71
+ }
72
+ return policy_->KeyMayMatch(s, filter_);
73
+ }
74
+
75
+ double FalsePositiveRate() {
76
+ char buffer[sizeof(int)];
77
+ int result = 0;
78
+ for (int i = 0; i < 10000; i++) {
79
+ if (Matches(Key(i + 1000000000, buffer))) {
80
+ result++;
81
+ }
82
+ }
83
+ return result / 10000.0;
84
+ }
85
+ };
86
+
87
+ TEST(BloomTest, EmptyFilter) {
88
+ ASSERT_TRUE(! Matches("hello"));
89
+ ASSERT_TRUE(! Matches("world"));
90
+ }
91
+
92
+ TEST(BloomTest, Small) {
93
+ Add("hello");
94
+ Add("world");
95
+ ASSERT_TRUE(Matches("hello"));
96
+ ASSERT_TRUE(Matches("world"));
97
+ ASSERT_TRUE(! Matches("x"));
98
+ ASSERT_TRUE(! Matches("foo"));
99
+ }
100
+
101
+ static int NextLength(int length) {
102
+ if (length < 10) {
103
+ length += 1;
104
+ } else if (length < 100) {
105
+ length += 10;
106
+ } else if (length < 1000) {
107
+ length += 100;
108
+ } else {
109
+ length += 1000;
110
+ }
111
+ return length;
112
+ }
113
+
114
+ TEST(BloomTest, VaryingLengths) {
115
+ char buffer[sizeof(int)];
116
+
117
+ // Count number of filters that significantly exceed the false positive rate
118
+ int mediocre_filters = 0;
119
+ int good_filters = 0;
120
+
121
+ for (int length = 1; length <= 10000; length = NextLength(length)) {
122
+ Reset();
123
+ for (int i = 0; i < length; i++) {
124
+ Add(Key(i, buffer));
125
+ }
126
+ Build();
127
+
128
+ ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
129
+
130
+ // All added keys must match
131
+ for (int i = 0; i < length; i++) {
132
+ ASSERT_TRUE(Matches(Key(i, buffer)))
133
+ << "Length " << length << "; key " << i;
134
+ }
135
+
136
+ // Check false positive rate
137
+ double rate = FalsePositiveRate();
138
+ if (kVerbose >= 1) {
139
+ fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
140
+ rate*100.0, length, static_cast<int>(FilterSize()));
141
+ }
142
+ ASSERT_LE(rate, 0.02); // Must not be over 2%
143
+ if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
144
+ else good_filters++;
145
+ }
146
+ if (kVerbose >= 1) {
147
+ fprintf(stderr, "Filters: %d good, %d mediocre\n",
148
+ good_filters, mediocre_filters);
149
+ }
150
+ ASSERT_LE(mediocre_filters, good_filters/5);
151
+ }
152
+
153
+ // Different bits-per-byte
154
+
155
+ } // namespace leveldb
156
+
157
+ int main(int argc, char** argv) {
158
+ return leveldb::test::RunAllTests();
159
+ }