leveldb-ruby 0.14 → 0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/LICENSE +24 -0
  2. data/README +60 -16
  3. data/ext/leveldb/extconf.rb +1 -1
  4. data/ext/leveldb/leveldb.cc +187 -18
  5. data/leveldb/Makefile +82 -96
  6. data/leveldb/build_detect_platform +137 -51
  7. data/leveldb/db/c.cc +110 -0
  8. data/leveldb/db/db_bench.cc +105 -4
  9. data/leveldb/db/db_impl.cc +135 -45
  10. data/leveldb/db/db_impl.h +12 -10
  11. data/leveldb/db/db_test.cc +666 -431
  12. data/leveldb/db/dbformat.cc +20 -0
  13. data/leveldb/db/dbformat.h +12 -0
  14. data/leveldb/db/repair.cc +3 -1
  15. data/leveldb/db/skiplist.h +2 -1
  16. data/leveldb/db/table_cache.cc +42 -16
  17. data/leveldb/db/table_cache.h +11 -0
  18. data/leveldb/db/version_set.cc +46 -41
  19. data/leveldb/db/version_set.h +9 -0
  20. data/leveldb/db/write_batch.cc +13 -4
  21. data/leveldb/db/write_batch_internal.h +2 -0
  22. data/leveldb/db/write_batch_test.cc +31 -0
  23. data/leveldb/include/leveldb/c.h +29 -0
  24. data/leveldb/include/leveldb/db.h +2 -1
  25. data/leveldb/include/leveldb/filter_policy.h +70 -0
  26. data/leveldb/include/leveldb/options.h +8 -0
  27. data/leveldb/include/leveldb/status.h +6 -0
  28. data/leveldb/include/leveldb/table.h +15 -0
  29. data/leveldb/include/leveldb/table_builder.h +1 -0
  30. data/leveldb/port/atomic_pointer.h +13 -5
  31. data/leveldb/port/port.h +0 -2
  32. data/leveldb/port/port_example.h +10 -0
  33. data/leveldb/port/port_posix.cc +4 -0
  34. data/leveldb/port/port_posix.h +24 -9
  35. data/leveldb/table/block.cc +8 -4
  36. data/leveldb/table/block.h +3 -2
  37. data/leveldb/table/filter_block.cc +111 -0
  38. data/leveldb/table/filter_block.h +68 -0
  39. data/leveldb/table/filter_block_test.cc +128 -0
  40. data/leveldb/table/format.cc +17 -7
  41. data/leveldb/table/format.h +9 -4
  42. data/leveldb/table/table.cc +107 -6
  43. data/leveldb/table/table_builder.cc +49 -6
  44. data/leveldb/table/table_test.cc +8 -24
  45. data/leveldb/util/bloom.cc +95 -0
  46. data/leveldb/util/bloom_test.cc +159 -0
  47. data/leveldb/util/coding_test.cc +23 -0
  48. data/leveldb/util/comparator.cc +8 -3
  49. data/leveldb/util/env_posix.cc +46 -4
  50. data/leveldb/util/filter_policy.cc +11 -0
  51. data/leveldb/util/options.cc +2 -1
  52. data/lib/leveldb.rb +31 -5
  53. metadata +227 -109
  54. data/leveldb/port/port_android.cc +0 -64
  55. data/leveldb/port/port_android.h +0 -156
@@ -5,14 +5,15 @@
5
5
  #include "leveldb/table_builder.h"
6
6
 
7
7
  #include <assert.h>
8
- #include <stdio.h>
9
8
  #include "leveldb/comparator.h"
10
9
  #include "leveldb/env.h"
10
+ #include "leveldb/filter_policy.h"
11
+ #include "leveldb/options.h"
11
12
  #include "table/block_builder.h"
13
+ #include "table/filter_block.h"
12
14
  #include "table/format.h"
13
15
  #include "util/coding.h"
14
16
  #include "util/crc32c.h"
15
- #include "util/logging.h"
16
17
 
17
18
  namespace leveldb {
18
19
 
@@ -27,6 +28,7 @@ struct TableBuilder::Rep {
27
28
  std::string last_key;
28
29
  int64_t num_entries;
29
30
  bool closed; // Either Finish() or Abandon() has been called.
31
+ FilterBlockBuilder* filter_block;
30
32
 
31
33
  // We do not emit the index entry for a block until we have seen the
32
34
  // first key for the next data block. This allows us to use shorter
@@ -51,6 +53,8 @@ struct TableBuilder::Rep {
51
53
  index_block(&index_block_options),
52
54
  num_entries(0),
53
55
  closed(false),
56
+ filter_block(opt.filter_policy == NULL ? NULL
57
+ : new FilterBlockBuilder(opt.filter_policy)),
54
58
  pending_index_entry(false) {
55
59
  index_block_options.block_restart_interval = 1;
56
60
  }
@@ -58,10 +62,14 @@ struct TableBuilder::Rep {
58
62
 
59
63
  TableBuilder::TableBuilder(const Options& options, WritableFile* file)
60
64
  : rep_(new Rep(options, file)) {
65
+ if (rep_->filter_block != NULL) {
66
+ rep_->filter_block->StartBlock(0);
67
+ }
61
68
  }
62
69
 
63
70
  TableBuilder::~TableBuilder() {
64
71
  assert(rep_->closed); // Catch errors where caller forgot to call Finish()
72
+ delete rep_->filter_block;
65
73
  delete rep_;
66
74
  }
67
75
 
@@ -98,6 +106,10 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
98
106
  r->pending_index_entry = false;
99
107
  }
100
108
 
109
+ if (r->filter_block != NULL) {
110
+ r->filter_block->AddKey(key);
111
+ }
112
+
101
113
  r->last_key.assign(key.data(), key.size());
102
114
  r->num_entries++;
103
115
  r->data_block.Add(key, value);
@@ -119,6 +131,9 @@ void TableBuilder::Flush() {
119
131
  r->pending_index_entry = true;
120
132
  r->status = r->file->Flush();
121
133
  }
134
+ if (r->filter_block != NULL) {
135
+ r->filter_block->StartBlock(r->offset);
136
+ }
122
137
  }
123
138
 
124
139
  void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
@@ -152,6 +167,15 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
152
167
  break;
153
168
  }
154
169
  }
170
+ WriteRawBlock(block_contents, type, handle);
171
+ r->compressed_output.clear();
172
+ block->Reset();
173
+ }
174
+
175
+ void TableBuilder::WriteRawBlock(const Slice& block_contents,
176
+ CompressionType type,
177
+ BlockHandle* handle) {
178
+ Rep* r = rep_;
155
179
  handle->set_offset(r->offset);
156
180
  handle->set_size(block_contents.size());
157
181
  r->status = r->file->Append(block_contents);
@@ -166,8 +190,6 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
166
190
  r->offset += block_contents.size() + kBlockTrailerSize;
167
191
  }
168
192
  }
169
- r->compressed_output.clear();
170
- block->Reset();
171
193
  }
172
194
 
173
195
  Status TableBuilder::status() const {
@@ -179,13 +201,32 @@ Status TableBuilder::Finish() {
179
201
  Flush();
180
202
  assert(!r->closed);
181
203
  r->closed = true;
182
- BlockHandle metaindex_block_handle;
183
- BlockHandle index_block_handle;
204
+
205
+ BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
206
+
207
+ // Write filter block
208
+ if (ok() && r->filter_block != NULL) {
209
+ WriteRawBlock(r->filter_block->Finish(), kNoCompression,
210
+ &filter_block_handle);
211
+ }
212
+
213
+ // Write metaindex block
184
214
  if (ok()) {
185
215
  BlockBuilder meta_index_block(&r->options);
216
+ if (r->filter_block != NULL) {
217
+ // Add mapping from "filter.Name" to location of filter data
218
+ std::string key = "filter.";
219
+ key.append(r->options.filter_policy->Name());
220
+ std::string handle_encoding;
221
+ filter_block_handle.EncodeTo(&handle_encoding);
222
+ meta_index_block.Add(key, handle_encoding);
223
+ }
224
+
186
225
  // TODO(postrelease): Add stats and other meta blocks
187
226
  WriteBlock(&meta_index_block, &metaindex_block_handle);
188
227
  }
228
+
229
+ // Write index block
189
230
  if (ok()) {
190
231
  if (r->pending_index_entry) {
191
232
  r->options.comparator->FindShortSuccessor(&r->last_key);
@@ -196,6 +237,8 @@ Status TableBuilder::Finish() {
196
237
  }
197
238
  WriteBlock(&r->index_block, &index_block_handle);
198
239
  }
240
+
241
+ // Write footer
199
242
  if (ok()) {
200
243
  Footer footer;
201
244
  footer.set_metaindex_handle(metaindex_block_handle);
@@ -168,8 +168,6 @@ class Constructor {
168
168
  // Construct the data structure from the data in "data"
169
169
  virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
170
170
 
171
- virtual size_t NumBytes() const = 0;
172
-
173
171
  virtual Iterator* NewIterator() const = 0;
174
172
 
175
173
  virtual const KVMap& data() { return data_; }
@@ -185,7 +183,6 @@ class BlockConstructor: public Constructor {
185
183
  explicit BlockConstructor(const Comparator* cmp)
186
184
  : Constructor(cmp),
187
185
  comparator_(cmp),
188
- block_size_(-1),
189
186
  block_(NULL) { }
190
187
  ~BlockConstructor() {
191
188
  delete block_;
@@ -201,22 +198,21 @@ class BlockConstructor: public Constructor {
201
198
  builder.Add(it->first, it->second);
202
199
  }
203
200
  // Open the block
204
- Slice block_data = builder.Finish();
205
- block_size_ = block_data.size();
206
- char* block_data_copy = new char[block_size_];
207
- memcpy(block_data_copy, block_data.data(), block_size_);
208
- block_ = new Block(block_data_copy, block_size_);
201
+ data_ = builder.Finish().ToString();
202
+ BlockContents contents;
203
+ contents.data = data_;
204
+ contents.cachable = false;
205
+ contents.heap_allocated = false;
206
+ block_ = new Block(contents);
209
207
  return Status::OK();
210
208
  }
211
- virtual size_t NumBytes() const { return block_size_; }
212
-
213
209
  virtual Iterator* NewIterator() const {
214
210
  return block_->NewIterator(comparator_);
215
211
  }
216
212
 
217
213
  private:
218
214
  const Comparator* comparator_;
219
- int block_size_;
215
+ std::string data_;
220
216
  Block* block_;
221
217
 
222
218
  BlockConstructor();
@@ -253,7 +249,6 @@ class TableConstructor: public Constructor {
253
249
  table_options.comparator = options.comparator;
254
250
  return Table::Open(table_options, source_, sink.contents().size(), &table_);
255
251
  }
256
- virtual size_t NumBytes() const { return source_->Size(); }
257
252
 
258
253
  virtual Iterator* NewIterator() const {
259
254
  return table_->NewIterator(ReadOptions());
@@ -342,10 +337,6 @@ class MemTableConstructor: public Constructor {
342
337
  }
343
338
  return Status::OK();
344
339
  }
345
- virtual size_t NumBytes() const {
346
- return memtable_->ApproximateMemoryUsage();
347
- }
348
-
349
340
  virtual Iterator* NewIterator() const {
350
341
  return new KeyConvertingIterator(memtable_->NewIterator());
351
342
  }
@@ -379,13 +370,6 @@ class DBConstructor: public Constructor {
379
370
  }
380
371
  return Status::OK();
381
372
  }
382
- virtual size_t NumBytes() const {
383
- Range r("", "\xff\xff");
384
- uint64_t size;
385
- db_->GetApproximateSizes(&r, 1, &size);
386
- return size;
387
- }
388
-
389
373
  virtual Iterator* NewIterator() const {
390
374
  return db_->NewIterator(ReadOptions());
391
375
  }
@@ -809,7 +793,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
809
793
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"), 210000, 211000));
810
794
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"), 510000, 511000));
811
795
  ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"), 510000, 511000));
812
- ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 611000));
796
+ ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 610000, 612000));
813
797
 
814
798
  }
815
799
 
@@ -0,0 +1,95 @@
1
+ // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "leveldb/filter_policy.h"
6
+
7
+ #include "leveldb/slice.h"
8
+ #include "util/hash.h"
9
+
10
+ namespace leveldb {
11
+
12
+ namespace {
13
+ static uint32_t BloomHash(const Slice& key) {
14
+ return Hash(key.data(), key.size(), 0xbc9f1d34);
15
+ }
16
+
17
+ class BloomFilterPolicy : public FilterPolicy {
18
+ private:
19
+ size_t bits_per_key_;
20
+ size_t k_;
21
+
22
+ public:
23
+ explicit BloomFilterPolicy(int bits_per_key)
24
+ : bits_per_key_(bits_per_key) {
25
+ // We intentionally round down to reduce probing cost a little bit
26
+ k_ = static_cast<size_t>(bits_per_key * 0.69); // 0.69 =~ ln(2)
27
+ if (k_ < 1) k_ = 1;
28
+ if (k_ > 30) k_ = 30;
29
+ }
30
+
31
+ virtual const char* Name() const {
32
+ return "leveldb.BuiltinBloomFilter";
33
+ }
34
+
35
+ virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
36
+ // Compute bloom filter size (in both bits and bytes)
37
+ size_t bits = n * bits_per_key_;
38
+
39
+ // For small n, we can see a very high false positive rate. Fix it
40
+ // by enforcing a minimum bloom filter length.
41
+ if (bits < 64) bits = 64;
42
+
43
+ size_t bytes = (bits + 7) / 8;
44
+ bits = bytes * 8;
45
+
46
+ const size_t init_size = dst->size();
47
+ dst->resize(init_size + bytes, 0);
48
+ dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
49
+ char* array = &(*dst)[init_size];
50
+ for (size_t i = 0; i < n; i++) {
51
+ // Use double-hashing to generate a sequence of hash values.
52
+ // See analysis in [Kirsch,Mitzenmacher 2006].
53
+ uint32_t h = BloomHash(keys[i]);
54
+ const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
55
+ for (size_t j = 0; j < k_; j++) {
56
+ const uint32_t bitpos = h % bits;
57
+ array[bitpos/8] |= (1 << (bitpos % 8));
58
+ h += delta;
59
+ }
60
+ }
61
+ }
62
+
63
+ virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
64
+ const size_t len = bloom_filter.size();
65
+ if (len < 2) return false;
66
+
67
+ const char* array = bloom_filter.data();
68
+ const size_t bits = (len - 1) * 8;
69
+
70
+ // Use the encoded k so that we can read filters generated by
71
+ // bloom filters created using different parameters.
72
+ const size_t k = array[len-1];
73
+ if (k > 30) {
74
+ // Reserved for potentially new encodings for short bloom filters.
75
+ // Consider it a match.
76
+ return true;
77
+ }
78
+
79
+ uint32_t h = BloomHash(key);
80
+ const uint32_t delta = (h >> 17) | (h << 15); // Rotate right 17 bits
81
+ for (size_t j = 0; j < k; j++) {
82
+ const uint32_t bitpos = h % bits;
83
+ if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
84
+ h += delta;
85
+ }
86
+ return true;
87
+ }
88
+ };
89
+ }
90
+
91
+ const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
92
+ return new BloomFilterPolicy(bits_per_key);
93
+ }
94
+
95
+ } // namespace leveldb
@@ -0,0 +1,159 @@
1
+ // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
2
+ // Use of this source code is governed by a BSD-style license that can be
3
+ // found in the LICENSE file. See the AUTHORS file for names of contributors.
4
+
5
+ #include "leveldb/filter_policy.h"
6
+
7
+ #include "util/logging.h"
8
+ #include "util/testharness.h"
9
+ #include "util/testutil.h"
10
+
11
+ namespace leveldb {
12
+
13
+ static const int kVerbose = 1;
14
+
15
+ static Slice Key(int i, char* buffer) {
16
+ memcpy(buffer, &i, sizeof(i));
17
+ return Slice(buffer, sizeof(i));
18
+ }
19
+
20
+ class BloomTest {
21
+ private:
22
+ const FilterPolicy* policy_;
23
+ std::string filter_;
24
+ std::vector<std::string> keys_;
25
+
26
+ public:
27
+ BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
28
+
29
+ ~BloomTest() {
30
+ delete policy_;
31
+ }
32
+
33
+ void Reset() {
34
+ keys_.clear();
35
+ filter_.clear();
36
+ }
37
+
38
+ void Add(const Slice& s) {
39
+ keys_.push_back(s.ToString());
40
+ }
41
+
42
+ void Build() {
43
+ std::vector<Slice> key_slices;
44
+ for (size_t i = 0; i < keys_.size(); i++) {
45
+ key_slices.push_back(Slice(keys_[i]));
46
+ }
47
+ filter_.clear();
48
+ policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
49
+ keys_.clear();
50
+ if (kVerbose >= 2) DumpFilter();
51
+ }
52
+
53
+ size_t FilterSize() const {
54
+ return filter_.size();
55
+ }
56
+
57
+ void DumpFilter() {
58
+ fprintf(stderr, "F(");
59
+ for (size_t i = 0; i+1 < filter_.size(); i++) {
60
+ const unsigned int c = static_cast<unsigned int>(filter_[i]);
61
+ for (int j = 0; j < 8; j++) {
62
+ fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
63
+ }
64
+ }
65
+ fprintf(stderr, ")\n");
66
+ }
67
+
68
+ bool Matches(const Slice& s) {
69
+ if (!keys_.empty()) {
70
+ Build();
71
+ }
72
+ return policy_->KeyMayMatch(s, filter_);
73
+ }
74
+
75
+ double FalsePositiveRate() {
76
+ char buffer[sizeof(int)];
77
+ int result = 0;
78
+ for (int i = 0; i < 10000; i++) {
79
+ if (Matches(Key(i + 1000000000, buffer))) {
80
+ result++;
81
+ }
82
+ }
83
+ return result / 10000.0;
84
+ }
85
+ };
86
+
87
+ TEST(BloomTest, EmptyFilter) {
88
+ ASSERT_TRUE(! Matches("hello"));
89
+ ASSERT_TRUE(! Matches("world"));
90
+ }
91
+
92
+ TEST(BloomTest, Small) {
93
+ Add("hello");
94
+ Add("world");
95
+ ASSERT_TRUE(Matches("hello"));
96
+ ASSERT_TRUE(Matches("world"));
97
+ ASSERT_TRUE(! Matches("x"));
98
+ ASSERT_TRUE(! Matches("foo"));
99
+ }
100
+
101
// Returns the key count to try after `length`. The increment grows with the
// size of `length`: 1 below 10, 10 below 100, 100 below 1000, and 1000 from
// there on.
static int NextLength(int length) {
  const int step = (length < 10)   ? 1
                 : (length < 100)  ? 10
                 : (length < 1000) ? 100
                                   : 1000;
  return length + step;
}
113
+
114
+ TEST(BloomTest, VaryingLengths) {
115
+ char buffer[sizeof(int)];
116
+
117
+ // Count number of filters that significantly exceed the false positive rate
118
+ int mediocre_filters = 0;
119
+ int good_filters = 0;
120
+
121
+ for (int length = 1; length <= 10000; length = NextLength(length)) {
122
+ Reset();
123
+ for (int i = 0; i < length; i++) {
124
+ Add(Key(i, buffer));
125
+ }
126
+ Build();
127
+
128
+ ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
129
+
130
+ // All added keys must match
131
+ for (int i = 0; i < length; i++) {
132
+ ASSERT_TRUE(Matches(Key(i, buffer)))
133
+ << "Length " << length << "; key " << i;
134
+ }
135
+
136
+ // Check false positive rate
137
+ double rate = FalsePositiveRate();
138
+ if (kVerbose >= 1) {
139
+ fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
140
+ rate*100.0, length, static_cast<int>(FilterSize()));
141
+ }
142
+ ASSERT_LE(rate, 0.02); // Must not be over 2%
143
+ if (rate > 0.0125) mediocre_filters++; // Allowed, but not too often
144
+ else good_filters++;
145
+ }
146
+ if (kVerbose >= 1) {
147
+ fprintf(stderr, "Filters: %d good, %d mediocre\n",
148
+ good_filters, mediocre_filters);
149
+ }
150
+ ASSERT_LE(mediocre_filters, good_filters/5);
151
+ }
152
+
153
+ // TODO: add tests covering different bits-per-key settings
154
+
155
+ } // namespace leveldb
156
+
157
+ int main(int argc, char** argv) {
158
+ return leveldb::test::RunAllTests();
159
+ }