RubyGems - leveldb-ruby - Versions diffs - 0.14 → 0.15 - Mend

leveldb-ruby 0.14 → 0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

data/LICENSE +24 -0
data/README +60 -16
data/ext/leveldb/extconf.rb +1 -1
data/ext/leveldb/leveldb.cc +187 -18
data/leveldb/Makefile +82 -96
data/leveldb/build_detect_platform +137 -51
data/leveldb/db/c.cc +110 -0
data/leveldb/db/db_bench.cc +105 -4
data/leveldb/db/db_impl.cc +135 -45
data/leveldb/db/db_impl.h +12 -10
data/leveldb/db/db_test.cc +666 -431
data/leveldb/db/dbformat.cc +20 -0
data/leveldb/db/dbformat.h +12 -0
data/leveldb/db/repair.cc +3 -1
data/leveldb/db/skiplist.h +2 -1
data/leveldb/db/table_cache.cc +42 -16
data/leveldb/db/table_cache.h +11 -0
data/leveldb/db/version_set.cc +46 -41
data/leveldb/db/version_set.h +9 -0
data/leveldb/db/write_batch.cc +13 -4
data/leveldb/db/write_batch_internal.h +2 -0
data/leveldb/db/write_batch_test.cc +31 -0
data/leveldb/include/leveldb/c.h +29 -0
data/leveldb/include/leveldb/db.h +2 -1
data/leveldb/include/leveldb/filter_policy.h +70 -0
data/leveldb/include/leveldb/options.h +8 -0
data/leveldb/include/leveldb/status.h +6 -0
data/leveldb/include/leveldb/table.h +15 -0
data/leveldb/include/leveldb/table_builder.h +1 -0
data/leveldb/port/atomic_pointer.h +13 -5
data/leveldb/port/port.h +0 -2
data/leveldb/port/port_example.h +10 -0
data/leveldb/port/port_posix.cc +4 -0
data/leveldb/port/port_posix.h +24 -9
data/leveldb/table/block.cc +8 -4
data/leveldb/table/block.h +3 -2
data/leveldb/table/filter_block.cc +111 -0
data/leveldb/table/filter_block.h +68 -0
data/leveldb/table/filter_block_test.cc +128 -0
data/leveldb/table/format.cc +17 -7
data/leveldb/table/format.h +9 -4
data/leveldb/table/table.cc +107 -6
data/leveldb/table/table_builder.cc +49 -6
data/leveldb/table/table_test.cc +8 -24
data/leveldb/util/bloom.cc +95 -0
data/leveldb/util/bloom_test.cc +159 -0
data/leveldb/util/coding_test.cc +23 -0
data/leveldb/util/comparator.cc +8 -3
data/leveldb/util/env_posix.cc +46 -4
data/leveldb/util/filter_policy.cc +11 -0
data/leveldb/util/options.cc +2 -1
data/lib/leveldb.rb +31 -5
metadata +227 -109
data/leveldb/port/port_android.cc +0 -64
data/leveldb/port/port_android.h +0 -156

data/leveldb/table/table_builder.cc CHANGED

@@ -5,14 +5,15 @@
 #include "leveldb/table_builder.h"
 #include <assert.h>
-#include <stdio.h>
 #include "leveldb/comparator.h"
 #include "leveldb/env.h"
+#include "leveldb/filter_policy.h"
+#include "leveldb/options.h"
 #include "table/block_builder.h"
+#include "table/filter_block.h"
 #include "table/format.h"
 #include "util/coding.h"
 #include "util/crc32c.h"
-#include "util/logging.h"
 namespace leveldb {
@@ -27,6 +28,7 @@ struct TableBuilder::Rep {
   std::string last_key;
   int64_t num_entries;
   bool closed;          // Either Finish() or Abandon() has been called.
+  FilterBlockBuilder* filter_block;
   // We do not emit the index entry for a block until we have seen the
   // first key for the next data block.  This allows us to use shorter
@@ -51,6 +53,8 @@ struct TableBuilder::Rep {
         index_block(&index_block_options),
         num_entries(0),
         closed(false),
+        filter_block(opt.filter_policy == NULL ? NULL
+                     : new FilterBlockBuilder(opt.filter_policy)),
         pending_index_entry(false) {
     index_block_options.block_restart_interval = 1;
   }
@@ -58,10 +62,14 @@ struct TableBuilder::Rep {
 TableBuilder::TableBuilder(const Options& options, WritableFile* file)
     : rep_(new Rep(options, file)) {
+  if (rep_->filter_block != NULL) {
+    rep_->filter_block->StartBlock(0);
+  }
 }
 TableBuilder::~TableBuilder() {
   assert(rep_->closed);  // Catch errors where caller forgot to call Finish()
+  delete rep_->filter_block;
   delete rep_;
 }
@@ -98,6 +106,10 @@ void TableBuilder::Add(const Slice& key, const Slice& value) {
     r->pending_index_entry = false;
   }
+  if (r->filter_block != NULL) {
+    r->filter_block->AddKey(key);
+  }
   r->last_key.assign(key.data(), key.size());
   r->num_entries++;
   r->data_block.Add(key, value);
@@ -119,6 +131,9 @@ void TableBuilder::Flush() {
     r->pending_index_entry = true;
     r->status = r->file->Flush();
   }
+  if (r->filter_block != NULL) {
+    r->filter_block->StartBlock(r->offset);
+  }
 }
 void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
@@ -152,6 +167,15 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
       break;
     }
   }
+  WriteRawBlock(block_contents, type, handle);
+  r->compressed_output.clear();
+  block->Reset();
+}
+void TableBuilder::WriteRawBlock(const Slice& block_contents,
+                                 CompressionType type,
+                                 BlockHandle* handle) {
+  Rep* r = rep_;
   handle->set_offset(r->offset);
   handle->set_size(block_contents.size());
   r->status = r->file->Append(block_contents);
@@ -166,8 +190,6 @@ void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
       r->offset += block_contents.size() + kBlockTrailerSize;
     }
   }
-  r->compressed_output.clear();
-  block->Reset();
 }
 Status TableBuilder::status() const {
@@ -179,13 +201,32 @@ Status TableBuilder::Finish() {
   Flush();
   assert(!r->closed);
   r->closed = true;
-  BlockHandle metaindex_block_handle;
-  BlockHandle index_block_handle;
+  BlockHandle filter_block_handle, metaindex_block_handle, index_block_handle;
+  // Write filter block
+  if (ok() && r->filter_block != NULL) {
+    WriteRawBlock(r->filter_block->Finish(), kNoCompression,
+                  &filter_block_handle);
+  }
+  // Write metaindex block
   if (ok()) {
     BlockBuilder meta_index_block(&r->options);
+    if (r->filter_block != NULL) {
+      // Add mapping from "filter.Name" to location of filter data
+      std::string key = "filter.";
+      key.append(r->options.filter_policy->Name());
+      std::string handle_encoding;
+      filter_block_handle.EncodeTo(&handle_encoding);
+      meta_index_block.Add(key, handle_encoding);
+    }
     // TODO(postrelease): Add stats and other meta blocks
     WriteBlock(&meta_index_block, &metaindex_block_handle);
   }
+  // Write index block
   if (ok()) {
     if (r->pending_index_entry) {
       r->options.comparator->FindShortSuccessor(&r->last_key);
@@ -196,6 +237,8 @@ Status TableBuilder::Finish() {
     }
     WriteBlock(&r->index_block, &index_block_handle);
   }
+  // Write footer
   if (ok()) {
     Footer footer;
     footer.set_metaindex_handle(metaindex_block_handle);

data/leveldb/table/table_test.cc CHANGED

@@ -168,8 +168,6 @@ class Constructor {
   // Construct the data structure from the data in "data"
   virtual Status FinishImpl(const Options& options, const KVMap& data) = 0;
-  virtual size_t NumBytes() const = 0;
   virtual Iterator* NewIterator() const = 0;
   virtual const KVMap& data() { return data_; }
@@ -185,7 +183,6 @@ class BlockConstructor: public Constructor {
   explicit BlockConstructor(const Comparator* cmp)
       : Constructor(cmp),
         comparator_(cmp),
-        block_size_(-1),
         block_(NULL) { }
   ~BlockConstructor() {
     delete block_;
@@ -201,22 +198,21 @@ class BlockConstructor: public Constructor {
       builder.Add(it->first, it->second);
     }
     // Open the block
-    Slice block_data = builder.Finish();
-    block_size_ = block_data.size();
-    char* block_data_copy = new char[block_size_];
-    memcpy(block_data_copy, block_data.data(), block_size_);
-    block_ = new Block(block_data_copy, block_size_);
+    data_ = builder.Finish().ToString();
+    BlockContents contents;
+    contents.data = data_;
+    contents.cachable = false;
+    contents.heap_allocated = false;
+    block_ = new Block(contents);
     return Status::OK();
   }
-  virtual size_t NumBytes() const { return block_size_; }
   virtual Iterator* NewIterator() const {
     return block_->NewIterator(comparator_);
   }
  private:
   const Comparator* comparator_;
-  int block_size_;
+  std::string data_;
   Block* block_;
   BlockConstructor();
@@ -253,7 +249,6 @@ class TableConstructor: public Constructor {
     table_options.comparator = options.comparator;
     return Table::Open(table_options, source_, sink.contents().size(), &table_);
   }
-  virtual size_t NumBytes() const { return source_->Size(); }
   virtual Iterator* NewIterator() const {
     return table_->NewIterator(ReadOptions());
@@ -342,10 +337,6 @@ class MemTableConstructor: public Constructor {
     }
     return Status::OK();
   }
-  virtual size_t NumBytes() const {
-    return memtable_->ApproximateMemoryUsage();
-  }
   virtual Iterator* NewIterator() const {
     return new KeyConvertingIterator(memtable_->NewIterator());
   }
@@ -379,13 +370,6 @@ class DBConstructor: public Constructor {
     }
     return Status::OK();
   }
-  virtual size_t NumBytes() const {
-    Range r("", "\xff\xff");
-    uint64_t size;
-    db_->GetApproximateSizes(&r, 1, &size);
-    return size;
-  }
   virtual Iterator* NewIterator() const {
     return db_->NewIterator(ReadOptions());
   }
@@ -809,7 +793,7 @@ TEST(TableTest, ApproximateOffsetOfPlain) {
   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k05"),  210000, 211000));
   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k06"),  510000, 511000));
   ASSERT_TRUE(Between(c.ApproximateOffsetOf("k07"),  510000, 511000));
-  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),  610000, 611000));
+  ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"),  610000, 612000));
 }

data/leveldb/util/bloom.cc ADDED

@@ -0,0 +1,95 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "leveldb/filter_policy.h"
+#include "leveldb/slice.h"
+#include "util/hash.h"
+namespace leveldb {
+namespace {
+static uint32_t BloomHash(const Slice& key) {
+  return Hash(key.data(), key.size(), 0xbc9f1d34);
+}
+class BloomFilterPolicy : public FilterPolicy {
+ private:
+  size_t bits_per_key_;
+  size_t k_;
+ public:
+  explicit BloomFilterPolicy(int bits_per_key)
+      : bits_per_key_(bits_per_key) {
+    // We intentionally round down to reduce probing cost a little bit
+    k_ = static_cast<size_t>(bits_per_key * 0.69);  // 0.69 =~ ln(2)
+    if (k_ < 1) k_ = 1;
+    if (k_ > 30) k_ = 30;
+  }
+  virtual const char* Name() const {
+    return "leveldb.BuiltinBloomFilter";
+  }
+  virtual void CreateFilter(const Slice* keys, int n, std::string* dst) const {
+    // Compute bloom filter size (in both bits and bytes)
+    size_t bits = n * bits_per_key_;
+    // For small n, we can see a very high false positive rate.  Fix it
+    // by enforcing a minimum bloom filter length.
+    if (bits < 64) bits = 64;
+    size_t bytes = (bits + 7) / 8;
+    bits = bytes * 8;
+    const size_t init_size = dst->size();
+    dst->resize(init_size + bytes, 0);
+    dst->push_back(static_cast<char>(k_));  // Remember # of probes in filter
+    char* array = &(*dst)[init_size];
+    for (size_t i = 0; i < n; i++) {
+      // Use double-hashing to generate a sequence of hash values.
+      // See analysis in [Kirsch,Mitzenmacher 2006].
+      uint32_t h = BloomHash(keys[i]);
+      const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
+      for (size_t j = 0; j < k_; j++) {
+        const uint32_t bitpos = h % bits;
+        array[bitpos/8] |= (1 << (bitpos % 8));
+        h += delta;
+      }
+    }
+  }
+  virtual bool KeyMayMatch(const Slice& key, const Slice& bloom_filter) const {
+    const size_t len = bloom_filter.size();
+    if (len < 2) return false;
+    const char* array = bloom_filter.data();
+    const size_t bits = (len - 1) * 8;
+    // Use the encoded k so that we can read filters generated by
+    // bloom filters created using different parameters.
+    const size_t k = array[len-1];
+    if (k > 30) {
+      // Reserved for potentially new encodings for short bloom filters.
+      // Consider it a match.
+      return true;
+    }
+    uint32_t h = BloomHash(key);
+    const uint32_t delta = (h >> 17) | (h << 15);  // Rotate right 17 bits
+    for (size_t j = 0; j < k; j++) {
+      const uint32_t bitpos = h % bits;
+      if ((array[bitpos/8] & (1 << (bitpos % 8))) == 0) return false;
+      h += delta;
+    }
+    return true;
+  }
+};
+}
+const FilterPolicy* NewBloomFilterPolicy(int bits_per_key) {
+  return new BloomFilterPolicy(bits_per_key);
+}
+}  // namespace leveldb

data/leveldb/util/bloom_test.cc ADDED

@@ -0,0 +1,159 @@
+// Copyright (c) 2012 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "leveldb/filter_policy.h"
+#include "util/logging.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+namespace leveldb {
+static const int kVerbose = 1;
+static Slice Key(int i, char* buffer) {
+  memcpy(buffer, &i, sizeof(i));
+  return Slice(buffer, sizeof(i));
+}
+class BloomTest {
+ private:
+  const FilterPolicy* policy_;
+  std::string filter_;
+  std::vector<std::string> keys_;
+ public:
+  BloomTest() : policy_(NewBloomFilterPolicy(10)) { }
+  ~BloomTest() {
+    delete policy_;
+  }
+  void Reset() {
+    keys_.clear();
+    filter_.clear();
+  }
+  void Add(const Slice& s) {
+    keys_.push_back(s.ToString());
+  }
+  void Build() {
+    std::vector<Slice> key_slices;
+    for (size_t i = 0; i < keys_.size(); i++) {
+      key_slices.push_back(Slice(keys_[i]));
+    }
+    filter_.clear();
+    policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_);
+    keys_.clear();
+    if (kVerbose >= 2) DumpFilter();
+  }
+  size_t FilterSize() const {
+    return filter_.size();
+  }
+  void DumpFilter() {
+    fprintf(stderr, "F(");
+    for (size_t i = 0; i+1 < filter_.size(); i++) {
+      const unsigned int c = static_cast<unsigned int>(filter_[i]);
+      for (int j = 0; j < 8; j++) {
+        fprintf(stderr, "%c", (c & (1 <<j)) ? '1' : '.');
+      }
+    }
+    fprintf(stderr, ")\n");
+  }
+  bool Matches(const Slice& s) {
+    if (!keys_.empty()) {
+      Build();
+    }
+    return policy_->KeyMayMatch(s, filter_);
+  }
+  double FalsePositiveRate() {
+    char buffer[sizeof(int)];
+    int result = 0;
+    for (int i = 0; i < 10000; i++) {
+      if (Matches(Key(i + 1000000000, buffer))) {
+        result++;
+      }
+    }
+    return result / 10000.0;
+  }
+};
+TEST(BloomTest, EmptyFilter) {
+  ASSERT_TRUE(! Matches("hello"));
+  ASSERT_TRUE(! Matches("world"));
+}
+TEST(BloomTest, Small) {
+  Add("hello");
+  Add("world");
+  ASSERT_TRUE(Matches("hello"));
+  ASSERT_TRUE(Matches("world"));
+  ASSERT_TRUE(! Matches("x"));
+  ASSERT_TRUE(! Matches("foo"));
+}
+static int NextLength(int length) {
+  if (length < 10) {
+    length += 1;
+  } else if (length < 100) {
+    length += 10;
+  } else if (length < 1000) {
+    length += 100;
+  } else {
+    length += 1000;
+  }
+  return length;
+}
+TEST(BloomTest, VaryingLengths) {
+  char buffer[sizeof(int)];
+  // Count number of filters that significantly exceed the false positive rate
+  int mediocre_filters = 0;
+  int good_filters = 0;
+  for (int length = 1; length <= 10000; length = NextLength(length)) {
+    Reset();
+    for (int i = 0; i < length; i++) {
+      Add(Key(i, buffer));
+    }
+    Build();
+    ASSERT_LE(FilterSize(), (length * 10 / 8) + 40) << length;
+    // All added keys must match
+    for (int i = 0; i < length; i++) {
+      ASSERT_TRUE(Matches(Key(i, buffer)))
+          << "Length " << length << "; key " << i;
+    }
+    // Check false positive rate
+    double rate = FalsePositiveRate();
+    if (kVerbose >= 1) {
+      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
+              rate*100.0, length, static_cast<int>(FilterSize()));
+    }
+    ASSERT_LE(rate, 0.02);   // Must not be over 2%
+    if (rate > 0.0125) mediocre_filters++;  // Allowed, but not too often
+    else good_filters++;
+  }
+  if (kVerbose >= 1) {
+    fprintf(stderr, "Filters: %d good, %d mediocre\n",
+            good_filters, mediocre_filters);
+  }
+  ASSERT_LE(mediocre_filters, good_filters/5);
+}
+// Different bits-per-byte
+}  // namespace leveldb
+int main(int argc, char** argv) {
+  return leveldb::test::RunAllTests();
+}