RubyGems - leveldb - Versions diffs - 0.1.3 → 0.1.4 - Mend

leveldb 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +4 -4
data/README.md +71 -2
data/ext/leveldb/Makefile +5 -1
data/ext/leveldb/db/autocompact_test.cc +118 -0
data/ext/leveldb/db/corruption_test.cc +22 -29
data/ext/leveldb/db/db_impl.cc +27 -14
data/ext/leveldb/db/db_impl.h +8 -1
data/ext/leveldb/db/db_iter.cc +29 -12
data/ext/leveldb/db/db_iter.h +5 -3
data/ext/leveldb/db/dbformat.h +3 -0
data/ext/leveldb/db/version_set.cc +92 -4
data/ext/leveldb/db/version_set.h +15 -0
data/ext/leveldb/include/leveldb/db.h +1 -1
data/ext/leveldb/util/env_posix.cc +32 -1
data/ext/leveldb/util/random.h +6 -1
data/lib/leveldb/db.rb +39 -17
data/lib/leveldb/version.rb +1 -1
metadata +4 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 45ece05a2078c65923a11be951e9bd4ed6065d52
-  data.tar.gz: 1e0067e76973f4562a84a8831cfb79e77289292a
+  metadata.gz: 8ac9eef1bb10dc5b82a7ea8cc6f4c2d64cc5d04c
+  data.tar.gz: 04748952f04ef49c0c5622712ade7f4b3fc211e8
 SHA512:
-  metadata.gz: d82cbd5f00b07320b1f02d01964b0f46c4dcf494e65dac1156e18aff8079c8ebddef747cabe1266111f3c5f88d2bd58c5142149a24cc2f07201e337688b54180
-  data.tar.gz: 00d47b0cf131dcf9edd6d2ddbf2fc4aaa64eab36179f0a5994fd0a3792067070b54e7fe79bfbdb06f3d9133f0e203c94b27320f269060ce978ef7bf9676eeabf
+  metadata.gz: 06f097cb8df8f2f5679dfced115ee73236caa4770aea85302d54c58dbf9d451982356e404cd3050ed8413df1bd7e4bd6379e5a7789ef2f5b81db48f216f40346
+  data.tar.gz: 2f846938f51b43632a5c9629ea618eccbcdabc42ccba33f63fe0e8a6ade6dbf026485c9461b8b99ab7cce1400135e15d1ba29311c58496963d90b99a9337dbf8

data/README.md CHANGED Viewed

@@ -36,8 +36,9 @@ embedded database. LevelDB is a persistent ordered map.
     $ brew install snappy
     $ git clone git://github.com/DAddYE/leveldb.git
     $ cd leveldb
-    $ rake compile
-    $ rake console
+    $ bundle install
+    $ bundle exec rake compile
+    $ bundle exec rake console
 ### Standard
@@ -130,6 +131,74 @@ db.read_property('leveldb.stats')
 db.stats
 ```
+## Benchmarks
+_Preface_: these numbers are only a rough, general-purpose indication. I know that [zedshaw](http://zedshaw.com/essays/programmer_stats.html)
+will kill me for this, but ... on my Mac:
+    Model Identifier:	MacBookPro10,1
+    Processor Name:	Intel Core i7
+    Processor Speed:	2.3 GHz
+    Number of Processors:	1
+    Total Number of Cores:	4
+    L2 Cache (per Core):	256 KB
+    L3 Cache:	6 MB
+    Memory:	8 GB
+The benchmark code is in [benchmark/leveldb.rb](/benchmark/leveldb.rb)
+Writing/reading `100MB` of _very_ random data, in values of `10KB` each:
+### Without compression:
+          user     system      total        real
+    put  0.530000   0.310000   0.840000 (  1.420387)
+    get  0.800000   0.460000   1.260000 (  2.626631)
+    Level  Files Size(MB) Time(sec) Read(MB) Write(MB)
+    --------------------------------------------------
+      0        1        0         0        0         0
+      2       50       98         0        0         0
+      3        1        2         0        0         0
+### With compression:
+          user     system      total        real
+    put  0.850000   0.320000   1.170000 (  1.721609)
+    get  1.160000   0.480000   1.640000 (  2.703543)
+    Level  Files Size(MB) Time(sec) Read(MB) Write(MB)
+    --------------------------------------------------
+      0        1        0         0        0         0
+      1        5       10         0        0         0
+      2       45       90         0        0         0
+**NOTE**: as you can see, `snappy` can't compress this kind of _very very_
+random data, but I was not interested in benchmarking snappy as a compressor,
+only in seeing how much _slower_ the db becomes when using it. As you can see,
+it is only a _little_ slower, and on normal _data_ the db size will be much, much better!
+### With batch:
+          user     system      total        real
+    put  0.260000   0.170000   0.430000 (  0.433407)
+    Level  Files Size(MB) Time(sec) Read(MB) Write(MB)
+    --------------------------------------------------
+      0        1      100         1        0       100
+## Difference between a C++ and a pure Ruby implementation?
+This is, again, only a rough indication, but I want to compare the `c++` implementation
+of [leveldb-ruby](https://github.com/wmorgan/leveldb-ruby) with this one, which uses ffi.
+I'm aware that this lib is 1 year older, but for those who care, here is the basic bench:
+          user     system      total        real
+    put  0.440000   0.300000   0.740000 (  1.363188)
+    get  0.440000   0.440000   1.460000 (  2.407274)
 ## Todo
 1. Add pluggable serializers

data/ext/leveldb/Makefile CHANGED Viewed

@@ -31,6 +31,7 @@ TESTHARNESS = ./util/testharness.o $(TESTUTIL)
 TESTS = \
 	arena_test \
+	autocompact_test \
 	bloom_test \
 	c_test \
 	cache_test \
@@ -70,7 +71,7 @@ SHARED = $(SHARED1)
 else
 # Update db.h if you change these.
 SHARED_MAJOR = 1
-SHARED_MINOR = 12
+SHARED_MINOR = 13
 SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
 SHARED2 = $(SHARED1).$(SHARED_MAJOR)
 SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
@@ -114,6 +115,9 @@ leveldbutil: db/leveldb_main.o $(LIBOBJECTS)
 arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+autocompact_test: db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
 bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) $(LDFLAGS) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)

data/ext/leveldb/db/autocompact_test.cc ADDED Viewed

@@ -0,0 +1,118 @@
+// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#include "leveldb/db.h"
+#include "db/db_impl.h"
+#include "leveldb/cache.h"
+#include "util/testharness.h"
+#include "util/testutil.h"
+namespace leveldb {
+class AutoCompactTest {
+ public:
+  std::string dbname_;
+  Cache* tiny_cache_;
+  Options options_;
+  DB* db_;
+  AutoCompactTest() {
+    dbname_ = test::TmpDir() + "/autocompact_test";
+    tiny_cache_ = NewLRUCache(100);
+    options_.block_cache = tiny_cache_;
+    DestroyDB(dbname_, options_);
+    options_.create_if_missing = true;
+    options_.compression = kNoCompression;
+    ASSERT_OK(DB::Open(options_, dbname_, &db_));
+  }
+  ~AutoCompactTest() {
+    delete db_;
+    DestroyDB(dbname_, Options());
+    delete tiny_cache_;
+  }
+  std::string Key(int i) {
+    char buf[100];
+    snprintf(buf, sizeof(buf), "key%06d", i);
+    return std::string(buf);
+  }
+  uint64_t Size(const Slice& start, const Slice& limit) {
+    Range r(start, limit);
+    uint64_t size;
+    db_->GetApproximateSizes(&r, 1, &size);
+    return size;
+  }
+  void DoReads(int n);
+};
+static const int kValueSize = 200 * 1024;
+static const int kTotalSize = 100 * 1024 * 1024;
+static const int kCount = kTotalSize / kValueSize;
+// Read through the first n keys repeatedly and check that they get
+// compacted (verified by checking the size of the key space).
+void AutoCompactTest::DoReads(int n) {
+  std::string value(kValueSize, 'x');
+  DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
+  // Fill database
+  for (int i = 0; i < kCount; i++) {
+    ASSERT_OK(db_->Put(WriteOptions(), Key(i), value));
+  }
+  ASSERT_OK(dbi->TEST_CompactMemTable());
+  // Delete everything
+  for (int i = 0; i < kCount; i++) {
+    ASSERT_OK(db_->Delete(WriteOptions(), Key(i)));
+  }
+  ASSERT_OK(dbi->TEST_CompactMemTable());
+  // Get initial measurement of the space we will be reading.
+  const int64_t initial_size = Size(Key(0), Key(n));
+  const int64_t initial_other_size = Size(Key(n), Key(kCount));
+  // Read until size drops significantly.
+  std::string limit_key = Key(n);
+  for (int read = 0; true; read++) {
+    ASSERT_LT(read, 100) << "Taking too long to compact";
+    Iterator* iter = db_->NewIterator(ReadOptions());
+    for (iter->SeekToFirst();
+         iter->Valid() && iter->key().ToString() < limit_key;
+         iter->Next()) {
+      // Drop data
+    }
+    delete iter;
+    // Wait a little bit to allow any triggered compactions to complete.
+    Env::Default()->SleepForMicroseconds(1000000);
+    uint64_t size = Size(Key(0), Key(n));
+    fprintf(stderr, "iter %3d => %7.3f MB [other %7.3f MB]\n",
+            read+1, size/1048576.0, Size(Key(n), Key(kCount))/1048576.0);
+    if (size <= initial_size/10) {
+      break;
+    }
+  }
+  // Verify that the size of the key space not touched by the reads
+  // is pretty much unchanged.
+  const int64_t final_other_size = Size(Key(n), Key(kCount));
+  ASSERT_LE(final_other_size, initial_other_size + 1048576);
+  ASSERT_GE(final_other_size, initial_other_size/5 - 1048576);
+}
+TEST(AutoCompactTest, ReadAll) {
+  DoReads(kCount);
+}
+TEST(AutoCompactTest, ReadHalf) {
+  DoReads(kCount/2);
+}
+}  // namespace leveldb
+int main(int argc, char** argv) {
+  return leveldb::test::RunAllTests();
+}

data/ext/leveldb/db/corruption_test.cc CHANGED Viewed

@@ -35,6 +35,7 @@ class CorruptionTest {
   CorruptionTest() {
     tiny_cache_ = NewLRUCache(100);
     options_.env = &env_;
+    options_.block_cache = tiny_cache_;
     dbname_ = test::TmpDir() + "/db_test";
     DestroyDB(dbname_, options_);
@@ -50,17 +51,14 @@ class CorruptionTest {
      delete tiny_cache_;
   }
-  Status TryReopen(Options* options = NULL) {
+  Status TryReopen() {
     delete db_;
     db_ = NULL;
-    Options opt = (options ? *options : options_);
-    opt.env = &env_;
-    opt.block_cache = tiny_cache_;
-    return DB::Open(opt, dbname_, &db_);
+    return DB::Open(options_, dbname_, &db_);
   }
-  void Reopen(Options* options = NULL) {
-    ASSERT_OK(TryReopen(options));
+  void Reopen() {
+    ASSERT_OK(TryReopen());
   }
   void RepairDB() {
@@ -92,6 +90,10 @@ class CorruptionTest {
     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
       uint64_t key;
       Slice in(iter->key());
+      if (in == "" || in == "~") {
+        // Ignore boundary keys.
+        continue;
+      }
       if (!ConsumeDecimalNumber(&in, &key) ||
           !in.empty() ||
           key < next_expected) {
@@ -233,7 +235,7 @@ TEST(CorruptionTest, TableFile) {
   dbi->TEST_CompactRange(1, NULL, NULL);
   Corrupt(kTableFile, 100, 1);
-  Check(99, 99);
+  Check(90, 99);
 }
 TEST(CorruptionTest, TableFileIndexData) {
@@ -299,7 +301,7 @@ TEST(CorruptionTest, CompactionInputError) {
   ASSERT_EQ(1, Property("leveldb.num-files-at-level" + NumberToString(last)));
   Corrupt(kTableFile, 100, 1);
-  Check(9, 9);
+  Check(5, 9);
   // Force compactions by writing lots of values
   Build(10000);
@@ -307,32 +309,23 @@ TEST(CorruptionTest, CompactionInputError) {
 }
 TEST(CorruptionTest, CompactionInputErrorParanoid) {
-  Options options;
-  options.paranoid_checks = true;
-  options.write_buffer_size = 1048576;
-  Reopen(&options);
+  options_.paranoid_checks = true;
+  options_.write_buffer_size = 512 << 10;
+  Reopen();
   DBImpl* dbi = reinterpret_cast<DBImpl*>(db_);
-  // Fill levels >= 1 so memtable compaction outputs to level 1
-  for (int level = 1; level < config::kNumLevels; level++) {
-    dbi->Put(WriteOptions(), "", "begin");
-    dbi->Put(WriteOptions(), "~", "end");
+  // Make multiple inputs so we need to compact.
+  for (int i = 0; i < 2; i++) {
+    Build(10);
     dbi->TEST_CompactMemTable();
+    Corrupt(kTableFile, 100, 1);
+    env_.SleepForMicroseconds(100000);
   }
+  dbi->CompactRange(NULL, NULL);
-  Build(10);
-  dbi->TEST_CompactMemTable();
-  ASSERT_EQ(1, Property("leveldb.num-files-at-level0"));
-  Corrupt(kTableFile, 100, 1);
-  Check(9, 9);
-  // Write must eventually fail because of corrupted table
-  Status s;
+  // Write must fail because of corrupted table
   std::string tmp1, tmp2;
-  for (int i = 0; i < 10000 && s.ok(); i++) {
-    s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2));
-  }
+  Status s = db_->Put(WriteOptions(), Key(5, &tmp1), Value(5, &tmp2));
   ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db";
 }

data/ext/leveldb/db/db_impl.cc CHANGED Viewed

@@ -113,14 +113,14 @@ Options SanitizeOptions(const std::string& dbname,
   return result;
 }
-DBImpl::DBImpl(const Options& options, const std::string& dbname)
-    : env_(options.env),
-      internal_comparator_(options.comparator),
-      internal_filter_policy_(options.filter_policy),
-      options_(SanitizeOptions(
-          dbname, &internal_comparator_, &internal_filter_policy_, options)),
-      owns_info_log_(options_.info_log != options.info_log),
-      owns_cache_(options_.block_cache != options.block_cache),
+DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
+    : env_(raw_options.env),
+      internal_comparator_(raw_options.comparator),
+      internal_filter_policy_(raw_options.filter_policy),
+      options_(SanitizeOptions(dbname, &internal_comparator_,
+                               &internal_filter_policy_, raw_options)),
+      owns_info_log_(options_.info_log != raw_options.info_log),
+      owns_cache_(options_.block_cache != raw_options.block_cache),
       dbname_(dbname),
       db_lock_(NULL),
       shutting_down_(NULL),
@@ -130,6 +130,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
       logfile_(NULL),
       logfile_number_(0),
       log_(NULL),
+      seed_(0),
       tmp_batch_(new WriteBatch),
       bg_compaction_scheduled_(false),
       manual_compaction_(NULL),
@@ -138,7 +139,7 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname)
   has_imm_.Release_Store(NULL);
   // Reserve ten files or so for other uses and give the rest to TableCache.
-  const int table_cache_size = options.max_open_files - kNumNonTableCacheFiles;
+  const int table_cache_size = options_.max_open_files - kNumNonTableCacheFiles;
   table_cache_ = new TableCache(dbname_, &options_, table_cache_size);
   versions_ = new VersionSet(dbname_, &options_, table_cache_,
@@ -1027,7 +1028,8 @@ static void CleanupIteratorState(void* arg1, void* arg2) {
 }  // namespace
 Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
-                                      SequenceNumber* latest_snapshot) {
+                                      SequenceNumber* latest_snapshot,
+                                      uint32_t* seed) {
   IterState* cleanup = new IterState;
   mutex_.Lock();
   *latest_snapshot = versions_->LastSequence();
@@ -1051,13 +1053,15 @@ Iterator* DBImpl::NewInternalIterator(const ReadOptions& options,
   cleanup->version = versions_->current();
   internal_iter->RegisterCleanup(CleanupIteratorState, cleanup, NULL);
+  *seed = ++seed_;
   mutex_.Unlock();
   return internal_iter;
 }
 Iterator* DBImpl::TEST_NewInternalIterator() {
   SequenceNumber ignored;
-  return NewInternalIterator(ReadOptions(), &ignored);
+  uint32_t ignored_seed;
+  return NewInternalIterator(ReadOptions(), &ignored, &ignored_seed);
 }
 int64_t DBImpl::TEST_MaxNextLevelOverlappingBytes() {
@@ -1114,12 +1118,21 @@ Status DBImpl::Get(const ReadOptions& options,
 Iterator* DBImpl::NewIterator(const ReadOptions& options) {
   SequenceNumber latest_snapshot;
-  Iterator* internal_iter = NewInternalIterator(options, &latest_snapshot);
+  uint32_t seed;
+  Iterator* iter = NewInternalIterator(options, &latest_snapshot, &seed);
   return NewDBIterator(
-      &dbname_, env_, user_comparator(), internal_iter,
+      this, user_comparator(), iter,
       (options.snapshot != NULL
        ? reinterpret_cast<const SnapshotImpl*>(options.snapshot)->number_
-       : latest_snapshot));
+       : latest_snapshot),
+      seed);
+}
+void DBImpl::RecordReadSample(Slice key) {
+  MutexLock l(&mutex_);
+  if (versions_->current()->RecordReadSample(key)) {
+    MaybeScheduleCompaction();
+  }
 }
 const Snapshot* DBImpl::GetSnapshot() {

data/ext/leveldb/db/db_impl.h CHANGED Viewed

@@ -59,13 +59,19 @@ class DBImpl : public DB {
   // file at a level >= 1.
   int64_t TEST_MaxNextLevelOverlappingBytes();
+  // Record a sample of bytes read at the specified internal key.
+  // Samples are taken approximately once every config::kReadBytesPeriod
+  // bytes.
+  void RecordReadSample(Slice key);
  private:
   friend class DB;
   struct CompactionState;
   struct Writer;
   Iterator* NewInternalIterator(const ReadOptions&,
-                                SequenceNumber* latest_snapshot);
+                                SequenceNumber* latest_snapshot,
+                                uint32_t* seed);
   Status NewDB();
@@ -135,6 +141,7 @@ class DBImpl : public DB {
   WritableFile* logfile_;
   uint64_t logfile_number_;
   log::Writer* log_;
+  uint32_t seed_;                // For sampling.
   // Queue of writers.
   std::deque<Writer*> writers_;

data/ext/leveldb/db/db_iter.cc CHANGED Viewed

@@ -5,12 +5,14 @@
 #include "db/db_iter.h"
 #include "db/filename.h"
+#include "db/db_impl.h"
 #include "db/dbformat.h"
 #include "leveldb/env.h"
 #include "leveldb/iterator.h"
 #include "port/port.h"
 #include "util/logging.h"
 #include "util/mutexlock.h"
+#include "util/random.h"
 namespace leveldb {
@@ -46,15 +48,16 @@ class DBIter: public Iterator {
     kReverse
   };
-  DBIter(const std::string* dbname, Env* env,
-         const Comparator* cmp, Iterator* iter, SequenceNumber s)
-      : dbname_(dbname),
-        env_(env),
+  DBIter(DBImpl* db, const Comparator* cmp, Iterator* iter, SequenceNumber s,
+         uint32_t seed)
+      : db_(db),
         user_comparator_(cmp),
         iter_(iter),
         sequence_(s),
         direction_(kForward),
-        valid_(false) {
+        valid_(false),
+        rnd_(seed),
+        bytes_counter_(RandomPeriod()) {
   }
   virtual ~DBIter() {
     delete iter_;
@@ -100,8 +103,12 @@ class DBIter: public Iterator {
     }
   }
-  const std::string* const dbname_;
-  Env* const env_;
+  // Pick next gap with average value of config::kReadBytesPeriod.
+  ssize_t RandomPeriod() {
+    return rnd_.Uniform(2*config::kReadBytesPeriod);
+  }
+  DBImpl* db_;
   const Comparator* const user_comparator_;
   Iterator* const iter_;
   SequenceNumber const sequence_;
@@ -112,13 +119,23 @@ class DBIter: public Iterator {
   Direction direction_;
   bool valid_;
+  Random rnd_;
+  ssize_t bytes_counter_;
   // No copying allowed
   DBIter(const DBIter&);
   void operator=(const DBIter&);
 };
 inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
-  if (!ParseInternalKey(iter_->key(), ikey)) {
+  Slice k = iter_->key();
+  ssize_t n = k.size() + iter_->value().size();
+  bytes_counter_ -= n;
+  while (bytes_counter_ < 0) {
+    bytes_counter_ += RandomPeriod();
+    db_->RecordReadSample(k);
+  }
+  if (!ParseInternalKey(k, ikey)) {
     status_ = Status::Corruption("corrupted internal key in DBIter");
     return false;
   } else {
@@ -288,12 +305,12 @@ void DBIter::SeekToLast() {
 }  // anonymous namespace
 Iterator* NewDBIterator(
-    const std::string* dbname,
-    Env* env,
+    DBImpl* db,
     const Comparator* user_key_comparator,
     Iterator* internal_iter,
-    const SequenceNumber& sequence) {
-  return new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
+    SequenceNumber sequence,
+    uint32_t seed) {
+  return new DBIter(db, user_key_comparator, internal_iter, sequence, seed);
 }
 }  // namespace leveldb

data/ext/leveldb/db/db_iter.h CHANGED Viewed

@@ -11,15 +11,17 @@
 namespace leveldb {
+class DBImpl;
 // Return a new iterator that converts internal keys (yielded by
 // "*internal_iter") that were live at the specified "sequence" number
 // into appropriate user keys.
 extern Iterator* NewDBIterator(
-    const std::string* dbname,
-    Env* env,
+    DBImpl* db,
     const Comparator* user_key_comparator,
     Iterator* internal_iter,
-    const SequenceNumber& sequence);
+    SequenceNumber sequence,
+    uint32_t seed);
 }  // namespace leveldb

data/ext/leveldb/db/dbformat.h CHANGED Viewed

@@ -38,6 +38,9 @@ static const int kL0_StopWritesTrigger = 12;
 // space if the same key space is being repeatedly overwritten.
 static const int kMaxMemCompactLevel = 2;
+// Approximate gap in bytes between samples of data read during iteration.
+static const int kReadBytesPeriod = 1048576;
 }  // namespace config
 class InternalKey;

data/ext/leveldb/db/version_set.cc CHANGED Viewed

@@ -289,6 +289,51 @@ static bool NewestFirst(FileMetaData* a, FileMetaData* b) {
   return a->number > b->number;
 }
+void Version::ForEachOverlapping(Slice user_key, Slice internal_key,
+                                 void* arg,
+                                 bool (*func)(void*, int, FileMetaData*)) {
+  // TODO(sanjay): Change Version::Get() to use this function.
+  const Comparator* ucmp = vset_->icmp_.user_comparator();
+  // Search level-0 in order from newest to oldest.
+  std::vector<FileMetaData*> tmp;
+  tmp.reserve(files_[0].size());
+  for (uint32_t i = 0; i < files_[0].size(); i++) {
+    FileMetaData* f = files_[0][i];
+    if (ucmp->Compare(user_key, f->smallest.user_key()) >= 0 &&
+        ucmp->Compare(user_key, f->largest.user_key()) <= 0) {
+      tmp.push_back(f);
+    }
+  }
+  if (!tmp.empty()) {
+    std::sort(tmp.begin(), tmp.end(), NewestFirst);
+    for (uint32_t i = 0; i < tmp.size(); i++) {
+      if (!(*func)(arg, 0, tmp[i])) {
+        return;
+      }
+    }
+  }
+  // Search other levels.
+  for (int level = 1; level < config::kNumLevels; level++) {
+    size_t num_files = files_[level].size();
+    if (num_files == 0) continue;
+    // Binary search to find earliest index whose largest key >= internal_key.
+    uint32_t index = FindFile(vset_->icmp_, files_[level], internal_key);
+    if (index < num_files) {
+      FileMetaData* f = files_[level][index];
+      if (ucmp->Compare(user_key, f->smallest.user_key()) < 0) {
+        // All of "f" is past any data for user_key
+      } else {
+        if (!(*func)(arg, level, f)) {
+          return;
+        }
+      }
+    }
+  }
+}
 Status Version::Get(const ReadOptions& options,
                     const LookupKey& k,
                     std::string* value,
@@ -401,6 +446,44 @@ bool Version::UpdateStats(const GetStats& stats) {
   return false;
 }
+bool Version::RecordReadSample(Slice internal_key) {
+  ParsedInternalKey ikey;
+  if (!ParseInternalKey(internal_key, &ikey)) {
+    return false;
+  }
+  struct State {
+    GetStats stats;  // Holds first matching file
+    int matches;
+    static bool Match(void* arg, int level, FileMetaData* f) {
+      State* state = reinterpret_cast<State*>(arg);
+      state->matches++;
+      if (state->matches == 1) {
+        // Remember first match.
+        state->stats.seek_file = f;
+        state->stats.seek_file_level = level;
+      }
+      // We can stop iterating once we have a second match.
+      return state->matches < 2;
+    }
+  };
+  State state;
+  state.matches = 0;
+  ForEachOverlapping(ikey.user_key, internal_key, &state, &State::Match);
+  // Must have at least two matches since we want to merge across
+  // files. But what if we have a single file that contains many
+  // overwrites and deletions?  Should we have another mechanism for
+  // finding such files?
+  if (state.matches >= 2) {
+    // 1MB cost is about 1 seek (see comment in Builder::Apply).
+    return UpdateStats(state.stats);
+  }
+  return false;
+}
 void Version::Ref() {
   ++refs_;
 }
@@ -435,10 +518,13 @@ int Version::PickLevelForMemTableOutput(
       if (OverlapInLevel(level + 1, &smallest_user_key, &largest_user_key)) {
         break;
       }
-      GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
-      const int64_t sum = TotalFileSize(overlaps);
-      if (sum > kMaxGrandParentOverlapBytes) {
-        break;
+      if (level + 2 < config::kNumLevels) {
+        // Check that file does not overlap too many grandparent bytes.
+        GetOverlappingInputs(level + 2, &start, &limit, &overlaps);
+        const int64_t sum = TotalFileSize(overlaps);
+        if (sum > kMaxGrandParentOverlapBytes) {
+          break;
+        }
       }
       level++;
     }
@@ -452,6 +538,8 @@ void Version::GetOverlappingInputs(
     const InternalKey* begin,
     const InternalKey* end,
     std::vector<FileMetaData*>* inputs) {
+  assert(level >= 0);
+  assert(level < config::kNumLevels);
   inputs->clear();
   Slice user_begin, user_end;
   if (begin != NULL) {

data/ext/leveldb/db/version_set.h CHANGED Viewed

@@ -78,6 +78,12 @@ class Version {
   // REQUIRES: lock is held
   bool UpdateStats(const GetStats& stats);
+  // Record a sample of bytes read at the specified internal key.
+  // Samples are taken approximately once every config::kReadBytesPeriod
+  // bytes.  Returns true if a new compaction may need to be triggered.
+  // REQUIRES: lock is held
+  bool RecordReadSample(Slice key);
   // Reference count management (so Versions do not disappear out from
   // under live iterators)
   void Ref();
@@ -114,6 +120,15 @@ class Version {
   class LevelFileNumIterator;
   Iterator* NewConcatenatingIterator(const ReadOptions&, int level) const;
+  // Call func(arg, level, f) for every file that overlaps user_key in
+  // order from newest to oldest.  If an invocation of func returns
+  // false, makes no more calls.
+  //
+  // REQUIRES: user portion of internal_key == user_key.
+  void ForEachOverlapping(Slice user_key, Slice internal_key,
+                          void* arg,
+                          bool (*func)(void*, int, FileMetaData*));
   VersionSet* vset_;            // VersionSet to which this Version belongs
   Version* next_;               // Next version in linked list
   Version* prev_;               // Previous version in linked list

data/ext/leveldb/include/leveldb/db.h CHANGED Viewed

@@ -14,7 +14,7 @@ namespace leveldb {
 // Update Makefile if you change these
 static const int kMajorVersion = 1;
-static const int kMinorVersion = 12;
+static const int kMinorVersion = 13;
 struct Options;
 struct ReadOptions;

data/ext/leveldb/util/env_posix.cc CHANGED Viewed

@@ -319,8 +319,39 @@ class PosixMmapFile : public WritableFile {
     return Status::OK();
   }
-  virtual Status Sync() {
+  Status SyncDirIfManifest() {
+    const char* f = filename_.c_str();
+    const char* sep = strrchr(f, '/');
+    Slice basename;
+    std::string dir;
+    if (sep == NULL) {
+      dir = ".";
+      basename = f;
+    } else {
+      dir = std::string(f, sep - f);
+      basename = sep + 1;
+    }
     Status s;
+    if (basename.starts_with("MANIFEST")) {
+      int fd = open(dir.c_str(), O_RDONLY);
+      if (fd < 0) {
+        s = IOError(dir, errno);
+      } else {
+        if (fsync(fd) < 0) {
+          s = IOError(dir, errno);
+        }
+        close(fd);
+      }
+    }
+    return s;
+  }
+  virtual Status Sync() {
+    // Ensure new files referred to by the manifest are in the filesystem.
+    Status s = SyncDirIfManifest();
+    if (!s.ok()) {
+      return s;
+    }
     if (pending_sync_) {
       // Some unmapped data was not synced

data/ext/leveldb/util/random.h CHANGED Viewed

@@ -16,7 +16,12 @@ class Random {
  private:
   uint32_t seed_;
  public:
-  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) { }
+  explicit Random(uint32_t s) : seed_(s & 0x7fffffffu) {
+    // Avoid bad seeds.
+    if (seed_ == 0 || seed_ == 2147483647L) {
+      seed_ = 1;
+    }
+  }
   uint32_t Next() {
     static const uint32_t M = 2147483647L;   // 2^31-1
     static const uint64_t A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0

data/lib/leveldb/db.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module LevelDB
     class KeyError < StandardError; end
     class ClosedError < StandardError; end
-    attr_reader :path
+    attr_reader :path, :options
     @@mutex = Mutex.new
     DEFAULT = {
@@ -20,7 +20,7 @@ module LevelDB
       paranoid_checks: false,
       write_buffer_size: 4 << 20,
       block_size: 4096,
-      max_open_files: 1000,
+      max_open_files: 200,
       block_cache_size: 8 * (2 << 20),
       block_restart_interval: 16,
       compression: false,
@@ -29,27 +29,35 @@ module LevelDB
     }
     def initialize(path, options={})
+      new!(path, options)
+    end
+    def new!(path, options={})
       @_db_opts    = C.options_create
       @_write_opts = C.writeoptions_create
       @_read_opts  = C.readoptions_create
       @_read_len   = C.value('size_t')
-      options = DEFAULT.merge(options)
+      @options = DEFAULT.merge(options)
-      @_cache = C.cache_create_lru(options[:block_cache_size])
+      @_cache = C.cache_create_lru(@options[:block_cache_size])
-      C.readoptions_set_verify_checksums(@_read_opts, options[:verify_checksums] ? 1 : 0)
-      C.readoptions_set_fill_cache(@_read_opts, options[:fill_cache] ? 1 : 0)
+      C.readoptions_set_verify_checksums(@_read_opts, @options[:verify_checksums] ? 1 : 0)
+      C.readoptions_set_fill_cache(@_read_opts, @options[:fill_cache] ? 1 : 0)
-      C.options_set_create_if_missing(@_db_opts, options[:create_if_missing] ? 1 : 0)
-      C.options_set_error_if_exists(@_db_opts, options[:error_if_exists] ? 1 : 0)
-      C.options_set_paranoid_checks(@_db_opts, options[:paranoid_checks] ? 1 : 0)
-      C.options_set_write_buffer_size(@_db_opts, options[:write_buffer_size])
-      C.options_set_block_size(@_db_opts, options[:block_size])
+      C.options_set_create_if_missing(@_db_opts, @options[:create_if_missing] ? 1 : 0)
+      C.options_set_error_if_exists(@_db_opts, @options[:error_if_exists] ? 1 : 0)
+      C.options_set_paranoid_checks(@_db_opts, @options[:paranoid_checks] ? 1 : 0)
+      C.options_set_write_buffer_size(@_db_opts, @options[:write_buffer_size])
+      C.options_set_block_size(@_db_opts, @options[:block_size])
       C.options_set_cache(@_db_opts, @_cache)
-      C.options_set_max_open_files(@_db_opts, options[:max_open_files])
-      C.options_set_block_restart_interval(@_db_opts, options[:block_restart_interval])
-      C.options_set_compression(@_db_opts, options[:compression] ? 1 : 0)
+      C.options_set_max_open_files(@_db_opts, @options[:max_open_files])
+      C.options_set_block_restart_interval(@_db_opts, @options[:block_restart_interval])
+      C.options_set_compression(@_db_opts, @options[:compression] ? 1 : 0)
+      if @options[:bloom_filter_bits_per_key]
+        C.options_set_filter_policy(@_db_opts, C.filterpolicy_create_bloom(@options[:bloom_filter_bits_per_key]))
+      end
       @_db_opts.free = @_write_opts.free = @_read_opts.free = C[:options_destroy]
@@ -63,6 +71,14 @@ module LevelDB
       raise Error, error_message if errors?
     end
+    private :new!
+    def reopen
+      close unless closed?
+      @@mutex.synchronize { @_closed = false }
+      new!(@path, @options)
+    end
+    alias reopen! reopen
     def []=(key, val)
       raise ClosedError if closed?
@@ -81,12 +97,13 @@ module LevelDB
     def [](key)
       raise ClosedError if closed?
-      key  = key.to_s
-      val  = C.get(@_db, @_read_opts, key, key.size, @_read_len, @_err)
+      key = key.to_s
+      val = C.get(@_db, @_read_opts, key, key.size, @_read_len, @_err)
+      val.free = C[:free]
       raise Error, error_message if errors?
-      @_read_len.value == 0 ? nil : val.to_s(@_read_len.value)
+      @_read_len.value == 0 ? nil : val.to_s(@_read_len.value).clone
     end
     alias get []
@@ -189,6 +206,11 @@ module LevelDB
       true
     end
+    def destroy!
+      close && destroy && reopen
+    end
+    alias clear! destroy!
     def read_property(name)
       raise ClosedError if closed?

data/lib/leveldb/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module LevelDB
-  VERSION = '0.1.3'
+  VERSION = '0.1.4'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: leveldb
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - DAddYE
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-06-25 00:00:00.000000000 Z
+date: 2013-09-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: fiddler-rb
@@ -90,6 +90,7 @@ extra_rdoc_files: []
 files:
 - ext/Rakefile
 - ext/leveldb/db/c_test.c
+- ext/leveldb/db/autocompact_test.cc
 - ext/leveldb/db/builder.cc
 - ext/leveldb/db/c.cc
 - ext/leveldb/db/corruption_test.cc
@@ -244,7 +245,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.3
+rubygems_version: 2.1.2
 signing_key:
 specification_version: 4
 summary: LevelDB for Ruby