google_hash 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog.txt +2 -0
- data/VERSION +1 -1
- data/ext/clean.bat +0 -0
- data/ext/clean.sh +4 -0
- data/ext/extconf.rb +4 -5
- data/ext/go.bat +0 -0
- data/ext/sparsehash-2.0.2/AUTHORS +2 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/COPYING +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/ChangeLog +60 -0
- data/ext/sparsehash-2.0.2/INSTALL +365 -0
- data/ext/sparsehash-2.0.2/Makefile +1336 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/Makefile.am +97 -40
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/Makefile.in +538 -256
- data/ext/sparsehash-2.0.2/NEWS +188 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/README +4 -10
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/README_windows.txt +3 -3
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/TODO +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/aclocal.m4 +266 -166
- data/ext/sparsehash-2.0.2/allocator.patch +31 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/config.guess +235 -234
- data/ext/sparsehash-2.0.2/config.status +1238 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/config.sub +198 -64
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/configure +1118 -1000
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/configure.ac +4 -5
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/depcomp +136 -36
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/dense_hash_map.html +182 -67
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/dense_hash_set.html +173 -74
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/designstyle.css +0 -6
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/implementation.html +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/index.html +4 -5
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/performance.html +1 -1
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/sparse_hash_map.html +190 -58
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/sparse_hash_set.html +180 -65
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/doc/sparsetable.html +1 -1
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/experimental/Makefile +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/experimental/README +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/experimental/example.c +1 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/experimental/libchash.c +1 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/experimental/libchash.h +1 -0
- data/ext/sparsehash-2.0.2/install-sh +520 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/m4/acx_pthread.m4 +34 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/m4/google_namespace.m4 +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/m4/namespaces.m4 +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/m4/stl_hash.m4 +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/m4/stl_hash_fun.m4 +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/missing +60 -44
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb.sh +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb/README +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb/changelog +42 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb/compat +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb/control +1 -1
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb/copyright +5 -4
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb/docs +0 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/deb/rules +0 -0
- data/ext/sparsehash-2.0.2/packages/deb/sparsehash.dirs +5 -0
- data/ext/sparsehash-2.0.2/packages/deb/sparsehash.install +6 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/rpm.sh +1 -1
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/packages/rpm/rpm.spec +5 -3
- data/ext/{sparsehash-1.8.1/google-sparsehash.sln → sparsehash-2.0.2/sparsehash.sln} +0 -0
- data/ext/sparsehash-2.0.2/src/config.h +132 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/config.h.in +0 -3
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/config.h.include +0 -1
- data/ext/sparsehash-2.0.2/src/google/dense_hash_map +34 -0
- data/ext/sparsehash-2.0.2/src/google/dense_hash_set +34 -0
- data/ext/sparsehash-2.0.2/src/google/sparse_hash_map +34 -0
- data/ext/sparsehash-2.0.2/src/google/sparse_hash_set +34 -0
- data/ext/sparsehash-2.0.2/src/google/sparsehash/densehashtable.h +34 -0
- data/ext/sparsehash-2.0.2/src/google/sparsehash/hashtable-common.h +34 -0
- data/ext/sparsehash-2.0.2/src/google/sparsehash/libc_allocator_with_realloc.h +34 -0
- data/ext/sparsehash-2.0.2/src/google/sparsehash/sparsehashtable.h +34 -0
- data/ext/sparsehash-2.0.2/src/google/sparsetable +34 -0
- data/ext/sparsehash-2.0.2/src/google/template_util.h +34 -0
- data/ext/sparsehash-2.0.2/src/google/type_traits.h +34 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/hash_test_interface.h +64 -37
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/hashtable_test.cc +415 -141
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/libc_allocator_with_realloc_test.cc +16 -23
- data/ext/sparsehash-2.0.2/src/simple_compat_test.cc +106 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/simple_test.cc +8 -5
- data/ext/{sparsehash-1.8.1/src/google → sparsehash-2.0.2/src/sparsehash}/dense_hash_map +80 -37
- data/ext/{sparsehash-1.8.1/src/google → sparsehash-2.0.2/src/sparsehash}/dense_hash_set +64 -34
- data/ext/{sparsehash-1.8.1/src/google/sparsehash → sparsehash-2.0.2/src/sparsehash/internal}/densehashtable.h +247 -173
- data/ext/sparsehash-2.0.2/src/sparsehash/internal/hashtable-common.h +381 -0
- data/ext/{sparsehash-1.8.1/src/google/sparsehash → sparsehash-2.0.2/src/sparsehash/internal}/libc_allocator_with_realloc.h +5 -7
- data/ext/{sparsehash-1.8.1/src/google/sparsehash → sparsehash-2.0.2/src/sparsehash/internal}/sparsehashtable.h +154 -93
- data/ext/{sparsehash-1.8.1/src/google → sparsehash-2.0.2/src/sparsehash}/sparse_hash_map +96 -36
- data/ext/{sparsehash-1.8.1/src/google → sparsehash-2.0.2/src/sparsehash}/sparse_hash_set +85 -32
- data/ext/{sparsehash-1.8.1/src/google → sparsehash-2.0.2/src/sparsehash}/sparsetable +520 -258
- data/ext/sparsehash-2.0.2/src/sparsehash/template_util.h +134 -0
- data/ext/{sparsehash-1.8.1/src/google → sparsehash-2.0.2/src/sparsehash}/type_traits.h +153 -35
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/sparsetable_unittest.cc +108 -22
- data/ext/sparsehash-2.0.2/src/stamp-h1 +1 -0
- data/ext/sparsehash-2.0.2/src/template_util_unittest.cc +134 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/testutil.h +16 -1
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/time_hash_map.cc +259 -94
- data/ext/sparsehash-2.0.2/src/type_traits_unittest.cc +636 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/windows/config.h +4 -4
- data/ext/sparsehash-2.0.2/src/windows/google/sparsehash/sparseconfig.h +49 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/windows/port.cc +1 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/src/windows/port.h +4 -13
- data/ext/sparsehash-2.0.2/src/windows/sparsehash/internal/sparseconfig.h +49 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/vsprojects/hashtable_test/hashtable_test.vcproj +11 -11
- data/ext/sparsehash-2.0.2/vsprojects/libc_allocator_with_realloc_test/libc_allocator_with_realloc_test.vcproj +161 -0
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/vsprojects/simple_test/simple_test.vcproj +10 -10
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj +4 -4
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/vsprojects/time_hash_map/time_hash_map.vcproj +10 -10
- data/ext/{sparsehash-1.8.1 → sparsehash-2.0.2}/vsprojects/type_traits_unittest/type_traits_unittest.vcproj +3 -3
- data/ext/spec.bat +0 -0
- data/ext/template/google_hash.cpp.erb +6 -5
- metadata +106 -86
- data/ext/sparsehash-1.8.1/AUTHORS +0 -2
- data/ext/sparsehash-1.8.1/INSTALL +0 -236
- data/ext/sparsehash-1.8.1/NEWS +0 -71
- data/ext/sparsehash-1.8.1/compile +0 -99
- data/ext/sparsehash-1.8.1/install-sh +0 -323
- data/ext/sparsehash-1.8.1/m4/stl_namespace.m4 +0 -25
- data/ext/sparsehash-1.8.1/mkinstalldirs +0 -158
- data/ext/sparsehash-1.8.1/packages/deb/sparsehash.dirs +0 -2
- data/ext/sparsehash-1.8.1/packages/deb/sparsehash.install +0 -2
- data/ext/sparsehash-1.8.1/src/google/sparsehash/hashtable-common.h +0 -178
- data/ext/sparsehash-1.8.1/src/type_traits_unittest.cc +0 -502
- data/ext/sparsehash-1.8.1/src/windows/google/sparsehash/sparseconfig.h +0 -32
|
@@ -0,0 +1,381 @@
|
|
|
1
|
+
// Copyright (c) 2010, Google Inc.
|
|
2
|
+
// All rights reserved.
|
|
3
|
+
//
|
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
|
5
|
+
// modification, are permitted provided that the following conditions are
|
|
6
|
+
// met:
|
|
7
|
+
//
|
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
|
12
|
+
// in the documentation and/or other materials provided with the
|
|
13
|
+
// distribution.
|
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
|
16
|
+
// this software without specific prior written permission.
|
|
17
|
+
//
|
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
|
+
|
|
30
|
+
// ---
|
|
31
|
+
//
|
|
32
|
+
// Provides classes shared by both sparse and dense hashtable.
|
|
33
|
+
//
|
|
34
|
+
// sh_hashtable_settings has parameters for growing and shrinking
|
|
35
|
+
// a hashtable. It also packages zero-size functor (ie. hasher).
|
|
36
|
+
//
|
|
37
|
+
// Other functions and classes provide common code for serializing
|
|
38
|
+
// and deserializing hashtables to a stream (such as a FILE*).
|
|
39
|
+
|
|
40
|
+
#ifndef UTIL_GTL_HASHTABLE_COMMON_H_
|
|
41
|
+
#define UTIL_GTL_HASHTABLE_COMMON_H_
|
|
42
|
+
|
|
43
|
+
#include <sparsehash/internal/sparseconfig.h>
|
|
44
|
+
#include <assert.h>
|
|
45
|
+
#include <stdio.h>
|
|
46
|
+
#include <stddef.h> // for size_t
|
|
47
|
+
#include <iosfwd>
|
|
48
|
+
#include <stdexcept> // For length_error
|
|
49
|
+
|
|
50
|
+
_START_GOOGLE_NAMESPACE_
|
|
51
|
+
|
|
52
|
+
template <bool> struct SparsehashCompileAssert { };
|
|
53
|
+
#define SPARSEHASH_COMPILE_ASSERT(expr, msg) \
|
|
54
|
+
typedef SparsehashCompileAssert<(bool(expr))> msg[bool(expr) ? 1 : -1]
|
|
55
|
+
|
|
56
|
+
namespace sparsehash_internal {
|
|
57
|
+
|
|
58
|
+
// Adaptor methods for reading/writing data from an INPUT or OUTPUT
|
|
59
|
+
// variable passed to serialize() or unserialize(). For now we
|
|
60
|
+
// have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note
|
|
61
|
+
// they are pointers, unlike typical use), or else a pointer to
|
|
62
|
+
// something that supports a Read()/Write() method.
|
|
63
|
+
//
|
|
64
|
+
// For technical reasons, we implement read_data/write_data in two
|
|
65
|
+
// stages. The actual work is done in *_data_internal, which takes
|
|
66
|
+
// the stream argument twice: once as a template type, and once with
|
|
67
|
+
// normal type information. (We only use the second version.) We do
|
|
68
|
+
// this because of how C++ picks what function overload to use. If we
|
|
69
|
+
// implemented this the naive way:
|
|
70
|
+
// bool read_data(istream* is, const void* data, size_t length);
|
|
71
|
+
// template<typename T> read_data(T* fp, const void* data, size_t length);
|
|
72
|
+
// C++ would prefer the second version for every stream type except
|
|
73
|
+
// istream. However, we want C++ to prefer the first version for
|
|
74
|
+
// streams that are *subclasses* of istream, such as istringstream.
|
|
75
|
+
// This is not possible given the way template types are resolved. So
|
|
76
|
+
// we split the stream argument in two, one of which is templated and
|
|
77
|
+
// one of which is not. The specialized functions (like the istream
|
|
78
|
+
// version above) ignore the template arg and use the second, 'type'
|
|
79
|
+
// arg, getting subclass matching as normal. The 'catch-all'
|
|
80
|
+
// functions (the second version above) use the template arg to deduce
|
|
81
|
+
// the type, and use a second, void* arg to achieve the desired
|
|
82
|
+
// 'catch-all' semantics.
|
|
83
|
+
|
|
84
|
+
// ----- low-level I/O for FILE* ----
|
|
85
|
+
|
|
86
|
+
template<typename Ignored>
|
|
87
|
+
inline bool read_data_internal(Ignored*, FILE* fp,
|
|
88
|
+
void* data, size_t length) {
|
|
89
|
+
return fread(data, length, 1, fp) == 1;
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
template<typename Ignored>
|
|
93
|
+
inline bool write_data_internal(Ignored*, FILE* fp,
|
|
94
|
+
const void* data, size_t length) {
|
|
95
|
+
return fwrite(data, length, 1, fp) == 1;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// ----- low-level I/O for iostream ----
|
|
99
|
+
|
|
100
|
+
// We want the caller to be responsible for #including <iostream>, not
|
|
101
|
+
// us, because iostream is a big header! According to the standard,
|
|
102
|
+
// it's only legal to delay the instantiation the way we want to if
|
|
103
|
+
// the istream/ostream is a template type. So we jump through hoops.
|
|
104
|
+
template<typename ISTREAM>
|
|
105
|
+
inline bool read_data_internal_for_istream(ISTREAM* fp,
|
|
106
|
+
void* data, size_t length) {
|
|
107
|
+
return fp->read(reinterpret_cast<char*>(data), length).good();
|
|
108
|
+
}
|
|
109
|
+
template<typename Ignored>
|
|
110
|
+
inline bool read_data_internal(Ignored*, std::istream* fp,
|
|
111
|
+
void* data, size_t length) {
|
|
112
|
+
return read_data_internal_for_istream(fp, data, length);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
template<typename OSTREAM>
|
|
116
|
+
inline bool write_data_internal_for_ostream(OSTREAM* fp,
|
|
117
|
+
const void* data, size_t length) {
|
|
118
|
+
return fp->write(reinterpret_cast<const char*>(data), length).good();
|
|
119
|
+
}
|
|
120
|
+
template<typename Ignored>
|
|
121
|
+
inline bool write_data_internal(Ignored*, std::ostream* fp,
|
|
122
|
+
const void* data, size_t length) {
|
|
123
|
+
return write_data_internal_for_ostream(fp, data, length);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// ----- low-level I/O for custom streams ----
|
|
127
|
+
|
|
128
|
+
// The INPUT type needs to support a Read() method that takes a
|
|
129
|
+
// buffer and a length and returns the number of bytes read.
|
|
130
|
+
template <typename INPUT>
|
|
131
|
+
inline bool read_data_internal(INPUT* fp, void*,
|
|
132
|
+
void* data, size_t length) {
|
|
133
|
+
return static_cast<size_t>(fp->Read(data, length)) == length;
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
// The OUTPUT type needs to support a Write() operation that takes
|
|
137
|
+
// a buffer and a length and returns the number of bytes written.
|
|
138
|
+
template <typename OUTPUT>
|
|
139
|
+
inline bool write_data_internal(OUTPUT* fp, void*,
|
|
140
|
+
const void* data, size_t length) {
|
|
141
|
+
return static_cast<size_t>(fp->Write(data, length)) == length;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
// ----- low-level I/O: the public API ----
|
|
145
|
+
|
|
146
|
+
template <typename INPUT>
|
|
147
|
+
inline bool read_data(INPUT* fp, void* data, size_t length) {
|
|
148
|
+
return read_data_internal(fp, fp, data, length);
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
template <typename OUTPUT>
|
|
152
|
+
inline bool write_data(OUTPUT* fp, const void* data, size_t length) {
|
|
153
|
+
return write_data_internal(fp, fp, data, length);
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
// Uses read_data() and write_data() to read/write an integer.
|
|
157
|
+
// length is the number of bytes to read/write (which may differ
|
|
158
|
+
// from sizeof(IntType), allowing us to save on a 32-bit system
|
|
159
|
+
// and load on a 64-bit system). Excess bytes are taken to be 0.
|
|
160
|
+
// INPUT and OUTPUT must match legal inputs to read/write_data (above).
|
|
161
|
+
template <typename INPUT, typename IntType>
|
|
162
|
+
bool read_bigendian_number(INPUT* fp, IntType* value, size_t length) {
|
|
163
|
+
*value = 0;
|
|
164
|
+
unsigned char byte;
|
|
165
|
+
// We require IntType to be unsigned or else the shifting gets all screwy.
|
|
166
|
+
SPARSEHASH_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0),
|
|
167
|
+
serializing_int_requires_an_unsigned_type);
|
|
168
|
+
for (size_t i = 0; i < length; ++i) {
|
|
169
|
+
if (!read_data(fp, &byte, sizeof(byte))) return false;
|
|
170
|
+
*value |= static_cast<IntType>(byte) << ((length - 1 - i) * 8);
|
|
171
|
+
}
|
|
172
|
+
return true;
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
template <typename OUTPUT, typename IntType>
|
|
176
|
+
bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length) {
|
|
177
|
+
unsigned char byte;
|
|
178
|
+
// We require IntType to be unsigned or else the shifting gets all screwy.
|
|
179
|
+
SPARSEHASH_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0),
|
|
180
|
+
serializing_int_requires_an_unsigned_type);
|
|
181
|
+
for (size_t i = 0; i < length; ++i) {
|
|
182
|
+
byte = (sizeof(value) <= length-1 - i)
|
|
183
|
+
? 0 : static_cast<unsigned char>((value >> ((length-1 - i) * 8)) & 255);
|
|
184
|
+
if (!write_data(fp, &byte, sizeof(byte))) return false;
|
|
185
|
+
}
|
|
186
|
+
return true;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// If your keys and values are simple enough, you can pass this
|
|
190
|
+
// serializer to serialize()/unserialize(). "Simple enough" means
|
|
191
|
+
// value_type is a POD type that contains no pointers. Note,
|
|
192
|
+
// however, we don't try to normalize endianness.
|
|
193
|
+
// This is the type used for NopointerSerializer.
|
|
194
|
+
template <typename value_type> struct pod_serializer {
|
|
195
|
+
template <typename INPUT>
|
|
196
|
+
bool operator()(INPUT* fp, value_type* value) const {
|
|
197
|
+
return read_data(fp, value, sizeof(*value));
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
template <typename OUTPUT>
|
|
201
|
+
bool operator()(OUTPUT* fp, const value_type& value) const {
|
|
202
|
+
return write_data(fp, &value, sizeof(value));
|
|
203
|
+
}
|
|
204
|
+
};
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
// Settings contains parameters for growing and shrinking the table.
|
|
208
|
+
// It also packages zero-size functor (ie. hasher).
|
|
209
|
+
//
|
|
210
|
+
// It does some munging of the hash value in cases where we think
|
|
211
|
+
// (fear) the original hash function might not be very good. In
|
|
212
|
+
// particular, the default hash of pointers is the identity hash,
|
|
213
|
+
// so probably all the low bits are 0. We identify when we think
|
|
214
|
+
// we're hashing a pointer, and chop off the low bits. Note this
|
|
215
|
+
// isn't perfect: even when the key is a pointer, we can't tell
|
|
216
|
+
// for sure that the hash is the identity hash. If it's not, this
|
|
217
|
+
// is needless work (and possibly, though not likely, harmful).
|
|
218
|
+
|
|
219
|
+
template<typename Key, typename HashFunc,
|
|
220
|
+
typename SizeType, int HT_MIN_BUCKETS>
|
|
221
|
+
class sh_hashtable_settings : public HashFunc {
|
|
222
|
+
public:
|
|
223
|
+
typedef Key key_type;
|
|
224
|
+
typedef HashFunc hasher;
|
|
225
|
+
typedef SizeType size_type;
|
|
226
|
+
|
|
227
|
+
public:
|
|
228
|
+
sh_hashtable_settings(const hasher& hf,
|
|
229
|
+
const float ht_occupancy_flt,
|
|
230
|
+
const float ht_empty_flt)
|
|
231
|
+
: hasher(hf),
|
|
232
|
+
enlarge_threshold_(0),
|
|
233
|
+
shrink_threshold_(0),
|
|
234
|
+
consider_shrink_(false),
|
|
235
|
+
use_empty_(false),
|
|
236
|
+
use_deleted_(false),
|
|
237
|
+
num_ht_copies_(0) {
|
|
238
|
+
set_enlarge_factor(ht_occupancy_flt);
|
|
239
|
+
set_shrink_factor(ht_empty_flt);
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
size_type hash(const key_type& v) const {
|
|
243
|
+
// We munge the hash value when we don't trust hasher::operator().
|
|
244
|
+
return hash_munger<Key>::MungedHash(hasher::operator()(v));
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
float enlarge_factor() const {
|
|
248
|
+
return enlarge_factor_;
|
|
249
|
+
}
|
|
250
|
+
void set_enlarge_factor(float f) {
|
|
251
|
+
enlarge_factor_ = f;
|
|
252
|
+
}
|
|
253
|
+
float shrink_factor() const {
|
|
254
|
+
return shrink_factor_;
|
|
255
|
+
}
|
|
256
|
+
void set_shrink_factor(float f) {
|
|
257
|
+
shrink_factor_ = f;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
size_type enlarge_threshold() const {
|
|
261
|
+
return enlarge_threshold_;
|
|
262
|
+
}
|
|
263
|
+
void set_enlarge_threshold(size_type t) {
|
|
264
|
+
enlarge_threshold_ = t;
|
|
265
|
+
}
|
|
266
|
+
size_type shrink_threshold() const {
|
|
267
|
+
return shrink_threshold_;
|
|
268
|
+
}
|
|
269
|
+
void set_shrink_threshold(size_type t) {
|
|
270
|
+
shrink_threshold_ = t;
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
size_type enlarge_size(size_type x) const {
|
|
274
|
+
return static_cast<size_type>(x * enlarge_factor_);
|
|
275
|
+
}
|
|
276
|
+
size_type shrink_size(size_type x) const {
|
|
277
|
+
return static_cast<size_type>(x * shrink_factor_);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
bool consider_shrink() const {
|
|
281
|
+
return consider_shrink_;
|
|
282
|
+
}
|
|
283
|
+
void set_consider_shrink(bool t) {
|
|
284
|
+
consider_shrink_ = t;
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
bool use_empty() const {
|
|
288
|
+
return use_empty_;
|
|
289
|
+
}
|
|
290
|
+
void set_use_empty(bool t) {
|
|
291
|
+
use_empty_ = t;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
bool use_deleted() const {
|
|
295
|
+
return use_deleted_;
|
|
296
|
+
}
|
|
297
|
+
void set_use_deleted(bool t) {
|
|
298
|
+
use_deleted_ = t;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
size_type num_ht_copies() const {
|
|
302
|
+
return static_cast<size_type>(num_ht_copies_);
|
|
303
|
+
}
|
|
304
|
+
void inc_num_ht_copies() {
|
|
305
|
+
++num_ht_copies_;
|
|
306
|
+
}
|
|
307
|
+
|
|
308
|
+
// Reset the enlarge and shrink thresholds
|
|
309
|
+
void reset_thresholds(size_type num_buckets) {
|
|
310
|
+
set_enlarge_threshold(enlarge_size(num_buckets));
|
|
311
|
+
set_shrink_threshold(shrink_size(num_buckets));
|
|
312
|
+
// whatever caused us to reset already considered
|
|
313
|
+
set_consider_shrink(false);
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
// Caller is responsible for calling reset_threshold right after
|
|
317
|
+
// set_resizing_parameters.
|
|
318
|
+
void set_resizing_parameters(float shrink, float grow) {
|
|
319
|
+
assert(shrink >= 0.0);
|
|
320
|
+
assert(grow <= 1.0);
|
|
321
|
+
if (shrink > grow/2.0f)
|
|
322
|
+
shrink = grow / 2.0f; // otherwise we thrash hashtable size
|
|
323
|
+
set_shrink_factor(shrink);
|
|
324
|
+
set_enlarge_factor(grow);
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
// This is the smallest size a hashtable can be without being too crowded
|
|
328
|
+
// If you like, you can give a min #buckets as well as a min #elts
|
|
329
|
+
size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) {
|
|
330
|
+
float enlarge = enlarge_factor();
|
|
331
|
+
size_type sz = HT_MIN_BUCKETS; // min buckets allowed
|
|
332
|
+
while ( sz < min_buckets_wanted ||
|
|
333
|
+
num_elts >= static_cast<size_type>(sz * enlarge) ) {
|
|
334
|
+
// This just prevents overflowing size_type, since sz can exceed
|
|
335
|
+
// max_size() here.
|
|
336
|
+
if (static_cast<size_type>(sz * 2) < sz) {
|
|
337
|
+
throw std::length_error("resize overflow"); // protect against overflow
|
|
338
|
+
}
|
|
339
|
+
sz *= 2;
|
|
340
|
+
}
|
|
341
|
+
return sz;
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
private:
|
|
345
|
+
template<class HashKey> class hash_munger {
|
|
346
|
+
public:
|
|
347
|
+
static size_t MungedHash(size_t hash) {
|
|
348
|
+
return hash;
|
|
349
|
+
}
|
|
350
|
+
};
|
|
351
|
+
// This matches when the hashtable key is a pointer.
|
|
352
|
+
template<class HashKey> class hash_munger<HashKey*> {
|
|
353
|
+
public:
|
|
354
|
+
static size_t MungedHash(size_t hash) {
|
|
355
|
+
// TODO(csilvers): consider rotating instead:
|
|
356
|
+
// static const int shift = (sizeof(void *) == 4) ? 2 : 3;
|
|
357
|
+
// return (hash << (sizeof(hash) * 8) - shift)) | (hash >> shift);
|
|
358
|
+
// This matters if we ever change sparse/dense_hash_* to compare
|
|
359
|
+
// hashes before comparing actual values. It's speedy on x86.
|
|
360
|
+
return hash / sizeof(void*); // get rid of known-0 bits
|
|
361
|
+
}
|
|
362
|
+
};
|
|
363
|
+
|
|
364
|
+
size_type enlarge_threshold_; // table.size() * enlarge_factor
|
|
365
|
+
size_type shrink_threshold_; // table.size() * shrink_factor
|
|
366
|
+
float enlarge_factor_; // how full before resize
|
|
367
|
+
float shrink_factor_; // how empty before resize
|
|
368
|
+
// consider_shrink=true if we should try to shrink before next insert
|
|
369
|
+
bool consider_shrink_;
|
|
370
|
+
bool use_empty_; // used only by densehashtable, not sparsehashtable
|
|
371
|
+
bool use_deleted_; // false until delkey has been set
|
|
372
|
+
// num_ht_copies is a counter incremented every Copy/Move
|
|
373
|
+
unsigned int num_ht_copies_;
|
|
374
|
+
};
|
|
375
|
+
|
|
376
|
+
} // namespace sparsehash_internal
|
|
377
|
+
|
|
378
|
+
#undef SPARSEHASH_COMPILE_ASSERT
|
|
379
|
+
_END_GOOGLE_NAMESPACE_
|
|
380
|
+
|
|
381
|
+
#endif // UTIL_GTL_HASHTABLE_COMMON_H_
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
// Copyright (c) 2010, Google Inc.
|
|
2
2
|
// All rights reserved.
|
|
3
|
-
//
|
|
3
|
+
//
|
|
4
4
|
// Redistribution and use in source and binary forms, with or without
|
|
5
5
|
// modification, are permitted provided that the following conditions are
|
|
6
6
|
// met:
|
|
7
|
-
//
|
|
7
|
+
//
|
|
8
8
|
// * Redistributions of source code must retain the above copyright
|
|
9
9
|
// notice, this list of conditions and the following disclaimer.
|
|
10
10
|
// * Redistributions in binary form must reproduce the above
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
// * Neither the name of Google Inc. nor the names of its
|
|
15
15
|
// contributors may be used to endorse or promote products derived from
|
|
16
16
|
// this software without specific prior written permission.
|
|
17
|
-
//
|
|
17
|
+
//
|
|
18
18
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
19
19
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
20
20
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
@@ -28,16 +28,14 @@
|
|
|
28
28
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
29
|
|
|
30
30
|
// ---
|
|
31
|
-
// Author: Guilin Chen
|
|
32
31
|
|
|
33
32
|
#ifndef UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
|
|
34
33
|
#define UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
|
|
35
34
|
|
|
36
|
-
#include <
|
|
37
|
-
|
|
35
|
+
#include <sparsehash/internal/sparseconfig.h>
|
|
38
36
|
#include <stdlib.h> // for malloc/realloc/free
|
|
39
37
|
#include <stddef.h> // for ptrdiff_t
|
|
40
|
-
|
|
38
|
+
#include <new> // for placement new
|
|
41
39
|
|
|
42
40
|
_START_GOOGLE_NAMESPACE_
|
|
43
41
|
|
|
@@ -28,7 +28,6 @@
|
|
|
28
28
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29
29
|
|
|
30
30
|
// ---
|
|
31
|
-
// Author: Craig Silverstein
|
|
32
31
|
//
|
|
33
32
|
// A sparse hashtable is a particular implementation of
|
|
34
33
|
// a hashtable: one that is meant to minimize memory use.
|
|
@@ -56,7 +55,7 @@
|
|
|
56
55
|
// the hashtable is insert_only until you set it again.
|
|
57
56
|
//
|
|
58
57
|
// You probably shouldn't use this code directly. Use
|
|
59
|
-
//
|
|
58
|
+
// sparse_hash_map<> or sparse_hash_set<> instead.
|
|
60
59
|
//
|
|
61
60
|
// You can modify the following, below:
|
|
62
61
|
// HT_OCCUPANCY_PCT -- how full before we double size
|
|
@@ -96,6 +95,23 @@
|
|
|
96
95
|
#ifndef _SPARSEHASHTABLE_H_
|
|
97
96
|
#define _SPARSEHASHTABLE_H_
|
|
98
97
|
|
|
98
|
+
#include <sparsehash/internal/sparseconfig.h>
|
|
99
|
+
#include <assert.h>
|
|
100
|
+
#include <algorithm> // For swap(), eg
|
|
101
|
+
#include <iterator> // for iterator tags
|
|
102
|
+
#include <limits> // for numeric_limits
|
|
103
|
+
#include <utility> // for pair
|
|
104
|
+
#include <sparsehash/type_traits.h> // for remove_const
|
|
105
|
+
#include <sparsehash/internal/hashtable-common.h>
|
|
106
|
+
#include <sparsehash/sparsetable> // IWYU pragma: export
|
|
107
|
+
#include <stdexcept> // For length_error
|
|
108
|
+
|
|
109
|
+
_START_GOOGLE_NAMESPACE_
|
|
110
|
+
|
|
111
|
+
namespace base { // just to make google->opensource transition easier
|
|
112
|
+
using GOOGLE_NAMESPACE::remove_const;
|
|
113
|
+
}
|
|
114
|
+
|
|
99
115
|
#ifndef SPARSEHASH_STAT_UPDATE
|
|
100
116
|
#define SPARSEHASH_STAT_UPDATE(x) ((void) 0)
|
|
101
117
|
#endif
|
|
@@ -106,20 +122,6 @@
|
|
|
106
122
|
// Quadratic probing
|
|
107
123
|
#define JUMP_(key, num_probes) ( num_probes )
|
|
108
124
|
|
|
109
|
-
#include <google/sparsehash/sparseconfig.h>
|
|
110
|
-
#include <assert.h>
|
|
111
|
-
#include <algorithm> // For swap(), eg
|
|
112
|
-
#include <stdexcept> // For length_error
|
|
113
|
-
#include <iterator> // for facts about iterator tags
|
|
114
|
-
#include <limits> // for numeric_limits<>
|
|
115
|
-
#include <utility> // for pair<>
|
|
116
|
-
#include <google/sparsehash/hashtable-common.h>
|
|
117
|
-
#include <google/sparsetable> // Since that's basically what we are
|
|
118
|
-
|
|
119
|
-
_START_GOOGLE_NAMESPACE_
|
|
120
|
-
|
|
121
|
-
using STL_NAMESPACE::pair;
|
|
122
|
-
|
|
123
125
|
// The smaller this is, the faster lookup is (because the group bitmap is
|
|
124
126
|
// smaller) and the faster insert is, because there's less to move.
|
|
125
127
|
// On the other hand, there are more groups. Since group::size_type is
|
|
@@ -134,6 +136,8 @@ static const u_int16_t DEFAULT_GROUP_SIZE = 48; // fits in 1.5 words
|
|
|
134
136
|
// to search for a Value in the table (find() takes a Key).
|
|
135
137
|
// HashFcn: Takes a Key and returns an integer, the more unique the better.
|
|
136
138
|
// ExtractKey: given a Value, returns the unique Key associated with it.
|
|
139
|
+
// Must inherit from unary_function, or at least have a
|
|
140
|
+
// result_type enum indicating the return type of operator().
|
|
137
141
|
// SetKey: given a Value* and a Key, modifies the value such that
|
|
138
142
|
// ExtractKey(value) == key. We guarantee this is only called
|
|
139
143
|
// with key == deleted_key.
|
|
@@ -161,10 +165,10 @@ struct sparse_hashtable_iterator {
|
|
|
161
165
|
public:
|
|
162
166
|
typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
163
167
|
typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
|
|
164
|
-
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,
|
|
168
|
+
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,value_alloc_type>::nonempty_iterator
|
|
165
169
|
st_iterator;
|
|
166
170
|
|
|
167
|
-
typedef
|
|
171
|
+
typedef std::forward_iterator_tag iterator_category; // very little defined!
|
|
168
172
|
typedef V value_type;
|
|
169
173
|
typedef typename value_alloc_type::difference_type difference_type;
|
|
170
174
|
typedef typename value_alloc_type::size_type size_type;
|
|
@@ -213,10 +217,10 @@ struct sparse_hashtable_const_iterator {
|
|
|
213
217
|
public:
|
|
214
218
|
typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
215
219
|
typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
|
|
216
|
-
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,
|
|
220
|
+
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,value_alloc_type>::const_nonempty_iterator
|
|
217
221
|
st_iterator;
|
|
218
222
|
|
|
219
|
-
typedef
|
|
223
|
+
typedef std::forward_iterator_tag iterator_category; // very little defined!
|
|
220
224
|
typedef V value_type;
|
|
221
225
|
typedef typename value_alloc_type::difference_type difference_type;
|
|
222
226
|
typedef typename value_alloc_type::size_type size_type;
|
|
@@ -267,10 +271,10 @@ struct sparse_hashtable_destructive_iterator {
|
|
|
267
271
|
|
|
268
272
|
public:
|
|
269
273
|
typedef sparse_hashtable_destructive_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
270
|
-
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,
|
|
274
|
+
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,value_alloc_type>::destructive_iterator
|
|
271
275
|
st_iterator;
|
|
272
276
|
|
|
273
|
-
typedef
|
|
277
|
+
typedef std::forward_iterator_tag iterator_category; // very little defined!
|
|
274
278
|
typedef V value_type;
|
|
275
279
|
typedef typename value_alloc_type::difference_type difference_type;
|
|
276
280
|
typedef typename value_alloc_type::size_type size_type;
|
|
@@ -463,12 +467,12 @@ class sparse_hashtable {
|
|
|
463
467
|
assert(num_deleted == 0);
|
|
464
468
|
}
|
|
465
469
|
|
|
470
|
+
// Test if the given key is the deleted indicator. Requires
|
|
471
|
+
// num_deleted > 0, for correctness of read(), and because that
|
|
472
|
+
// guarantees that key_info.delkey is valid.
|
|
466
473
|
bool test_deleted_key(const key_type& key) const {
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
// Invariant: !use_deleted implies num_deleted is 0.
|
|
470
|
-
assert(settings.use_deleted() || num_deleted == 0);
|
|
471
|
-
return num_deleted > 0 && equals(key_info.delkey, key);
|
|
474
|
+
assert(num_deleted > 0);
|
|
475
|
+
return equals(key_info.delkey, key);
|
|
472
476
|
}
|
|
473
477
|
|
|
474
478
|
public:
|
|
@@ -491,27 +495,37 @@ class sparse_hashtable {
|
|
|
491
495
|
// These are public so the iterators can use them
|
|
492
496
|
// True if the item at position bucknum is "deleted" marker
|
|
493
497
|
bool test_deleted(size_type bucknum) const {
|
|
494
|
-
|
|
495
|
-
|
|
498
|
+
// Invariant: !use_deleted() implies num_deleted is 0.
|
|
499
|
+
assert(settings.use_deleted() || num_deleted == 0);
|
|
500
|
+
return num_deleted > 0 && table.test(bucknum) &&
|
|
501
|
+
test_deleted_key(get_key(table.unsafe_get(bucknum)));
|
|
496
502
|
}
|
|
497
503
|
bool test_deleted(const iterator &it) const {
|
|
498
|
-
|
|
499
|
-
|
|
504
|
+
// Invariant: !use_deleted() implies num_deleted is 0.
|
|
505
|
+
assert(settings.use_deleted() || num_deleted == 0);
|
|
506
|
+
return num_deleted > 0 && test_deleted_key(get_key(*it));
|
|
500
507
|
}
|
|
501
508
|
bool test_deleted(const const_iterator &it) const {
|
|
502
|
-
|
|
503
|
-
|
|
509
|
+
// Invariant: !use_deleted() implies num_deleted is 0.
|
|
510
|
+
assert(settings.use_deleted() || num_deleted == 0);
|
|
511
|
+
return num_deleted > 0 && test_deleted_key(get_key(*it));
|
|
504
512
|
}
|
|
505
513
|
bool test_deleted(const destructive_iterator &it) const {
|
|
506
|
-
|
|
507
|
-
|
|
514
|
+
// Invariant: !use_deleted() implies num_deleted is 0.
|
|
515
|
+
assert(settings.use_deleted() || num_deleted == 0);
|
|
516
|
+
return num_deleted > 0 && test_deleted_key(get_key(*it));
|
|
508
517
|
}
|
|
509
518
|
|
|
510
519
|
private:
|
|
520
|
+
void check_use_deleted(const char* caller) {
|
|
521
|
+
(void)caller; // could log it if the assert failed
|
|
522
|
+
assert(settings.use_deleted());
|
|
523
|
+
}
|
|
524
|
+
|
|
511
525
|
// Set it so test_deleted is true. true if object didn't used to be deleted.
|
|
512
526
|
// TODO(csilvers): make these private (also in densehashtable.h)
|
|
513
527
|
bool set_deleted(iterator &it) {
|
|
514
|
-
|
|
528
|
+
check_use_deleted("set_deleted()");
|
|
515
529
|
bool retval = !test_deleted(it);
|
|
516
530
|
// &* converts from iterator to value-type.
|
|
517
531
|
set_key(&(*it), key_info.delkey);
|
|
@@ -519,7 +533,7 @@ class sparse_hashtable {
|
|
|
519
533
|
}
|
|
520
534
|
// Set it so test_deleted is false. true if object used to be deleted.
|
|
521
535
|
bool clear_deleted(iterator &it) {
|
|
522
|
-
|
|
536
|
+
check_use_deleted("clear_deleted()");
|
|
523
537
|
// Happens automatically when we assign something else in its place.
|
|
524
538
|
return test_deleted(it);
|
|
525
539
|
}
|
|
@@ -530,14 +544,14 @@ class sparse_hashtable {
|
|
|
530
544
|
// 'it' after it's been deleted anyway, so its const-ness doesn't
|
|
531
545
|
// really matter.
|
|
532
546
|
bool set_deleted(const_iterator &it) {
|
|
533
|
-
|
|
547
|
+
check_use_deleted("set_deleted()");
|
|
534
548
|
bool retval = !test_deleted(it);
|
|
535
549
|
set_key(const_cast<pointer>(&(*it)), key_info.delkey);
|
|
536
550
|
return retval;
|
|
537
551
|
}
|
|
538
552
|
// Set it so test_deleted is false. true if object used to be deleted.
|
|
539
553
|
bool clear_deleted(const_iterator &it) {
|
|
540
|
-
|
|
554
|
+
check_use_deleted("clear_deleted()");
|
|
541
555
|
return test_deleted(it);
|
|
542
556
|
}
|
|
543
557
|
|
|
@@ -600,8 +614,9 @@ class sparse_hashtable {
|
|
|
600
614
|
did_resize = true;
|
|
601
615
|
}
|
|
602
616
|
if (table.num_nonempty() >=
|
|
603
|
-
(
|
|
617
|
+
(std::numeric_limits<size_type>::max)() - delta) {
|
|
604
618
|
throw std::length_error("resize overflow");
|
|
619
|
+
}
|
|
605
620
|
if ( bucket_count() >= HT_MIN_BUCKETS &&
|
|
606
621
|
(table.num_nonempty() + delta) <= settings.enlarge_threshold() )
|
|
607
622
|
return did_resize; // we're ok as we are
|
|
@@ -621,7 +636,7 @@ class sparse_hashtable {
|
|
|
621
636
|
settings.min_buckets(table.num_nonempty() - num_deleted + delta,
|
|
622
637
|
bucket_count());
|
|
623
638
|
if (resize_to < needed_size && // may double resize_to
|
|
624
|
-
resize_to < (
|
|
639
|
+
resize_to < (std::numeric_limits<size_type>::max)() / 2) {
|
|
625
640
|
// This situation means that we have enough deleted elements,
|
|
626
641
|
// that once we purge them, we won't actually have needed to
|
|
627
642
|
// grow. But we may want to grow anyway: if we just purge one
|
|
@@ -795,10 +810,13 @@ class sparse_hashtable {
|
|
|
795
810
|
|
|
796
811
|
// Many STL algorithms use swap instead of copy constructors
|
|
797
812
|
void swap(sparse_hashtable& ht) {
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
813
|
+
std::swap(settings, ht.settings);
|
|
814
|
+
std::swap(key_info, ht.key_info);
|
|
815
|
+
std::swap(num_deleted, ht.num_deleted);
|
|
801
816
|
table.swap(ht.table);
|
|
817
|
+
settings.reset_thresholds(bucket_count()); // also resets consider_shrink
|
|
818
|
+
ht.settings.reset_thresholds(ht.bucket_count());
|
|
819
|
+
// we purposefully don't swap the allocator, which may not be swap-able
|
|
802
820
|
}
|
|
803
821
|
|
|
804
822
|
// It's always nice to be able to clear a table without deallocating it
|
|
@@ -817,7 +835,7 @@ class sparse_hashtable {
|
|
|
817
835
|
// if object is not found; 2nd is ILLEGAL_BUCKET if it is.
|
|
818
836
|
// Note: because of deletions where-to-insert is not trivial: it's the
|
|
819
837
|
// first deleted bucket we see, as long as we don't find the key later
|
|
820
|
-
pair<size_type, size_type> find_position(const key_type &key) const {
|
|
838
|
+
std::pair<size_type, size_type> find_position(const key_type &key) const {
|
|
821
839
|
size_type num_probes = 0; // how many times we've probed
|
|
822
840
|
const size_type bucket_count_minus_one = bucket_count() - 1;
|
|
823
841
|
size_type bucknum = hash(key) & bucket_count_minus_one;
|
|
@@ -827,9 +845,9 @@ class sparse_hashtable {
|
|
|
827
845
|
if ( !table.test(bucknum) ) { // bucket is empty
|
|
828
846
|
SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
|
|
829
847
|
if ( insert_pos == ILLEGAL_BUCKET ) // found no prior place to insert
|
|
830
|
-
return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
|
|
848
|
+
return std::pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
|
|
831
849
|
else
|
|
832
|
-
return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
|
|
850
|
+
return std::pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
|
|
833
851
|
|
|
834
852
|
} else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
|
|
835
853
|
if ( insert_pos == ILLEGAL_BUCKET )
|
|
@@ -837,7 +855,7 @@ class sparse_hashtable {
|
|
|
837
855
|
|
|
838
856
|
} else if ( equals(key, get_key(table.unsafe_get(bucknum))) ) {
|
|
839
857
|
SPARSEHASH_STAT_UPDATE(total_probes += num_probes);
|
|
840
|
-
return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
|
|
858
|
+
return std::pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
|
|
841
859
|
}
|
|
842
860
|
++num_probes; // we're doing another probe
|
|
843
861
|
bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
|
|
@@ -847,9 +865,10 @@ class sparse_hashtable {
|
|
|
847
865
|
}
|
|
848
866
|
|
|
849
867
|
public:
|
|
868
|
+
|
|
850
869
|
iterator find(const key_type& key) {
|
|
851
870
|
if ( size() == 0 ) return end();
|
|
852
|
-
pair<size_type, size_type> pos = find_position(key);
|
|
871
|
+
std::pair<size_type, size_type> pos = find_position(key);
|
|
853
872
|
if ( pos.first == ILLEGAL_BUCKET ) // alas, not there
|
|
854
873
|
return end();
|
|
855
874
|
else
|
|
@@ -858,7 +877,7 @@ class sparse_hashtable {
|
|
|
858
877
|
|
|
859
878
|
const_iterator find(const key_type& key) const {
|
|
860
879
|
if ( size() == 0 ) return end();
|
|
861
|
-
pair<size_type, size_type> pos = find_position(key);
|
|
880
|
+
std::pair<size_type, size_type> pos = find_position(key);
|
|
862
881
|
if ( pos.first == ILLEGAL_BUCKET ) // alas, not there
|
|
863
882
|
return end();
|
|
864
883
|
else
|
|
@@ -869,33 +888,34 @@ class sparse_hashtable {
|
|
|
869
888
|
// This is a tr1 method: the bucket a given key is in, or what bucket
|
|
870
889
|
// it would be put in, if it were to be inserted. Shrug.
|
|
871
890
|
size_type bucket(const key_type& key) const {
|
|
872
|
-
pair<size_type, size_type> pos = find_position(key);
|
|
891
|
+
std::pair<size_type, size_type> pos = find_position(key);
|
|
873
892
|
return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
|
|
874
893
|
}
|
|
875
894
|
|
|
876
895
|
// Counts how many elements have key key. For maps, it's either 0 or 1.
|
|
877
896
|
size_type count(const key_type &key) const {
|
|
878
|
-
pair<size_type, size_type> pos = find_position(key);
|
|
897
|
+
std::pair<size_type, size_type> pos = find_position(key);
|
|
879
898
|
return pos.first == ILLEGAL_BUCKET ? 0 : 1;
|
|
880
899
|
}
|
|
881
900
|
|
|
882
901
|
// Likewise, equal_range doesn't really make sense for us. Oh well.
|
|
883
|
-
pair<iterator,iterator> equal_range(const key_type& key) {
|
|
902
|
+
std::pair<iterator,iterator> equal_range(const key_type& key) {
|
|
884
903
|
iterator pos = find(key); // either an iterator or end
|
|
885
904
|
if (pos == end()) {
|
|
886
|
-
return pair<iterator,iterator>(pos, pos);
|
|
905
|
+
return std::pair<iterator,iterator>(pos, pos);
|
|
887
906
|
} else {
|
|
888
907
|
const iterator startpos = pos++;
|
|
889
|
-
return pair<iterator,iterator>(startpos, pos);
|
|
908
|
+
return std::pair<iterator,iterator>(startpos, pos);
|
|
890
909
|
}
|
|
891
910
|
}
|
|
892
|
-
pair<const_iterator,const_iterator> equal_range(const key_type& key)
|
|
911
|
+
std::pair<const_iterator,const_iterator> equal_range(const key_type& key)
|
|
912
|
+
const {
|
|
893
913
|
const_iterator pos = find(key); // either an iterator or end
|
|
894
914
|
if (pos == end()) {
|
|
895
|
-
return pair<const_iterator,const_iterator>(pos, pos);
|
|
915
|
+
return std::pair<const_iterator,const_iterator>(pos, pos);
|
|
896
916
|
} else {
|
|
897
917
|
const const_iterator startpos = pos++;
|
|
898
|
-
return pair<const_iterator,const_iterator>(startpos, pos);
|
|
918
|
+
return std::pair<const_iterator,const_iterator>(startpos, pos);
|
|
899
919
|
}
|
|
900
920
|
}
|
|
901
921
|
|
|
@@ -904,8 +924,9 @@ class sparse_hashtable {
|
|
|
904
924
|
private:
|
|
905
925
|
// Private method used by insert_noresize and find_or_insert.
|
|
906
926
|
iterator insert_at(const_reference obj, size_type pos) {
|
|
907
|
-
if (size() >= max_size())
|
|
927
|
+
if (size() >= max_size()) {
|
|
908
928
|
throw std::length_error("insert overflow");
|
|
929
|
+
}
|
|
909
930
|
if ( test_deleted(pos) ) { // just replace if it's been deleted
|
|
910
931
|
// The set() below will undelete this object. We just worry about stats
|
|
911
932
|
assert(num_deleted > 0);
|
|
@@ -916,27 +937,28 @@ class sparse_hashtable {
|
|
|
916
937
|
}
|
|
917
938
|
|
|
918
939
|
// If you know *this is big enough to hold obj, use this routine
|
|
919
|
-
pair<iterator, bool> insert_noresize(const_reference obj) {
|
|
940
|
+
std::pair<iterator, bool> insert_noresize(const_reference obj) {
|
|
920
941
|
// First, double-check we're not inserting delkey
|
|
921
942
|
assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
|
|
922
943
|
&& "Inserting the deleted key");
|
|
923
|
-
const pair<size_type,size_type> pos = find_position(get_key(obj));
|
|
944
|
+
const std::pair<size_type,size_type> pos = find_position(get_key(obj));
|
|
924
945
|
if ( pos.first != ILLEGAL_BUCKET) { // object was already there
|
|
925
|
-
return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
|
|
926
|
-
|
|
927
|
-
|
|
946
|
+
return std::pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
|
|
947
|
+
table.nonempty_end()),
|
|
948
|
+
false); // false: we didn't insert
|
|
928
949
|
} else { // pos.second says where to put it
|
|
929
|
-
return pair<iterator,bool>(insert_at(obj, pos.second), true);
|
|
950
|
+
return std::pair<iterator,bool>(insert_at(obj, pos.second), true);
|
|
930
951
|
}
|
|
931
952
|
}
|
|
932
953
|
|
|
933
954
|
// Specializations of insert(it, it) depending on the power of the iterator:
|
|
934
955
|
// (1) Iterator supports operator-, resize before inserting
|
|
935
956
|
template <class ForwardIterator>
|
|
936
|
-
void insert(ForwardIterator f, ForwardIterator l,
|
|
937
|
-
size_t dist =
|
|
938
|
-
if (dist >= (std::numeric_limits<size_type>::max)())
|
|
957
|
+
void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) {
|
|
958
|
+
size_t dist = std::distance(f, l);
|
|
959
|
+
if (dist >= (std::numeric_limits<size_type>::max)()) {
|
|
939
960
|
throw std::length_error("insert-range overflow");
|
|
961
|
+
}
|
|
940
962
|
resize_delta(static_cast<size_type>(dist));
|
|
941
963
|
for ( ; dist > 0; --dist, ++f) {
|
|
942
964
|
insert_noresize(*f);
|
|
@@ -945,14 +967,14 @@ class sparse_hashtable {
|
|
|
945
967
|
|
|
946
968
|
// (2) Arbitrary iterator, can't tell how much to resize
|
|
947
969
|
template <class InputIterator>
|
|
948
|
-
void insert(InputIterator f, InputIterator l,
|
|
970
|
+
void insert(InputIterator f, InputIterator l, std::input_iterator_tag) {
|
|
949
971
|
for ( ; f != l; ++f)
|
|
950
972
|
insert(*f);
|
|
951
973
|
}
|
|
952
974
|
|
|
953
975
|
public:
|
|
954
976
|
// This is the normal insert routine, used by the outside world
|
|
955
|
-
pair<iterator, bool> insert(const_reference obj) {
|
|
977
|
+
std::pair<iterator, bool> insert(const_reference obj) {
|
|
956
978
|
resize_delta(1); // adding an object, grow if need be
|
|
957
979
|
return insert_noresize(obj);
|
|
958
980
|
}
|
|
@@ -961,24 +983,26 @@ class sparse_hashtable {
|
|
|
961
983
|
template <class InputIterator>
|
|
962
984
|
void insert(InputIterator f, InputIterator l) {
|
|
963
985
|
// specializes on iterator type
|
|
964
|
-
insert(f, l,
|
|
986
|
+
insert(f, l,
|
|
987
|
+
typename std::iterator_traits<InputIterator>::iterator_category());
|
|
965
988
|
}
|
|
966
989
|
|
|
967
|
-
//
|
|
968
|
-
//
|
|
969
|
-
template <class
|
|
970
|
-
|
|
990
|
+
// DefaultValue is a functor that takes a key and returns a value_type
|
|
991
|
+
// representing the default value to be inserted if none is found.
|
|
992
|
+
template <class DefaultValue>
|
|
993
|
+
value_type& find_or_insert(const key_type& key) {
|
|
971
994
|
// First, double-check we're not inserting delkey
|
|
972
995
|
assert((!settings.use_deleted() || !equals(key, key_info.delkey))
|
|
973
996
|
&& "Inserting the deleted key");
|
|
974
|
-
const pair<size_type,size_type> pos = find_position(key);
|
|
997
|
+
const std::pair<size_type,size_type> pos = find_position(key);
|
|
998
|
+
DefaultValue default_value;
|
|
975
999
|
if ( pos.first != ILLEGAL_BUCKET) { // object was already there
|
|
976
|
-
return table.get_iter(pos.first)
|
|
1000
|
+
return *table.get_iter(pos.first);
|
|
977
1001
|
} else if (resize_delta(1)) { // needed to rehash to make room
|
|
978
1002
|
// Since we resized, we can't use pos, so recalculate where to insert.
|
|
979
|
-
return insert_noresize(
|
|
1003
|
+
return *insert_noresize(default_value(key)).first;
|
|
980
1004
|
} else { // no need to rehash, insert right here
|
|
981
|
-
return insert_at(
|
|
1005
|
+
return *insert_at(default_value(key), pos.second);
|
|
982
1006
|
}
|
|
983
1007
|
}
|
|
984
1008
|
|
|
@@ -1072,28 +1096,62 @@ class sparse_hashtable {
|
|
|
1072
1096
|
// actually put in the hashtable! Alas, since I don't know how to
|
|
1073
1097
|
// write a hasher or key_equal, you have to make sure everything
|
|
1074
1098
|
// but the table is the same. We compact before writing.
|
|
1075
|
-
|
|
1099
|
+
//
|
|
1100
|
+
// The OUTPUT type needs to support a Write() operation. File and
|
|
1101
|
+
// OutputBuffer are appropriate types to pass in.
|
|
1102
|
+
//
|
|
1103
|
+
// The INPUT type needs to support a Read() operation. File and
|
|
1104
|
+
// InputBuffer are appropriate types to pass in.
|
|
1105
|
+
template <typename OUTPUT>
|
|
1106
|
+
bool write_metadata(OUTPUT *fp) {
|
|
1076
1107
|
squash_deleted(); // so we don't have to worry about delkey
|
|
1077
1108
|
return table.write_metadata(fp);
|
|
1078
1109
|
}
|
|
1079
1110
|
|
|
1080
|
-
|
|
1111
|
+
template <typename INPUT>
|
|
1112
|
+
bool read_metadata(INPUT *fp) {
|
|
1081
1113
|
num_deleted = 0; // since we got rid before writing
|
|
1082
|
-
bool result = table.read_metadata(fp);
|
|
1114
|
+
const bool result = table.read_metadata(fp);
|
|
1083
1115
|
settings.reset_thresholds(bucket_count());
|
|
1084
1116
|
return result;
|
|
1085
1117
|
}
|
|
1086
1118
|
|
|
1087
1119
|
// Only meaningful if value_type is a POD.
|
|
1088
|
-
|
|
1120
|
+
template <typename OUTPUT>
|
|
1121
|
+
bool write_nopointer_data(OUTPUT *fp) {
|
|
1089
1122
|
return table.write_nopointer_data(fp);
|
|
1090
1123
|
}
|
|
1091
1124
|
|
|
1092
1125
|
// Only meaningful if value_type is a POD.
|
|
1093
|
-
|
|
1126
|
+
template <typename INPUT>
|
|
1127
|
+
bool read_nopointer_data(INPUT *fp) {
|
|
1094
1128
|
return table.read_nopointer_data(fp);
|
|
1095
1129
|
}
|
|
1096
1130
|
|
|
1131
|
+
// INPUT and OUTPUT must be either a FILE, *or* a C++ stream
|
|
1132
|
+
// (istream, ostream, etc) *or* a class providing
|
|
1133
|
+
// Read(void*, size_t) and Write(const void*, size_t)
|
|
1134
|
+
// (respectively), which writes a buffer into a stream
|
|
1135
|
+
// (which the INPUT/OUTPUT instance presumably owns).
|
|
1136
|
+
|
|
1137
|
+
typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
|
|
1138
|
+
|
|
1139
|
+
// ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
|
|
1140
|
+
template <typename ValueSerializer, typename OUTPUT>
|
|
1141
|
+
bool serialize(ValueSerializer serializer, OUTPUT *fp) {
|
|
1142
|
+
squash_deleted(); // so we don't have to worry about delkey
|
|
1143
|
+
return table.serialize(serializer, fp);
|
|
1144
|
+
}
|
|
1145
|
+
|
|
1146
|
+
// ValueSerializer: a functor. operator()(INPUT*, value_type*)
|
|
1147
|
+
template <typename ValueSerializer, typename INPUT>
|
|
1148
|
+
bool unserialize(ValueSerializer serializer, INPUT *fp) {
|
|
1149
|
+
num_deleted = 0; // since we got rid before writing
|
|
1150
|
+
const bool result = table.unserialize(serializer, fp);
|
|
1151
|
+
settings.reset_thresholds(bucket_count());
|
|
1152
|
+
return result;
|
|
1153
|
+
}
|
|
1154
|
+
|
|
1097
1155
|
private:
|
|
1098
1156
|
// Table is the main storage class.
|
|
1099
1157
|
typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table;
|
|
@@ -1103,34 +1161,37 @@ class sparse_hashtable {
|
|
|
1103
1161
|
// hasher's operator() might have the same function signature, they
|
|
1104
1162
|
// must be packaged in different classes.
|
|
1105
1163
|
struct Settings :
|
|
1106
|
-
sh_hashtable_settings<key_type, hasher,
|
|
1164
|
+
sparsehash_internal::sh_hashtable_settings<key_type, hasher,
|
|
1165
|
+
size_type, HT_MIN_BUCKETS> {
|
|
1107
1166
|
explicit Settings(const hasher& hf)
|
|
1108
|
-
: sh_hashtable_settings<key_type, hasher,
|
|
1167
|
+
: sparsehash_internal::sh_hashtable_settings<key_type, hasher,
|
|
1168
|
+
size_type, HT_MIN_BUCKETS>(
|
|
1109
1169
|
hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
|
|
1110
1170
|
};
|
|
1111
1171
|
|
|
1112
1172
|
// KeyInfo stores delete key and packages zero-size functors:
|
|
1113
1173
|
// ExtractKey and SetKey.
|
|
1114
|
-
class KeyInfo : public ExtractKey, public SetKey, public
|
|
1174
|
+
class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
|
|
1115
1175
|
public:
|
|
1116
|
-
KeyInfo(const ExtractKey& ek, const SetKey& sk, const
|
|
1176
|
+
KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
|
|
1117
1177
|
: ExtractKey(ek),
|
|
1118
1178
|
SetKey(sk),
|
|
1119
|
-
|
|
1179
|
+
EqualKey(eq) {
|
|
1120
1180
|
}
|
|
1121
|
-
|
|
1181
|
+
// We want to return the exact same type as ExtractKey: Key or const Key&
|
|
1182
|
+
typename ExtractKey::result_type get_key(const_reference v) const {
|
|
1122
1183
|
return ExtractKey::operator()(v);
|
|
1123
1184
|
}
|
|
1124
1185
|
void set_key(pointer v, const key_type& k) const {
|
|
1125
1186
|
SetKey::operator()(v, k);
|
|
1126
1187
|
}
|
|
1127
1188
|
bool equals(const key_type& a, const key_type& b) const {
|
|
1128
|
-
return
|
|
1189
|
+
return EqualKey::operator()(a, b);
|
|
1129
1190
|
}
|
|
1130
1191
|
|
|
1131
1192
|
// Which key marks deleted entries.
|
|
1132
1193
|
// TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
|
|
1133
|
-
typename remove_const<key_type>::type delkey;
|
|
1194
|
+
typename base::remove_const<key_type>::type delkey;
|
|
1134
1195
|
};
|
|
1135
1196
|
|
|
1136
1197
|
// Utility functions to access the templated operators
|
|
@@ -1140,7 +1201,7 @@ class sparse_hashtable {
|
|
|
1140
1201
|
bool equals(const key_type& a, const key_type& b) const {
|
|
1141
1202
|
return key_info.equals(a, b);
|
|
1142
1203
|
}
|
|
1143
|
-
|
|
1204
|
+
typename ExtractKey::result_type get_key(const_reference v) const {
|
|
1144
1205
|
return key_info.get_key(v);
|
|
1145
1206
|
}
|
|
1146
1207
|
void set_key(pointer v, const key_type& k) const {
|