RubyGems - google_hash - Versions diffs - 0.8.1 → 0.8.2 - Mend

google_hash 0.8.1 → 0.8.2

Files changed (121) hide show

data/ext/{sparsehash-1.8.1/src/google/sparsehash → sparsehash-2.0.2/src/sparsehash/internal}/densehashtable.h RENAMED

@@ -28,7 +28,6 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 // ---
-// Author: Craig Silverstein
 //
 // A dense hashtable is a particular implementation of
 // a hashtable: one that is meant to minimize memory allocation.
@@ -53,8 +52,8 @@
 // and Default Constructible. It is not required to be (and commonly
 // isn't) Assignable.
 //
-// You probably shouldn't use this code directly.  Use
-// <google/dense_hash_map> or <google/dense_hash_set> instead.
+// You probably shouldn't use this code directly.  Use dense_hash_map<>
+// or dense_hash_set<> instead.
 // You can change the following below:
 // HT_OCCUPANCY_PCT      -- how full before we double size
@@ -72,7 +71,7 @@
 // For enlarge_factor, you can use this chart to try to trade-off
 // expected lookup time to the space taken up.  By default, this
 // code uses quadratic probing, though you can change it to linear
-// via _JUMP below if you really want to.
+// via JUMP_ below if you really want to.
 //
 // From http://www.augustana.ca/~mohrj/courses/1999.fall/csc210/lecture_notes/hashing.html
 // NUMBER OF PROBES / LOOKUP       Successful            Unsuccessful
@@ -90,31 +89,34 @@
 #ifndef _DENSEHASHTABLE_H_
 #define _DENSEHASHTABLE_H_
-// The probing method
-// Linear probing
-// #define JUMP_(key, num_probes)    ( 1 )
-// Quadratic probing
-#define JUMP_(key, num_probes)    ( num_probes )
-#include <google/sparsehash/sparseconfig.h>
-#include <stdio.h>
+#include <sparsehash/internal/sparseconfig.h>
 #include <assert.h>
-#include <stdlib.h>             // for abort()
+#include <stdio.h>              // for FILE, fwrite, fread
 #include <algorithm>            // For swap(), eg
-#include <stdexcept>            // For length_error
-#include <iostream>             // For cerr
-#include <memory>               // For uninitialized_fill, uninitialized_copy
-#include <utility>              // for pair<>
-#include <iterator>             // for facts about iterator tags
-#include <limits>               // for numeric_limits<>
-#include <google/sparsehash/libc_allocator_with_realloc.h>
-#include <google/sparsehash/hashtable-common.h>
-#include <google/type_traits.h> // for true_type, integral_constant, etc.
+#include <iterator>             // For iterator tags
+#include <limits>               // for numeric_limits
+#include <memory>               // For uninitialized_fill
+#include <utility>              // for pair
+#include <sparsehash/internal/hashtable-common.h>
+#include <sparsehash/internal/libc_allocator_with_realloc.h>
+#include <sparsehash/type_traits.h>
+#include <stdexcept>                 // For length_error
 _START_GOOGLE_NAMESPACE_
-using STL_NAMESPACE::pair;
+namespace base {   // just to make google->opensource transition easier
+using GOOGLE_NAMESPACE::true_type;
+using GOOGLE_NAMESPACE::false_type;
+using GOOGLE_NAMESPACE::integral_constant;
+using GOOGLE_NAMESPACE::is_same;
+using GOOGLE_NAMESPACE::remove_const;
+}
+// The probing method
+// Linear probing
+// #define JUMP_(key, num_probes)    ( 1 )
+// Quadratic probing
+#define JUMP_(key, num_probes)    ( num_probes )
 // Hashtable class, used to implement the hashed associative containers
 // hash_set and hash_map.
@@ -124,6 +126,8 @@ using STL_NAMESPACE::pair;
 //      to search for a Value in the table (find() takes a Key).
 // HashFcn: Takes a Key and returns an integer, the more unique the better.
 // ExtractKey: given a Value, returns the unique Key associated with it.
+//             Must inherit from unary_function, or at least have a
+//             result_type typedef indicating the return type of operator().
 // SetKey: given a Value* and a Key, modifies the value such that
 //         ExtractKey(value) == key.  We guarantee this is only called
 //         with key == deleted_key or key == empty_key.
@@ -151,7 +155,7 @@ struct dense_hashtable_iterator {
   typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
   typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
-  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
+  typedef std::forward_iterator_tag iterator_category;  // very little defined!
   typedef V value_type;
   typedef typename value_alloc_type::difference_type difference_type;
   typedef typename value_alloc_type::size_type size_type;
@@ -204,7 +208,7 @@ struct dense_hashtable_const_iterator {
   typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
   typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
-  typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
+  typedef std::forward_iterator_tag iterator_category;  // very little defined!
   typedef V value_type;
   typedef typename value_alloc_type::difference_type difference_type;
   typedef typename value_alloc_type::size_type size_type;
@@ -218,7 +222,8 @@ struct dense_hashtable_const_iterator {
     : ht(h), pos(it), end(it_end)   {
     if (advance)  advance_past_empty_and_deleted();
   }
-  dense_hashtable_const_iterator() { }
+  dense_hashtable_const_iterator()
+    : ht(NULL), pos(pointer()), end(pointer()) { }
   // This lets us convert regular iterators to const iterators
   dense_hashtable_const_iterator(const iterator &it)
     : ht(it.ht), pos(it.pos), end(it.end) { }
@@ -284,12 +289,12 @@ class dense_hashtable {
   // How full we let the table get before we resize, by default.
   // Knuth says .8 is good -- higher causes us to probe too much,
   // though it saves memory.
-  static const int HT_OCCUPANCY_PCT; // = 50 (out of 100)
+  static const int HT_OCCUPANCY_PCT;  // defined at the bottom of this file
   // How empty we let the table get before we resize lower, by default.
   // (0.0 means never resize lower.)
   // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
-  static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;
+  static const int HT_EMPTY_PCT;      // defined at the bottom of this file
   // Minimum size we're willing to let hashtables be.
   // Must be a power of two, and at least 4.
@@ -336,7 +341,9 @@ class dense_hashtable {
   // ACCESSOR FUNCTIONS for the things we templatize on, basically
   hasher hash_funct() const               { return settings; }
   key_equal key_eq() const                { return key_info; }
-  allocator_type get_allocator() const    { return allocator; }
+  allocator_type get_allocator() const {
+    return allocator_type(val_info);
+  }
   // Accessor function for statistics gathering.
   int num_table_copies() const { return settings.num_ht_copies(); }
@@ -371,19 +378,19 @@ class dense_hashtable {
     assert(num_deleted == 0);
   }
+  // Test if the given key is the deleted indicator.  Requires
+  // num_deleted > 0, for correctness of read(), and because that
+  // guarantees that key_info.delkey is valid.
   bool test_deleted_key(const key_type& key) const {
-    // The num_deleted test is crucial for read(): after read(), the ht values
-    // are garbage, and we don't want to think some of them are deleted.
-    // Invariant: !use_deleted implies num_deleted is 0.
-    assert(settings.use_deleted() || num_deleted == 0);
-    return num_deleted > 0 && equals(key_info.delkey, key);
+    assert(num_deleted > 0);
+    return equals(key_info.delkey, key);
   }
  public:
   void set_deleted_key(const key_type &key) {
     // the empty indicator (if specified) and the deleted indicator
     // must be different
-    assert((!settings.use_empty() || !equals(key, get_key(emptyval)))
+    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
            && "Passed the empty-key to set_deleted_key");
     // It's only safe to change what "deleted" means if we purge deleted guys
     squash_deleted();
@@ -403,19 +410,30 @@ class dense_hashtable {
   // These are public so the iterators can use them
   // True if the item at position bucknum is "deleted" marker
   bool test_deleted(size_type bucknum) const {
-    return test_deleted_key(get_key(table[bucknum]));
+    // Invariant: !use_deleted() implies num_deleted is 0.
+    assert(settings.use_deleted() || num_deleted == 0);
+    return num_deleted > 0 && test_deleted_key(get_key(table[bucknum]));
   }
   bool test_deleted(const iterator &it) const {
-    return test_deleted_key(get_key(*it));
+    // Invariant: !use_deleted() implies num_deleted is 0.
+    assert(settings.use_deleted() || num_deleted == 0);
+    return num_deleted > 0 && test_deleted_key(get_key(*it));
   }
   bool test_deleted(const const_iterator &it) const {
-    return test_deleted_key(get_key(*it));
+    // Invariant: !use_deleted() implies num_deleted is 0.
+    assert(settings.use_deleted() || num_deleted == 0);
+    return num_deleted > 0 && test_deleted_key(get_key(*it));
   }
  private:
+  void check_use_deleted(const char* caller) {
+    (void)caller;    // could log it if the assert failed
+    assert(settings.use_deleted());
+  }
  // Set it so test_deleted is true.  Returns true if the object was not already deleted.
   bool set_deleted(iterator &it) {
-    assert(settings.use_deleted());
+    check_use_deleted("set_deleted()");
     bool retval = !test_deleted(it);
     // &* converts from iterator to value-type.
     set_key(&(*it), key_info.delkey);
@@ -423,7 +441,7 @@ class dense_hashtable {
   }
  // Set it so test_deleted is false.  Returns true if the object was previously deleted.
   bool clear_deleted(iterator &it) {
-    assert(settings.use_deleted());
+    check_use_deleted("clear_deleted()");
     // Happens automatically when we assign something else in its place.
     return test_deleted(it);
   }
@@ -434,14 +452,14 @@ class dense_hashtable {
   // 'it' after it's been deleted anyway, so its const-ness doesn't
   // really matter.
   bool set_deleted(const_iterator &it) {
-    assert(settings.use_deleted());
+    check_use_deleted("set_deleted()");
     bool retval = !test_deleted(it);
     set_key(const_cast<pointer>(&(*it)), key_info.delkey);
     return retval;
   }
  // Set it so test_deleted is false.  Returns true if the object was previously deleted.
   bool clear_deleted(const_iterator &it) {
-    assert(settings.use_deleted());
+    check_use_deleted("clear_deleted()");
     return test_deleted(it);
   }
@@ -456,20 +474,20 @@ class dense_hashtable {
   // True if the item at position bucknum is "empty" marker
   bool test_empty(size_type bucknum) const {
     assert(settings.use_empty());  // we always need to know what's empty!
-    return equals(get_key(emptyval), get_key(table[bucknum]));
+    return equals(get_key(val_info.emptyval), get_key(table[bucknum]));
   }
   bool test_empty(const iterator &it) const {
     assert(settings.use_empty());  // we always need to know what's empty!
-    return equals(get_key(emptyval), get_key(*it));
+    return equals(get_key(val_info.emptyval), get_key(*it));
   }
   bool test_empty(const const_iterator &it) const {
     assert(settings.use_empty());  // we always need to know what's empty!
-    return equals(get_key(emptyval), get_key(*it));
+    return equals(get_key(val_info.emptyval), get_key(*it));
   }
  private:
   void fill_range_with_empty(pointer table_start, pointer table_end) {
-    STL_NAMESPACE::uninitialized_fill(table_start, table_end, emptyval);
+    std::uninitialized_fill(table_start, table_end, val_info.emptyval);
   }
  public:
@@ -483,24 +501,24 @@ class dense_hashtable {
     assert((!settings.use_deleted() || !equals(get_key(val), key_info.delkey))
            && "Setting the empty key the same as the deleted key");
     settings.set_use_empty(true);
-    set_value(&emptyval, val);
+    set_value(&val_info.emptyval, val);
     assert(!table);                  // must set before first use
     // num_buckets was set in constructor even though table was NULL
-    table = allocator.allocate(num_buckets);
+    table = val_info.allocate(num_buckets);
     assert(table);
     fill_range_with_empty(table, table + num_buckets);
   }
-  // TODO(sjackman): return a key_type rather than a value_type
+  // TODO(user): return a key_type rather than a value_type
   value_type empty_key() const {
     assert(settings.use_empty());
-    return emptyval;
+    return val_info.emptyval;
   }
   // FUNCTIONS CONCERNING SIZE
  public:
   size_type size() const      { return num_elements - num_deleted; }
-  size_type max_size() const  { return allocator.max_size(); }
+  size_type max_size() const  { return val_info.max_size(); }
   bool empty() const          { return size() == 0; }
   size_type bucket_count() const      { return num_buckets; }
   size_type max_bucket_count() const  { return max_size(); }
@@ -556,8 +574,10 @@ class dense_hashtable {
       if ( maybe_shrink() )
         did_resize = true;
     }
-    if (num_elements >= (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
+    if (num_elements >=
+        (std::numeric_limits<size_type>::max)() - delta) {
       throw std::length_error("resize overflow");
+    }
     if ( bucket_count() >= HT_MIN_BUCKETS &&
          (num_elements + delta) <= settings.enlarge_threshold() )
       return did_resize;                          // we're ok as we are
@@ -576,7 +596,7 @@ class dense_hashtable {
       settings.min_buckets(num_elements - num_deleted + delta, bucket_count());
     if (resize_to < needed_size &&    // may double resize_to
-        resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
+        resize_to < (std::numeric_limits<size_type>::max)() / 2) {
       // This situation means that we have enough deleted elements,
       // that once we purge them, we won't actually have needed to
       // grow.  But we may want to grow anyway: if we just purge one
@@ -598,13 +618,13 @@ class dense_hashtable {
   // We require table be not-NULL and empty before calling this.
   void resize_table(size_type /*old_size*/, size_type new_size,
-                    true_type) {
-    table = allocator.realloc_or_die(table, new_size);
+                    base::true_type) {
+    table = val_info.realloc_or_die(table, new_size);
   }
-  void resize_table(size_type old_size, size_type new_size, false_type) {
-    allocator.deallocate(table, old_size);
-    table = allocator.allocate(new_size);
+  void resize_table(size_type old_size, size_type new_size, base::false_type) {
+    val_info.deallocate(table, old_size);
+    table = val_info.allocate(new_size);
   }
   // Used to actually do the rehashing when we grow/shrink a hashtable
@@ -669,13 +689,12 @@ class dense_hashtable {
                            const Alloc& alloc = Alloc())
       : settings(hf),
         key_info(ext, set, eql),
-        allocator(alloc),
         num_deleted(0),
         num_elements(0),
         num_buckets(expected_max_items_in_table == 0
                     ? HT_DEFAULT_STARTING_BUCKETS
                     : settings.min_buckets(expected_max_items_in_table, 0)),
-        emptyval(),
+        val_info(alloc_impl<value_alloc_type>(alloc)),
         table(NULL) {
     // table is NULL until emptyval is set.  However, we set num_buckets
     // here so we know how much space to allocate once emptyval is set
@@ -688,11 +707,10 @@ class dense_hashtable {
                   size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
       : settings(ht.settings),
         key_info(ht.key_info),
-        allocator(ht.allocator),
         num_deleted(0),
         num_elements(0),
         num_buckets(0),
-        emptyval(ht.emptyval),
+        val_info(ht.val_info),
         table(NULL) {
     if (!ht.settings.use_empty()) {
       // If use_empty isn't set, copy_from will crash, so we do our own copying.
@@ -715,7 +733,7 @@ class dense_hashtable {
     }
     settings = ht.settings;
     key_info = ht.key_info;
-    set_value(&emptyval, ht.emptyval);
+    set_value(&val_info.emptyval, ht.val_info.emptyval);
     // copy_from() calls clear and sets num_deleted to 0 too
     copy_from(ht, HT_MIN_BUCKETS);
     // we purposefully don't copy the allocator, which may not be copyable
@@ -725,38 +743,38 @@ class dense_hashtable {
   ~dense_hashtable() {
     if (table) {
       destroy_buckets(0, num_buckets);
-      allocator.deallocate(table, num_buckets);
+      val_info.deallocate(table, num_buckets);
     }
   }
   // Many STL algorithms use swap instead of copy constructors
   void swap(dense_hashtable& ht) {
-    STL_NAMESPACE::swap(settings, ht.settings);
-    STL_NAMESPACE::swap(key_info, ht.key_info);
-    STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
-    STL_NAMESPACE::swap(num_elements, ht.num_elements);
-    STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
+    std::swap(settings, ht.settings);
+    std::swap(key_info, ht.key_info);
+    std::swap(num_deleted, ht.num_deleted);
+    std::swap(num_elements, ht.num_elements);
+    std::swap(num_buckets, ht.num_buckets);
     { value_type tmp;     // for annoying reasons, swap() doesn't work
-      set_value(&tmp, emptyval);
-      set_value(&emptyval, ht.emptyval);
-      set_value(&ht.emptyval, tmp);
+      set_value(&tmp, val_info.emptyval);
+      set_value(&val_info.emptyval, ht.val_info.emptyval);
+      set_value(&ht.val_info.emptyval, tmp);
     }
-    STL_NAMESPACE::swap(table, ht.table);
-    settings.reset_thresholds(bucket_count());  // this also resets consider_shrink
-    ht.settings.reset_thresholds(bucket_count());
+    std::swap(table, ht.table);
+    settings.reset_thresholds(bucket_count());  // also resets consider_shrink
+    ht.settings.reset_thresholds(ht.bucket_count());
     // we purposefully don't swap the allocator, which may not be swap-able
   }
  private:
   void clear_to_size(size_type new_num_buckets) {
     if (!table) {
-      table = allocator.allocate(new_num_buckets);
+      table = val_info.allocate(new_num_buckets);
     } else {
       destroy_buckets(0, num_buckets);
       if (new_num_buckets != num_buckets) {   // resize, if necessary
-        typedef integral_constant<bool,
-            is_same<value_alloc_type,
-                    libc_allocator_with_realloc<value_type> >::value>
+        typedef base::integral_constant<bool,
+            base::is_same<value_alloc_type,
+                          libc_allocator_with_realloc<value_type> >::value>
             realloc_ok;
         resize_table(num_buckets, new_num_buckets, realloc_ok());
       }
@@ -803,7 +821,7 @@ class dense_hashtable {
   // if object is not found; 2nd is ILLEGAL_BUCKET if it is.
   // Note: because of deletions where-to-insert is not trivial: it's the
   // first deleted bucket we see, as long as we don't find the key later
-  pair<size_type, size_type> find_position(const key_type &key) const {
+  std::pair<size_type, size_type> find_position(const key_type &key) const {
     size_type num_probes = 0;              // how many times we've probed
     const size_type bucket_count_minus_one = bucket_count() - 1;
     size_type bucknum = hash(key) & bucket_count_minus_one;
@@ -811,16 +829,16 @@ class dense_hashtable {
     while ( 1 ) {                          // probe until something happens
       if ( test_empty(bucknum) ) {         // bucket is empty
         if ( insert_pos == ILLEGAL_BUCKET )   // found no prior place to insert
-          return pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
+          return std::pair<size_type,size_type>(ILLEGAL_BUCKET, bucknum);
         else
-          return pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
+          return std::pair<size_type,size_type>(ILLEGAL_BUCKET, insert_pos);
       } else if ( test_deleted(bucknum) ) {// keep searching, but mark to insert
         if ( insert_pos == ILLEGAL_BUCKET )
           insert_pos = bucknum;
       } else if ( equals(key, get_key(table[bucknum])) ) {
-        return pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
+        return std::pair<size_type,size_type>(bucknum, ILLEGAL_BUCKET);
       }
       ++num_probes;                        // we're doing another probe
       bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
@@ -830,9 +848,10 @@ class dense_hashtable {
   }
  public:
   iterator find(const key_type& key) {
     if ( size() == 0 ) return end();
-    pair<size_type, size_type> pos = find_position(key);
+    std::pair<size_type, size_type> pos = find_position(key);
     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
       return end();
     else
@@ -841,7 +860,7 @@ class dense_hashtable {
   const_iterator find(const key_type& key) const {
     if ( size() == 0 ) return end();
-    pair<size_type, size_type> pos = find_position(key);
+    std::pair<size_type, size_type> pos = find_position(key);
     if ( pos.first == ILLEGAL_BUCKET )     // alas, not there
       return end();
     else
@@ -851,33 +870,34 @@ class dense_hashtable {
   // This is a tr1 method: the bucket a given key is in, or what bucket
   // it would be put in, if it were to be inserted.  Shrug.
   size_type bucket(const key_type& key) const {
-    pair<size_type, size_type> pos = find_position(key);
+    std::pair<size_type, size_type> pos = find_position(key);
     return pos.first == ILLEGAL_BUCKET ? pos.second : pos.first;
   }
   // Counts how many elements have key key.  For maps, it's either 0 or 1.
   size_type count(const key_type &key) const {
-    pair<size_type, size_type> pos = find_position(key);
+    std::pair<size_type, size_type> pos = find_position(key);
     return pos.first == ILLEGAL_BUCKET ? 0 : 1;
   }
   // Likewise, equal_range doesn't really make sense for us.  Oh well.
-  pair<iterator,iterator> equal_range(const key_type& key) {
+  std::pair<iterator,iterator> equal_range(const key_type& key) {
     iterator pos = find(key);      // either an iterator or end
     if (pos == end()) {
-      return pair<iterator,iterator>(pos, pos);
+      return std::pair<iterator,iterator>(pos, pos);
     } else {
       const iterator startpos = pos++;
-      return pair<iterator,iterator>(startpos, pos);
+      return std::pair<iterator,iterator>(startpos, pos);
     }
   }
-  pair<const_iterator,const_iterator> equal_range(const key_type& key) const {
+  std::pair<const_iterator,const_iterator> equal_range(const key_type& key)
+      const {
     const_iterator pos = find(key);      // either an iterator or end
     if (pos == end()) {
-      return pair<const_iterator,const_iterator>(pos, pos);
+      return std::pair<const_iterator,const_iterator>(pos, pos);
     } else {
       const const_iterator startpos = pos++;
-      return pair<const_iterator,const_iterator>(startpos, pos);
+      return std::pair<const_iterator,const_iterator>(startpos, pos);
     }
   }
@@ -886,8 +906,9 @@ class dense_hashtable {
  private:
   // Private method used by insert_noresize and find_or_insert.
   iterator insert_at(const_reference obj, size_type pos) {
-    if (size() >= max_size())
+    if (size() >= max_size()) {
       throw std::length_error("insert overflow");
+    }
     if ( test_deleted(pos) ) {      // just replace if it's been del.
       // shrug: shouldn't need to be const.
       const_iterator delpos(this, table + pos, table + num_buckets, false);
@@ -902,29 +923,31 @@ class dense_hashtable {
   }
   // If you know *this is big enough to hold obj, use this routine
-  pair<iterator, bool> insert_noresize(const_reference obj) {
+  std::pair<iterator, bool> insert_noresize(const_reference obj) {
     // First, double-check we're not inserting delkey or emptyval
-    assert((!settings.use_empty() || !equals(get_key(obj), get_key(emptyval)))
+    assert((!settings.use_empty() || !equals(get_key(obj),
+                                             get_key(val_info.emptyval)))
            && "Inserting the empty key");
     assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
            && "Inserting the deleted key");
-    const pair<size_type,size_type> pos = find_position(get_key(obj));
+    const std::pair<size_type,size_type> pos = find_position(get_key(obj));
     if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
-      return pair<iterator,bool>(iterator(this, table + pos.first,
+      return std::pair<iterator,bool>(iterator(this, table + pos.first,
                                           table + num_buckets, false),
                                  false);          // false: we didn't insert
     } else {                                 // pos.second says where to put it
-      return pair<iterator,bool>(insert_at(obj, pos.second), true);
+      return std::pair<iterator,bool>(insert_at(obj, pos.second), true);
     }
   }
   // Specializations of insert(it, it) depending on the power of the iterator:
   // (1) Iterator supports operator-, resize before inserting
   template <class ForwardIterator>
-  void insert(ForwardIterator f, ForwardIterator l, STL_NAMESPACE::forward_iterator_tag) {
-    size_t dist = STL_NAMESPACE::distance(f, l);
-    if (dist >= (std::numeric_limits<size_type>::max)())
+  void insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag) {
+    size_t dist = std::distance(f, l);
+    if (dist >= (std::numeric_limits<size_type>::max)()) {
       throw std::length_error("insert-range overflow");
+    }
     resize_delta(static_cast<size_type>(dist));
     for ( ; dist > 0; --dist, ++f) {
       insert_noresize(*f);
@@ -933,14 +956,14 @@ class dense_hashtable {
   // (2) Arbitrary iterator, can't tell how much to resize
   template <class InputIterator>
-  void insert(InputIterator f, InputIterator l, STL_NAMESPACE::input_iterator_tag) {
+  void insert(InputIterator f, InputIterator l, std::input_iterator_tag) {
     for ( ; f != l; ++f)
       insert(*f);
   }
  public:
   // This is the normal insert routine, used by the outside world
-  pair<iterator, bool> insert(const_reference obj) {
+  std::pair<iterator, bool> insert(const_reference obj) {
     resize_delta(1);                      // adding an object, grow if need be
     return insert_noresize(obj);
   }
@@ -949,33 +972,36 @@ class dense_hashtable {
   template <class InputIterator>
   void insert(InputIterator f, InputIterator l) {
     // specializes on iterator type
-    insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
+    insert(f, l,
+           typename std::iterator_traits<InputIterator>::iterator_category());
   }
-  // This is public only because dense_hash_map::operator[] uses it.
-  // It does the minimal amount of work to implement operator[].
-  template <class DataType>
-  DataType& find_or_insert(const key_type& key) {
+  // DefaultValue is a functor that takes a key and returns a value_type
+  // representing the default value to be inserted if none is found.
+  template <class DefaultValue>
+  value_type& find_or_insert(const key_type& key) {
     // First, double-check we're not inserting emptykey or delkey
-    assert((!settings.use_empty() || !equals(key, get_key(emptyval)))
+    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
            && "Inserting the empty key");
     assert((!settings.use_deleted() || !equals(key, key_info.delkey))
            && "Inserting the deleted key");
-    const pair<size_type,size_type> pos = find_position(key);
+    const std::pair<size_type,size_type> pos = find_position(key);
+    DefaultValue default_value;
     if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
-      return table[pos.first].second;
+      return table[pos.first];
     } else if (resize_delta(1)) {        // needed to rehash to make room
       // Since we resized, we can't use pos, so recalculate where to insert.
-      return insert_noresize(value_type(key, DataType())).first->second;
+      return *insert_noresize(default_value(key)).first;
     } else {                             // no need to rehash, insert right here
-      return insert_at(value_type(key, DataType()), pos.second)->second;
+      return *insert_at(default_value(key), pos.second);
     }
   }
   // DELETION ROUTINES
   size_type erase(const key_type& key) {
     // First, double-check we're not trying to erase delkey or emptyval.
-    assert((!settings.use_empty() || !equals(key, get_key(emptyval)))
+    assert((!settings.use_empty() || !equals(key, get_key(val_info.emptyval)))
            && "Erasing the empty key");
     assert((!settings.use_deleted() || !equals(key, key_info.delkey))
            && "Erasing the deleted key");
@@ -1055,52 +1081,83 @@ class dense_hashtable {
   // I/O
   // We support reading and writing hashtables to disk.  Alas, since
   // I don't know how to write a hasher or key_equal, you have to make
-  // sure everything but the table is the same.  We compact before writing
+  // sure everything but the table is the same.  We compact before writing.
+ private:
+  // Every time the disk format changes, this should probably change too
+  typedef unsigned long MagicNumberType;
+  static const MagicNumberType MAGIC_NUMBER = 0x13578642;
+ public:
+  // I/O -- this is an add-on for writing hash table to disk
   //
-  // NOTE: These functions are currently TODO.  They've not been implemented.
-  bool write_metadata(FILE *fp) {
-    squash_deleted();           // so we don't have to worry about delkey
-    return false;               // TODO
-  }
+  // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
+  //    (istream, ostream, etc) *or* a class providing
+  //    Read(void*, size_t) and Write(const void*, size_t)
+  //    (respectively), which read a buffer from / write a buffer to a stream
+  //    (which the INPUT/OUTPUT instance presumably owns).
-  bool read_metadata(FILE *fp) {
-    num_deleted = 0;            // since we got rid before writing
-    assert(settings.use_empty() && "empty_key not set for read_metadata");
-    if (table)  allocator.deallocate(table, num_buckets);  // we'll make our own
-    // TODO: read magic number
-    // TODO: read num_buckets
-    settings.reset_thresholds(bucket_count());
-    table = allocator.allocate(num_buckets);
-    assert(table);
-    fill_range_with_empty(table, table + num_buckets);
-    // TODO: read num_elements
-    for ( size_type i = 0; i < num_elements; ++i ) {
-      // TODO: read bucket_num
-      // TODO: set with non-empty, non-deleted value
+  typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
+  // ValueSerializer: a functor.  operator()(OUTPUT*, const value_type&)
+  template <typename ValueSerializer, typename OUTPUT>
+  bool serialize(ValueSerializer serializer, OUTPUT *fp) {
+    squash_deleted();           // so we don't have to worry about delkey
+    if ( !sparsehash_internal::write_bigendian_number(fp, MAGIC_NUMBER, 4) )
+      return false;
+    if ( !sparsehash_internal::write_bigendian_number(fp, num_buckets, 8) )
+      return false;
+    if ( !sparsehash_internal::write_bigendian_number(fp, num_elements, 8) )
+      return false;
+    // Now write a bitmap of non-empty buckets.
+    for ( size_type i = 0; i < num_buckets; i += 8 ) {
+      unsigned char bits = 0;
+      for ( int bit = 0; bit < 8; ++bit ) {
+        if ( i + bit < num_buckets && !test_empty(i + bit) )
+          bits |= (1 << bit);
+      }
+      if ( !sparsehash_internal::write_data(fp, &bits, sizeof(bits)) )
+        return false;
+      for ( int bit = 0; bit < 8; ++bit ) {
+        if ( bits & (1 << bit) ) {
+          if ( !serializer(fp, table[i + bit]) ) return false;
+        }
+      }
     }
-    return false;               // TODO
+    return true;
   }
-  // If your keys and values are simple enough, we can write them to
-  // disk for you.  "simple enough" means value_type is a POD type
-  // that contains no pointers.  However, we don't try to normalize
-  // endianness
-  bool write_nopointer_data(FILE *fp) const {
-    for ( const_iterator it = begin(); it != end(); ++it ) {
-      // TODO: skip empty/deleted values
-      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
+  // INPUT: anything we've written an overload of read_data() for.
+  // ValueSerializer: a functor.  operator()(INPUT*, value_type*)
+  template <typename ValueSerializer, typename INPUT>
+  bool unserialize(ValueSerializer serializer, INPUT *fp) {
+    assert(settings.use_empty() && "empty_key not set for read");
+    clear();                        // just to be consistent
+    MagicNumberType magic_read;
+    if ( !sparsehash_internal::read_bigendian_number(fp, &magic_read, 4) )
+      return false;
+    if ( magic_read != MAGIC_NUMBER ) {
+      return false;
     }
-    return false;
-  }
+    size_type new_num_buckets;
+    if ( !sparsehash_internal::read_bigendian_number(fp, &new_num_buckets, 8) )
+      return false;
+    clear_to_size(new_num_buckets);
+    if ( !sparsehash_internal::read_bigendian_number(fp, &num_elements, 8) )
+      return false;
-  // When reading, we have to override the potential const-ness of *it
-  bool read_nopointer_data(FILE *fp) {
-    for ( iterator it = begin(); it != end(); ++it ) {
-      // TODO: skip empty/deleted values
-      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
+    // Read the bitmap of non-empty buckets.
+    for (size_type i = 0; i < num_buckets; i += 8) {
+      unsigned char bits;
+      if ( !sparsehash_internal::read_data(fp, &bits, sizeof(bits)) )
         return false;
+      for ( int bit = 0; bit < 8; ++bit ) {
+        if ( i + bit < num_buckets && (bits & (1 << bit)) ) {  // not empty
+          if ( !serializer(fp, &table[i + bit]) ) return false;
+        }
+      }
     }
-    return false;
+    return true;
   }
  private:
@@ -1115,9 +1172,9 @@ class dense_hashtable {
     // realloc_or_die should only be used when using the default
     // allocator (libc_allocator_with_realloc); this version only reports the misuse and exits.
-    pointer realloc_or_die(pointer ptr, size_type n) {
+    pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/) {
       fprintf(stderr, "realloc_or_die is only supported for "
-                      "libc_allocator_with_realloc");
+                      "libc_allocator_with_realloc\n");
       exit(1);  // terminates the process with status 1
       return NULL;  // not reached after exit(); keeps the declared return type satisfied
     }
@@ -1138,50 +1195,67 @@ class dense_hashtable {
     pointer realloc_or_die(pointer ptr, size_type n) {  // resize ptr to n elements, or exit the process
       pointer retval = this->reallocate(ptr, n);
       if (retval == NULL) {  // NULL from reallocate() is treated as a fatal failure
-        // We really should use PRIuS here, but I don't want to have to add
-        // a whole new configure option, with concomitant macro namespace
-        // pollution, just to print this (unlikely) error message.  So I cast.
         fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
-                "%lu elements for ptr %p",
-                static_cast<unsigned long>(n), ptr);
+                "%lu elements for ptr %p\n", static_cast<unsigned long>(n), ptr);
         exit(1);  // message above is newline-terminated, matching the other realloc_or_die
       }
       return retval;
     }
   };
+  // Package allocator with emptyval (by inheriting from it) to eliminate memory
+  // needed for the zero-size allocator.
+  // If new fields are added to this class, we should add them to
+  // operator= and swap.
+  class ValInfo : public alloc_impl<value_alloc_type> {
+   public:
+    typedef typename alloc_impl<value_alloc_type>::value_type value_type;
+    ValInfo(const alloc_impl<value_alloc_type>& a)
+        : alloc_impl<value_alloc_type>(a), emptyval() { }  // emptyval starts value-initialized
+    ValInfo(const ValInfo& v)
+        : alloc_impl<value_alloc_type>(v), emptyval(v.emptyval) { }  // copies allocator base and emptyval
+    value_type emptyval;    // which key marks unused entries (held as a whole value_type)
+  };
   // Package functors with another class to eliminate memory needed for
   // zero-size functors.  Since ExtractKey and hasher's operator() might
   // have the same function signature, they must be packaged in
   // different classes (Settings is the one that receives the hasher).
   struct Settings :
-      sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
+      sparsehash_internal::sh_hashtable_settings<key_type, hasher,
+                                                 size_type, HT_MIN_BUCKETS> {
     explicit Settings(const hasher& hf)
-        : sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
+        : sparsehash_internal::sh_hashtable_settings<key_type, hasher,
+                                                     size_type, HT_MIN_BUCKETS>(
             hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}  // percentages passed on as fractions
   };
   // Packages the ExtractKey, SetKey and EqualKey functors (as base classes), plus delkey.
-  class KeyInfo : public ExtractKey, public SetKey, public key_equal {
+  class KeyInfo : public ExtractKey, public SetKey, public EqualKey {
    public:
-    KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
+    KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
         : ExtractKey(ek),
           SetKey(sk),
-          key_equal(eq) {
+          EqualKey(eq) {
     }
-    const key_type get_key(const_reference v) const {
+    // We want to return the exact same type as ExtractKey: Key or const Key&
+    typename ExtractKey::result_type get_key(const_reference v) const {
       return ExtractKey::operator()(v);  // qualified call selects the ExtractKey base's operator()
     }
     void set_key(pointer v, const key_type& k) const {
       SetKey::operator()(v, k);  // qualified call selects the SetKey base's operator()
     }
     bool equals(const key_type& a, const key_type& b) const {
-      return key_equal::operator()(a, b);
+      return EqualKey::operator()(a, b);
     }
     // Which key marks deleted entries.
     // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
-    typename remove_const<key_type>::type delkey;
+    typename base::remove_const<key_type>::type delkey;
   };
   // Utility functions to access the templated operators
@@ -1191,7 +1265,7 @@ class dense_hashtable {
   bool equals(const key_type& a, const key_type& b) const {
     return key_info.equals(a, b);  // forwards to the key-equality functor packaged in key_info
   }
-  const key_type get_key(const_reference v) const {
+  typename ExtractKey::result_type get_key(const_reference v) const {
     return key_info.get_key(v);  // forwards to key_info; return type follows ExtractKey::result_type
   }
   void set_key(pointer v, const key_type& k) const {
@@ -1202,12 +1276,11 @@ class dense_hashtable {
   // Actual data
   Settings settings;  // receives the hasher and the occupancy/empty fractions
   KeyInfo key_info;  // ExtractKey/SetKey/EqualKey functors and delkey
-  alloc_impl<value_alloc_type> allocator;
   size_type num_deleted;  // how many occupied buckets are marked deleted
   size_type num_elements;  // NOTE(review): unserialize() restores this straight from the stream -- confirm whether it counts deleted buckets
   size_type num_buckets;  // unserialize() bounds its index into table by this
-  value_type emptyval;    // which key marks unused entries
+  ValInfo val_info;       // holds emptyval, and also the allocator
   pointer table;  // bucket array, indexed by bucket number
 };
@@ -1229,12 +1302,13 @@ const typename dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
 // good -- higher causes us to probe too much, though saves memory.
 // However, we go with .5, getting better performance at the cost of
 // more space (a trade-off densehashtable explicitly chooses to make).
-// Feel free to play around with different values, though.
+// Feel free to play around with different values, though, via
+// max_load_factor() and/or set_resizing_parameters().
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50;  // a percentage: Settings passes it on as 50 / 100.0f
 // How empty we let the table get before we resize lower.
-// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
+// It should be less than HT_OCCUPANCY_PCT / 2 or we thrash resizing.
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
   = static_cast<int>(0.4 *