google_hash 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +61 -27
- data/Rakefile +4 -1
- data/TODO +5 -0
- data/VERSION +1 -1
- data/changelog +3 -0
- data/ext/extconf.rb +10 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/AUTHORS +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/COPYING +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/ChangeLog +47 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/INSTALL +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/Makefile.am +29 -14
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/Makefile.in +77 -42
- data/ext/sparsehash-1.8.1/NEWS +71 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/README +0 -0
- data/ext/{sparsehash-1.5.2/README.windows → sparsehash-1.8.1/README_windows.txt} +25 -25
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/TODO +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/aclocal.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/compile +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/config.guess +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/config.sub +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/configure +3690 -4560
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/configure.ac +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/depcomp +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/dense_hash_map.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/dense_hash_set.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/designstyle.css +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/implementation.html +11 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/index.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/performance.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparse_hash_map.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparse_hash_set.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparsetable.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/Makefile +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/README +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/example.c +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/libchash.c +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/libchash.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/google-sparsehash.sln +17 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/install-sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/acx_pthread.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/google_namespace.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/namespaces.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_hash.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_hash_fun.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_namespace.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/missing +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/mkinstalldirs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb.sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/README +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/changelog +24 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/compat +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/control +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/copyright +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/docs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/rules +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/sparsehash.dirs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/sparsehash.install +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/rpm.sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/rpm/rpm.spec +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/config.h.in +3 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/config.h.include +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/dense_hash_map +43 -27
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/dense_hash_set +40 -19
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparse_hash_map +32 -23
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparse_hash_set +31 -21
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsehash/densehashtable.h +481 -298
- data/ext/sparsehash-1.8.1/src/google/sparsehash/hashtable-common.h +178 -0
- data/ext/sparsehash-1.8.1/src/google/sparsehash/libc_allocator_with_realloc.h +121 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsehash/sparsehashtable.h +404 -233
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsetable +173 -83
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/type_traits.h +3 -29
- data/ext/sparsehash-1.8.1/src/hash_test_interface.h +1011 -0
- data/ext/sparsehash-1.8.1/src/hashtable_test.cc +1733 -0
- data/ext/sparsehash-1.8.1/src/libc_allocator_with_realloc_test.cc +129 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/simple_test.cc +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/sparsetable_unittest.cc +202 -6
- data/ext/sparsehash-1.8.1/src/testutil.h +251 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/time_hash_map.cc +128 -54
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/type_traits_unittest.cc +30 -20
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/config.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/google/sparsehash/sparseconfig.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/port.cc +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/port.h +0 -0
- data/ext/sparsehash-1.8.1/vsprojects/hashtable_test/hashtable_test.vcproj +197 -0
- data/ext/{sparsehash-1.5.2/vsprojects/hashtable_unittest/hashtable_unittest.vcproj → sparsehash-1.8.1/vsprojects/simple_test/simple_test.vcproj} +9 -8
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj +0 -2
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/time_hash_map/time_hash_map.vcproj +3 -2
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/type_traits_unittest/type_traits_unittest.vcproj +0 -2
- data/ext/template/google_hash.cpp.erb +2 -1
- data/ext/template/main.cpp.erb +1 -1
- data/results.txt +6 -22
- data/spec/benchmark.rb +57 -0
- data/spec/spec.google_hash.rb +1 -8
- metadata +140 -130
- data/ext/benchmark.rb +0 -47
- data/ext/sparsehash-1.5.2/NEWS +0 -0
- data/ext/sparsehash-1.5.2/src/hashtable_unittest.cc +0 -1375
- data/ext/sparsehash-1.5.2/src/words +0 -8944
- data/types.txt +0 -18
|
@@ -40,10 +40,15 @@
|
|
|
40
40
|
// because all iterators for sets are const (you obviously can't
|
|
41
41
|
// change the key, and for sets there is no value).
|
|
42
42
|
//
|
|
43
|
-
// We adhere mostly to the STL semantics for hash-
|
|
44
|
-
// exception is that insert()
|
|
45
|
-
//
|
|
46
|
-
//
|
|
43
|
+
// We adhere mostly to the STL semantics for hash-map. One important
|
|
44
|
+
// exception is that insert() may invalidate iterators entirely -- STL
|
|
45
|
+
// semantics are that insert() may reorder iterators, but they all
|
|
46
|
+
// still refer to something valid in the hashtable. Not so for us.
|
|
47
|
+
// Likewise, insert() may invalidate pointers into the hashtable.
|
|
48
|
+
// (Whether insert invalidates iterators and pointers depends on
|
|
49
|
+
// whether it results in a hashtable resize). On the plus side,
|
|
50
|
+
// delete() doesn't invalidate iterators or pointers at all, or even
|
|
51
|
+
// change the ordering of elements.
|
|
47
52
|
//
|
|
48
53
|
// Here are a few "power user" tips:
|
|
49
54
|
//
|
|
@@ -62,19 +67,20 @@
|
|
|
62
67
|
// Setting the minimum load factor to 0.0 guarantees that
|
|
63
68
|
// the hash table will never shrink.
|
|
64
69
|
//
|
|
65
|
-
//
|
|
66
|
-
// (1) dense_hash_set: fastest, uses the most memory
|
|
70
|
+
// Roughly speaking:
|
|
71
|
+
// (1) dense_hash_set: fastest, uses the most memory unless entries are small
|
|
67
72
|
// (2) sparse_hash_set: slowest, uses the least memory
|
|
68
|
-
// (3) hash_set /unordered_set (STL): in the middle
|
|
73
|
+
// (3) hash_set / unordered_set (STL): in the middle
|
|
74
|
+
//
|
|
69
75
|
// Typically I use sparse_hash_set when I care about space and/or when
|
|
70
76
|
// I need to save the hashtable on disk. I use hash_set otherwise. I
|
|
71
77
|
// don't personally use dense_hash_set ever; some people use it for
|
|
72
78
|
// small sets with lots of lookups.
|
|
73
79
|
//
|
|
74
|
-
// - dense_hash_set has, typically,
|
|
75
|
-
// data takes up X bytes, the hash_set uses
|
|
76
|
-
// - sparse_hash_set has about
|
|
77
|
-
// -
|
|
80
|
+
// - dense_hash_set has, typically, about 78% memory overhead (if your
|
|
81
|
+
// data takes up X bytes, the hash_set uses .78X more bytes in overhead).
|
|
82
|
+
// - sparse_hash_set has about 4 bits overhead per entry.
|
|
83
|
+
// - sparse_hash_set can be 3-7 times slower than the others for lookup and,
|
|
78
84
|
// especially, inserts. See time_hash_map.cc for details.
|
|
79
85
|
//
|
|
80
86
|
// See /usr/(local/)?doc/sparsehash-*/sparse_hash_set.html
|
|
@@ -90,6 +96,7 @@
|
|
|
90
96
|
#include <memory> // for alloc<>
|
|
91
97
|
#include <utility> // for pair<>
|
|
92
98
|
#include HASH_FUN_H // defined in config.h
|
|
99
|
+
#include <google/sparsehash/libc_allocator_with_realloc.h>
|
|
93
100
|
#include <google/sparsehash/sparsehashtable.h>
|
|
94
101
|
|
|
95
102
|
_START_GOOGLE_NAMESPACE_
|
|
@@ -99,7 +106,7 @@ using STL_NAMESPACE::pair;
|
|
|
99
106
|
template <class Value,
|
|
100
107
|
class HashFcn = SPARSEHASH_HASH<Value>, // defined in sparseconfig.h
|
|
101
108
|
class EqualKey = STL_NAMESPACE::equal_to<Value>,
|
|
102
|
-
class Alloc =
|
|
109
|
+
class Alloc = libc_allocator_with_realloc<Value> >
|
|
103
110
|
class sparse_hash_set {
|
|
104
111
|
private:
|
|
105
112
|
// Apparently identity is not stl-standard, so we define our own
|
|
@@ -113,9 +120,8 @@ class sparse_hash_set {
|
|
|
113
120
|
}
|
|
114
121
|
};
|
|
115
122
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
Identity, SetKey, EqualKey, Alloc> ht;
|
|
123
|
+
typedef sparse_hashtable<Value, Value, HashFcn, Identity, SetKey,
|
|
124
|
+
EqualKey, Alloc> ht;
|
|
119
125
|
ht rep;
|
|
120
126
|
|
|
121
127
|
public:
|
|
@@ -148,7 +154,7 @@ class sparse_hash_set {
|
|
|
148
154
|
|
|
149
155
|
|
|
150
156
|
// Accessor functions
|
|
151
|
-
|
|
157
|
+
allocator_type get_allocator() const { return rep.get_allocator(); }
|
|
152
158
|
hasher hash_funct() const { return rep.hash_funct(); }
|
|
153
159
|
hasher hash_function() const { return hash_funct(); } // tr1 name
|
|
154
160
|
key_equal key_eq() const { return rep.key_eq(); }
|
|
@@ -157,15 +163,18 @@ class sparse_hash_set {
|
|
|
157
163
|
// Constructors
|
|
158
164
|
explicit sparse_hash_set(size_type expected_max_items_in_table = 0,
|
|
159
165
|
const hasher& hf = hasher(),
|
|
160
|
-
const key_equal& eql = key_equal()
|
|
161
|
-
|
|
166
|
+
const key_equal& eql = key_equal(),
|
|
167
|
+
const allocator_type& alloc = allocator_type())
|
|
168
|
+
: rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
|
|
169
|
+
}
|
|
162
170
|
|
|
163
171
|
template <class InputIterator>
|
|
164
172
|
sparse_hash_set(InputIterator f, InputIterator l,
|
|
165
173
|
size_type expected_max_items_in_table = 0,
|
|
166
174
|
const hasher& hf = hasher(),
|
|
167
|
-
const key_equal& eql = key_equal()
|
|
168
|
-
|
|
175
|
+
const key_equal& eql = key_equal(),
|
|
176
|
+
const allocator_type& alloc = allocator_type())
|
|
177
|
+
: rep(expected_max_items_in_table, hf, eql, Identity(), SetKey(), alloc) {
|
|
169
178
|
rep.insert(f, l);
|
|
170
179
|
}
|
|
171
180
|
// We use the default copy constructor
|
|
@@ -212,7 +221,7 @@ class sparse_hash_set {
|
|
|
212
221
|
}
|
|
213
222
|
// Deprecated; use min_load_factor() or max_load_factor() instead.
|
|
214
223
|
void set_resizing_parameters(float shrink, float grow) {
|
|
215
|
-
|
|
224
|
+
rep.set_resizing_parameters(shrink, grow);
|
|
216
225
|
}
|
|
217
226
|
|
|
218
227
|
void resize(size_type hint) { rep.resize(hint); }
|
|
@@ -245,6 +254,7 @@ class sparse_hash_set {
|
|
|
245
254
|
// time goes on, or get rid of it entirely to be insert-only.
|
|
246
255
|
void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
|
|
247
256
|
void clear_deleted_key() { rep.clear_deleted_key(); }
|
|
257
|
+
key_type deleted_key() const { return rep.deleted_key(); }
|
|
248
258
|
|
|
249
259
|
// These are standard
|
|
250
260
|
size_type erase(const key_type& key) { return rep.erase(key); }
|
|
@@ -57,19 +57,19 @@
|
|
|
57
57
|
// <google/dense_hash_map> or <google/dense_hash_set> instead.
|
|
58
58
|
|
|
59
59
|
// You can change the following below:
|
|
60
|
-
//
|
|
61
|
-
//
|
|
60
|
+
// HT_OCCUPANCY_PCT -- how full before we double size
|
|
61
|
+
// HT_EMPTY_PCT -- how empty before we halve size
|
|
62
62
|
// HT_MIN_BUCKETS -- default smallest bucket size
|
|
63
63
|
//
|
|
64
|
-
// You can also change
|
|
65
|
-
//
|
|
66
|
-
//
|
|
64
|
+
// You can also change enlarge_factor (which defaults to
|
|
65
|
+
// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
|
|
66
|
+
// HT_EMPTY_PCT) with set_resizing_parameters().
|
|
67
67
|
//
|
|
68
68
|
// How to decide what values to use?
|
|
69
|
-
//
|
|
69
|
+
// shrink_factor's default of .4 * HT_OCCUPANCY_PCT is probably good.
|
|
70
70
|
// HT_MIN_BUCKETS is probably unnecessary since you can specify
|
|
71
71
|
// (indirectly) the starting number of buckets at construct-time.
|
|
72
|
-
// For
|
|
72
|
+
// For enlarge_factor, you can use this chart to try to trade-off
|
|
73
73
|
// expected lookup time to the space taken up. By default, this
|
|
74
74
|
// code uses quadratic probing, though you can change it to linear
|
|
75
75
|
// via _JUMP below if you really want to.
|
|
@@ -79,7 +79,7 @@
|
|
|
79
79
|
// Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L)
|
|
80
80
|
// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)^2]/2
|
|
81
81
|
//
|
|
82
|
-
// --
|
|
82
|
+
// -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99
|
|
83
83
|
// QUADRATIC COLLISION RES.
|
|
84
84
|
// probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11
|
|
85
85
|
// probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6
|
|
@@ -93,19 +93,23 @@
|
|
|
93
93
|
// The probing method
|
|
94
94
|
// Linear probing
|
|
95
95
|
// #define JUMP_(key, num_probes) ( 1 )
|
|
96
|
-
// Quadratic
|
|
96
|
+
// Quadratic probing
|
|
97
97
|
#define JUMP_(key, num_probes) ( num_probes )
|
|
98
98
|
|
|
99
99
|
|
|
100
100
|
#include <google/sparsehash/sparseconfig.h>
|
|
101
|
-
#include <assert.h>
|
|
102
101
|
#include <stdio.h>
|
|
102
|
+
#include <assert.h>
|
|
103
103
|
#include <stdlib.h> // for abort()
|
|
104
104
|
#include <algorithm> // For swap(), eg
|
|
105
|
+
#include <stdexcept> // For length_error
|
|
105
106
|
#include <iostream> // For cerr
|
|
106
107
|
#include <memory> // For uninitialized_fill, uninitialized_copy
|
|
107
108
|
#include <utility> // for pair<>
|
|
108
109
|
#include <iterator> // for facts about iterator tags
|
|
110
|
+
#include <limits> // for numeric_limits<>
|
|
111
|
+
#include <google/sparsehash/libc_allocator_with_realloc.h>
|
|
112
|
+
#include <google/sparsehash/hashtable-common.h>
|
|
109
113
|
#include <google/type_traits.h> // for true_type, integral_constant, etc.
|
|
110
114
|
|
|
111
115
|
_START_GOOGLE_NAMESPACE_
|
|
@@ -125,7 +129,7 @@ using STL_NAMESPACE::pair;
|
|
|
125
129
|
// with key == deleted_key or key == empty_key.
|
|
126
130
|
// EqualKey: Given two Keys, says whether they are the same (that is,
|
|
127
131
|
// if they are both associated with the same Value).
|
|
128
|
-
// Alloc: STL allocator to use to allocate memory.
|
|
132
|
+
// Alloc: STL allocator to use to allocate memory.
|
|
129
133
|
|
|
130
134
|
template <class Value, class Key, class HashFcn,
|
|
131
135
|
class ExtractKey, class SetKey, class EqualKey, class Alloc>
|
|
@@ -140,16 +144,19 @@ struct dense_hashtable_const_iterator;
|
|
|
140
144
|
// We're just an array, but we need to skip over empty and deleted elements
|
|
141
145
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
142
146
|
struct dense_hashtable_iterator {
|
|
147
|
+
private:
|
|
148
|
+
typedef typename A::template rebind<V>::other value_alloc_type;
|
|
149
|
+
|
|
143
150
|
public:
|
|
144
151
|
typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
145
152
|
typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
|
|
146
153
|
|
|
147
154
|
typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
|
|
148
155
|
typedef V value_type;
|
|
149
|
-
typedef
|
|
150
|
-
typedef
|
|
151
|
-
typedef
|
|
152
|
-
typedef
|
|
156
|
+
typedef typename value_alloc_type::difference_type difference_type;
|
|
157
|
+
typedef typename value_alloc_type::size_type size_type;
|
|
158
|
+
typedef typename value_alloc_type::reference reference;
|
|
159
|
+
typedef typename value_alloc_type::pointer pointer;
|
|
153
160
|
|
|
154
161
|
// "Real" constructor and default constructor
|
|
155
162
|
dense_hashtable_iterator(const dense_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
|
|
@@ -190,16 +197,19 @@ struct dense_hashtable_iterator {
|
|
|
190
197
|
// Now do it all again, but with const-ness!
|
|
191
198
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
192
199
|
struct dense_hashtable_const_iterator {
|
|
200
|
+
private:
|
|
201
|
+
typedef typename A::template rebind<V>::other value_alloc_type;
|
|
202
|
+
|
|
193
203
|
public:
|
|
194
204
|
typedef dense_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
195
205
|
typedef dense_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
|
|
196
206
|
|
|
197
207
|
typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
|
|
198
208
|
typedef V value_type;
|
|
199
|
-
typedef
|
|
200
|
-
typedef
|
|
201
|
-
typedef
|
|
202
|
-
typedef
|
|
209
|
+
typedef typename value_alloc_type::difference_type difference_type;
|
|
210
|
+
typedef typename value_alloc_type::size_type size_type;
|
|
211
|
+
typedef typename value_alloc_type::const_reference reference;
|
|
212
|
+
typedef typename value_alloc_type::const_pointer pointer;
|
|
203
213
|
|
|
204
214
|
// "Real" constructor and default constructor
|
|
205
215
|
dense_hashtable_const_iterator(
|
|
@@ -243,18 +253,22 @@ struct dense_hashtable_const_iterator {
|
|
|
243
253
|
template <class Value, class Key, class HashFcn,
|
|
244
254
|
class ExtractKey, class SetKey, class EqualKey, class Alloc>
|
|
245
255
|
class dense_hashtable {
|
|
256
|
+
private:
|
|
257
|
+
typedef typename Alloc::template rebind<Value>::other value_alloc_type;
|
|
258
|
+
|
|
246
259
|
public:
|
|
247
260
|
typedef Key key_type;
|
|
248
261
|
typedef Value value_type;
|
|
249
262
|
typedef HashFcn hasher;
|
|
250
263
|
typedef EqualKey key_equal;
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
typedef
|
|
254
|
-
typedef
|
|
255
|
-
typedef
|
|
256
|
-
typedef
|
|
257
|
-
typedef
|
|
264
|
+
typedef Alloc allocator_type;
|
|
265
|
+
|
|
266
|
+
typedef typename value_alloc_type::size_type size_type;
|
|
267
|
+
typedef typename value_alloc_type::difference_type difference_type;
|
|
268
|
+
typedef typename value_alloc_type::reference reference;
|
|
269
|
+
typedef typename value_alloc_type::const_reference const_reference;
|
|
270
|
+
typedef typename value_alloc_type::pointer pointer;
|
|
271
|
+
typedef typename value_alloc_type::const_pointer const_pointer;
|
|
258
272
|
typedef dense_hashtable_iterator<Value, Key, HashFcn,
|
|
259
273
|
ExtractKey, SetKey, EqualKey, Alloc>
|
|
260
274
|
iterator;
|
|
@@ -270,24 +284,23 @@ class dense_hashtable {
|
|
|
270
284
|
// How full we let the table get before we resize, by default.
|
|
271
285
|
// Knuth says .8 is good -- higher causes us to probe too much,
|
|
272
286
|
// though it saves memory.
|
|
273
|
-
static const
|
|
287
|
+
static const int HT_OCCUPANCY_PCT; // = 50 (out of 100)
|
|
274
288
|
|
|
275
289
|
// How empty we let the table get before we resize lower, by default.
|
|
276
290
|
// (0.0 means never resize lower.)
|
|
277
|
-
// It should be less than
|
|
278
|
-
static const
|
|
291
|
+
// It should be less than HT_OCCUPANCY_PCT / 2 or we thrash resizing.
|
|
292
|
+
static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;
|
|
279
293
|
|
|
280
294
|
// Minimum size we're willing to let hashtables be.
|
|
281
295
|
// Must be a power of two, and at least 4.
|
|
282
296
|
// Note, however, that for a given hashtable, the initial size is a
|
|
283
297
|
// function of the first constructor arg, and may be >HT_MIN_BUCKETS.
|
|
284
|
-
static const
|
|
298
|
+
static const size_type HT_MIN_BUCKETS = 4;
|
|
285
299
|
|
|
286
300
|
// By default, if you don't specify a hashtable size at
|
|
287
301
|
// construction-time, we use this size. Must be a power of two, and
|
|
288
302
|
// at least HT_MIN_BUCKETS.
|
|
289
|
-
static const
|
|
290
|
-
|
|
303
|
+
static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
|
|
291
304
|
|
|
292
305
|
// ITERATOR FUNCTIONS
|
|
293
306
|
iterator begin() { return iterator(this, table,
|
|
@@ -300,10 +313,7 @@ class dense_hashtable {
|
|
|
300
313
|
table+num_buckets,true);}
|
|
301
314
|
|
|
302
315
|
// These come from tr1 unordered_map. They iterate over 'bucket' n.
|
|
303
|
-
//
|
|
304
|
-
// I guess, but I don't really see the point. We'll just consider
|
|
305
|
-
// bucket n to be the n-th element of the sparsetable, if it's occupied,
|
|
306
|
-
// or some empty element, otherwise.
|
|
316
|
+
// We'll just consider bucket n to be the n-th element of the table.
|
|
307
317
|
local_iterator begin(size_type i) {
|
|
308
318
|
return local_iterator(this, table + i, table + i+1, false);
|
|
309
319
|
}
|
|
@@ -324,16 +334,20 @@ class dense_hashtable {
|
|
|
324
334
|
}
|
|
325
335
|
|
|
326
336
|
// ACCESSOR FUNCTIONS for the things we templatize on, basically
|
|
327
|
-
hasher hash_funct() const
|
|
328
|
-
key_equal key_eq() const
|
|
337
|
+
hasher hash_funct() const { return settings; }
|
|
338
|
+
key_equal key_eq() const { return key_info; }
|
|
339
|
+
allocator_type get_allocator() const { return allocator; }
|
|
340
|
+
|
|
341
|
+
// Accessor function for statistics gathering.
|
|
342
|
+
int num_table_copies() const { return settings.num_ht_copies(); }
|
|
329
343
|
|
|
330
344
|
private:
|
|
331
345
|
// Annoyingly, we can't copy values around, because they might have
|
|
332
346
|
// const components (they're probably pair<const X, Y>). We use
|
|
333
347
|
// explicit destructor invocation and placement new to get around
|
|
334
348
|
// this. Arg.
|
|
335
|
-
void set_value(
|
|
336
|
-
dst->~value_type();
|
|
349
|
+
void set_value(pointer dst, const_reference src) {
|
|
350
|
+
dst->~value_type(); // delete the old value, if any
|
|
337
351
|
new(dst) value_type(src);
|
|
338
352
|
}
|
|
339
353
|
|
|
@@ -357,50 +371,77 @@ class dense_hashtable {
|
|
|
357
371
|
assert(num_deleted == 0);
|
|
358
372
|
}
|
|
359
373
|
|
|
374
|
+
bool test_deleted_key(const key_type& key) const {
|
|
375
|
+
// The num_deleted test is crucial for read(): after read(), the ht values
|
|
376
|
+
// are garbage, and we don't want to think some of them are deleted.
|
|
377
|
+
// Invariant: !use_deleted implies num_deleted is 0.
|
|
378
|
+
assert(settings.use_deleted() || num_deleted == 0);
|
|
379
|
+
return num_deleted > 0 && equals(key_info.delkey, key);
|
|
380
|
+
}
|
|
381
|
+
|
|
360
382
|
public:
|
|
361
383
|
void set_deleted_key(const key_type &key) {
|
|
362
384
|
// the empty indicator (if specified) and the deleted indicator
|
|
363
385
|
// must be different
|
|
364
|
-
assert(!use_empty || !equals(key, get_key(emptyval)))
|
|
386
|
+
assert((!settings.use_empty() || !equals(key, get_key(emptyval)))
|
|
387
|
+
&& "Passed the empty-key to set_deleted_key");
|
|
365
388
|
// It's only safe to change what "deleted" means if we purge deleted guys
|
|
366
389
|
squash_deleted();
|
|
367
|
-
|
|
368
|
-
delkey = key;
|
|
390
|
+
settings.set_use_deleted(true);
|
|
391
|
+
key_info.delkey = key;
|
|
369
392
|
}
|
|
370
393
|
void clear_deleted_key() {
|
|
371
394
|
squash_deleted();
|
|
372
|
-
|
|
395
|
+
settings.set_use_deleted(false);
|
|
396
|
+
}
|
|
397
|
+
key_type deleted_key() const {
|
|
398
|
+
assert(settings.use_deleted()
|
|
399
|
+
&& "Must set deleted key before calling deleted_key");
|
|
400
|
+
return key_info.delkey;
|
|
373
401
|
}
|
|
374
402
|
|
|
375
403
|
// These are public so the iterators can use them
|
|
376
404
|
// True if the item at position bucknum is "deleted" marker
|
|
377
405
|
bool test_deleted(size_type bucknum) const {
|
|
378
|
-
|
|
379
|
-
// are garbage, and we don't want to think some of them are deleted.
|
|
380
|
-
return (use_deleted && num_deleted > 0 &&
|
|
381
|
-
equals(delkey, get_key(table[bucknum])));
|
|
406
|
+
return test_deleted_key(get_key(table[bucknum]));
|
|
382
407
|
}
|
|
383
408
|
bool test_deleted(const iterator &it) const {
|
|
384
|
-
return (
|
|
385
|
-
equals(delkey, get_key(*it)));
|
|
409
|
+
return test_deleted_key(get_key(*it));
|
|
386
410
|
}
|
|
387
411
|
bool test_deleted(const const_iterator &it) const {
|
|
388
|
-
return (
|
|
389
|
-
|
|
412
|
+
return test_deleted_key(get_key(*it));
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
private:
|
|
416
|
+
// Set it so test_deleted is true. Returns true if object was not already deleted.
|
|
417
|
+
bool set_deleted(iterator &it) {
|
|
418
|
+
assert(settings.use_deleted());
|
|
419
|
+
bool retval = !test_deleted(it);
|
|
420
|
+
// &* converts from iterator to value-type.
|
|
421
|
+
set_key(&(*it), key_info.delkey);
|
|
422
|
+
return retval;
|
|
390
423
|
}
|
|
391
|
-
// Set it so test_deleted is
|
|
392
|
-
|
|
424
|
+
// Set it so test_deleted is false. Returns true if object used to be deleted.
|
|
425
|
+
bool clear_deleted(iterator &it) {
|
|
426
|
+
assert(settings.use_deleted());
|
|
427
|
+
// Happens automatically when we assign something else in its place.
|
|
428
|
+
return test_deleted(it);
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
// We also allow setting/clearing the deleted bit on a const iterator.
|
|
432
|
+
// We allow a const_iterator for the same reason you can delete a
|
|
433
|
+
// const pointer: it's convenient, and semantically you can't use
|
|
434
|
+
// 'it' after it's been deleted anyway, so its const-ness doesn't
|
|
435
|
+
// really matter.
|
|
393
436
|
bool set_deleted(const_iterator &it) {
|
|
394
|
-
assert(use_deleted);
|
|
437
|
+
assert(settings.use_deleted());
|
|
395
438
|
bool retval = !test_deleted(it);
|
|
396
|
-
|
|
397
|
-
set_key(const_cast<value_type*>(&(*it)), delkey);
|
|
439
|
+
set_key(const_cast<pointer>(&(*it)), key_info.delkey);
|
|
398
440
|
return retval;
|
|
399
441
|
}
|
|
400
|
-
// Set it so test_deleted is false. true if object used to be deleted
|
|
442
|
+
// Set it so test_deleted is false. Returns true if object used to be deleted.
|
|
401
443
|
bool clear_deleted(const_iterator &it) {
|
|
402
|
-
assert(use_deleted);
|
|
403
|
-
// happens automatically when we assign something else in its place
|
|
444
|
+
assert(settings.use_deleted());
|
|
404
445
|
return test_deleted(it);
|
|
405
446
|
}
|
|
406
447
|
|
|
@@ -414,58 +455,52 @@ class dense_hashtable {
|
|
|
414
455
|
// These are public so the iterators can use them
|
|
415
456
|
// True if the item at position bucknum is "empty" marker
|
|
416
457
|
bool test_empty(size_type bucknum) const {
|
|
417
|
-
assert(use_empty);
|
|
458
|
+
assert(settings.use_empty()); // we always need to know what's empty!
|
|
418
459
|
return equals(get_key(emptyval), get_key(table[bucknum]));
|
|
419
460
|
}
|
|
420
461
|
bool test_empty(const iterator &it) const {
|
|
421
|
-
assert(use_empty);
|
|
462
|
+
assert(settings.use_empty()); // we always need to know what's empty!
|
|
422
463
|
return equals(get_key(emptyval), get_key(*it));
|
|
423
464
|
}
|
|
424
465
|
bool test_empty(const const_iterator &it) const {
|
|
425
|
-
assert(use_empty);
|
|
466
|
+
assert(settings.use_empty()); // we always need to know what's empty!
|
|
426
467
|
return equals(get_key(emptyval), get_key(*it));
|
|
427
468
|
}
|
|
428
469
|
|
|
429
470
|
private:
|
|
430
|
-
|
|
431
|
-
void set_empty(size_type bucknum) {
|
|
432
|
-
assert(use_empty);
|
|
433
|
-
set_value(&table[bucknum], emptyval);
|
|
434
|
-
}
|
|
435
|
-
void fill_range_with_empty(value_type* table_start, value_type* table_end) {
|
|
436
|
-
// Like set_empty(range), but doesn't destroy previous contents
|
|
471
|
+
void fill_range_with_empty(pointer table_start, pointer table_end) {
|
|
437
472
|
STL_NAMESPACE::uninitialized_fill(table_start, table_end, emptyval);
|
|
438
473
|
}
|
|
439
|
-
void set_empty(size_type buckstart, size_type buckend) {
|
|
440
|
-
assert(use_empty);
|
|
441
|
-
destroy_buckets(buckstart, buckend);
|
|
442
|
-
fill_range_with_empty(table + buckstart, table + buckend);
|
|
443
|
-
}
|
|
444
474
|
|
|
445
475
|
public:
|
|
446
476
|
// TODO(csilvers): change all callers of this to pass in a key instead,
|
|
447
477
|
// and take a const key_type instead of const value_type.
|
|
448
|
-
void set_empty_key(
|
|
478
|
+
void set_empty_key(const_reference val) {
|
|
449
479
|
// Once you set the empty key, you can't change it
|
|
450
|
-
assert(!use_empty);
|
|
480
|
+
assert(!settings.use_empty() && "Calling set_empty_key multiple times");
|
|
451
481
|
// The deleted indicator (if specified) and the empty indicator
|
|
452
482
|
// must be different.
|
|
453
|
-
assert(!use_deleted || !equals(get_key(val), delkey))
|
|
454
|
-
|
|
483
|
+
assert((!settings.use_deleted() || !equals(get_key(val), key_info.delkey))
|
|
484
|
+
&& "Setting the empty key the same as the deleted key");
|
|
485
|
+
settings.set_use_empty(true);
|
|
455
486
|
set_value(&emptyval, val);
|
|
456
487
|
|
|
457
488
|
assert(!table); // must set before first use
|
|
458
489
|
// num_buckets was set in constructor even though table was NULL
|
|
459
|
-
table = (
|
|
490
|
+
table = allocator.allocate(num_buckets);
|
|
460
491
|
assert(table);
|
|
461
492
|
fill_range_with_empty(table, table + num_buckets);
|
|
462
493
|
}
|
|
494
|
+
// TODO(sjackman): return a key_type rather than a value_type
|
|
495
|
+
value_type empty_key() const {
|
|
496
|
+
assert(settings.use_empty());
|
|
497
|
+
return emptyval;
|
|
498
|
+
}
|
|
463
499
|
|
|
464
500
|
// FUNCTIONS CONCERNING SIZE
|
|
465
501
|
public:
|
|
466
502
|
size_type size() const { return num_elements - num_deleted; }
|
|
467
|
-
|
|
468
|
-
size_type max_size() const { return (size_type(-1) >> 1U) + 1; }
|
|
503
|
+
size_type max_size() const { return allocator.max_size(); }
|
|
469
504
|
bool empty() const { return size() == 0; }
|
|
470
505
|
size_type bucket_count() const { return num_buckets; }
|
|
471
506
|
size_type max_bucket_count() const { return max_size(); }
|
|
@@ -476,54 +511,56 @@ class dense_hashtable {
|
|
|
476
511
|
return begin(i) == end(i) ? 0 : 1;
|
|
477
512
|
}
|
|
478
513
|
|
|
479
|
-
|
|
480
|
-
|
|
481
514
|
private:
|
|
482
515
|
// Because of the above, size_type(-1) is never legal; use it for errors
|
|
483
516
|
static const size_type ILLEGAL_BUCKET = size_type(-1);
|
|
484
517
|
|
|
485
|
-
|
|
486
|
-
//
|
|
487
|
-
//
|
|
488
|
-
|
|
489
|
-
size_type sz = HT_MIN_BUCKETS; // min buckets allowed
|
|
490
|
-
while ( sz < min_buckets_wanted || num_elts >= sz * enlarge_resize_percent )
|
|
491
|
-
sz *= 2;
|
|
492
|
-
return sz;
|
|
493
|
-
}
|
|
494
|
-
|
|
495
|
-
// Used after a string of deletes
|
|
496
|
-
void maybe_shrink() {
|
|
518
|
+
// Used after a string of deletes. Returns true if we actually shrunk.
|
|
519
|
+
// TODO(csilvers): take a delta so we can take into account inserts
|
|
520
|
+
// done after shrinking. Maybe make part of the Settings class?
|
|
521
|
+
bool maybe_shrink() {
|
|
497
522
|
assert(num_elements >= num_deleted);
|
|
498
523
|
assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
|
|
499
524
|
assert(bucket_count() >= HT_MIN_BUCKETS);
|
|
525
|
+
bool retval = false;
|
|
500
526
|
|
|
501
527
|
// If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
|
|
502
528
|
// we'll never shrink until you get relatively big, and we'll never
|
|
503
529
|
// shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
|
|
504
530
|
// like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
|
|
505
531
|
// shrink us down to HT_MIN_BUCKETS buckets, which is too small.
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
532
|
+
const size_type num_remain = num_elements - num_deleted;
|
|
533
|
+
const size_type shrink_threshold = settings.shrink_threshold();
|
|
534
|
+
if (shrink_threshold > 0 && num_remain < shrink_threshold &&
|
|
535
|
+
bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
|
|
536
|
+
const float shrink_factor = settings.shrink_factor();
|
|
509
537
|
size_type sz = bucket_count() / 2; // find how much we should shrink
|
|
510
|
-
while (
|
|
511
|
-
|
|
538
|
+
while (sz > HT_DEFAULT_STARTING_BUCKETS &&
|
|
539
|
+
num_remain < sz * shrink_factor) {
|
|
512
540
|
sz /= 2; // stay a power of 2
|
|
541
|
+
}
|
|
513
542
|
dense_hashtable tmp(*this, sz); // Do the actual resizing
|
|
514
543
|
swap(tmp); // now we are tmp
|
|
544
|
+
retval = true;
|
|
515
545
|
}
|
|
516
|
-
|
|
546
|
+
settings.set_consider_shrink(false); // because we just considered it
|
|
547
|
+
return retval;
|
|
517
548
|
}
|
|
518
549
|
|
|
519
550
|
// We'll let you resize a hashtable -- though this makes us copy all!
|
|
520
551
|
// When you resize, you say, "make it big enough for this many more elements"
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
if (
|
|
525
|
-
|
|
526
|
-
|
|
552
|
+
// Returns true if we actually resized, false if size was already ok.
|
|
553
|
+
bool resize_delta(size_type delta) {
|
|
554
|
+
bool did_resize = false;
|
|
555
|
+
if ( settings.consider_shrink() ) { // see if lots of deletes happened
|
|
556
|
+
if ( maybe_shrink() )
|
|
557
|
+
did_resize = true;
|
|
558
|
+
}
|
|
559
|
+
if (num_elements >= (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
|
|
560
|
+
throw std::length_error("resize overflow");
|
|
561
|
+
if ( bucket_count() >= HT_MIN_BUCKETS &&
|
|
562
|
+
(num_elements + delta) <= settings.enlarge_threshold() )
|
|
563
|
+
return did_resize; // we're ok as we are
|
|
527
564
|
|
|
528
565
|
// Sometimes, we need to resize just to get rid of all the
|
|
529
566
|
// "deleted" buckets that are clogging up the hashtable. So when
|
|
@@ -531,56 +568,48 @@ class dense_hashtable {
|
|
|
531
568
|
// are currently taking up room). But later, when we decide what
|
|
532
569
|
// size to resize to, *don't* count deleted buckets, since they
|
|
533
570
|
// get discarded during the resize.
|
|
534
|
-
const size_type needed_size =
|
|
535
|
-
if ( needed_size
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
571
|
+
const size_type needed_size = settings.min_buckets(num_elements + delta, 0);
|
|
572
|
+
if ( needed_size <= bucket_count() ) // we have enough buckets
|
|
573
|
+
return did_resize;
|
|
574
|
+
|
|
575
|
+
size_type resize_to =
|
|
576
|
+
settings.min_buckets(num_elements - num_deleted + delta, bucket_count());
|
|
577
|
+
|
|
578
|
+
if (resize_to < needed_size && // may double resize_to
|
|
579
|
+
resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
|
|
580
|
+
// This situation means that we have enough deleted elements,
|
|
581
|
+
// that once we purge them, we won't actually have needed to
|
|
582
|
+
// grow. But we may want to grow anyway: if we just purge one
|
|
583
|
+
// element, say, we'll have to grow anyway next time we
|
|
584
|
+
// insert. Might as well grow now, since we're already going
|
|
585
|
+
// through the trouble of copying (in order to purge the
|
|
586
|
+
// deleted elements).
|
|
587
|
+
const size_type target =
|
|
588
|
+
static_cast<size_type>(settings.shrink_size(resize_to*2));
|
|
589
|
+
if (num_elements - num_deleted + delta >= target) {
|
|
590
|
+
// Good, we won't be below the shrink threshhold even if we double.
|
|
591
|
+
resize_to *= 2;
|
|
592
|
+
}
|
|
540
593
|
}
|
|
594
|
+
dense_hashtable tmp(*this, resize_to);
|
|
595
|
+
swap(tmp); // now we are tmp
|
|
596
|
+
return true;
|
|
541
597
|
}
|
|
542
598
|
|
|
543
|
-
//
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
// equivalent to "x.~T(); new(x) T(y);" which is pretty much
|
|
548
|
-
// correct, if a bit conservative.)
|
|
549
|
-
void expand_array(size_t resize_to, true_type) {
|
|
550
|
-
table = (value_type *) realloc(table, resize_to * sizeof(value_type));
|
|
551
|
-
assert(table);
|
|
552
|
-
fill_range_with_empty(table + num_buckets, table + resize_to);
|
|
599
|
+
// We require table be not-NULL and empty before calling this.
|
|
600
|
+
void resize_table(size_type /*old_size*/, size_type new_size,
|
|
601
|
+
true_type) {
|
|
602
|
+
table = allocator.realloc_or_die(table, new_size);
|
|
553
603
|
}
|
|
554
604
|
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
void expand_array(size_t resize_to, false_type) {
|
|
559
|
-
value_type* new_table =
|
|
560
|
-
(value_type *) malloc(resize_to * sizeof(value_type));
|
|
561
|
-
assert(new_table);
|
|
562
|
-
STL_NAMESPACE::uninitialized_copy(table, table + num_buckets, new_table);
|
|
563
|
-
fill_range_with_empty(new_table + num_buckets, new_table + resize_to);
|
|
564
|
-
destroy_buckets(0, num_buckets);
|
|
565
|
-
free(table);
|
|
566
|
-
table = new_table;
|
|
605
|
+
void resize_table(size_type old_size, size_type new_size, false_type) {
|
|
606
|
+
allocator.deallocate(table, old_size);
|
|
607
|
+
table = allocator.allocate(new_size);
|
|
567
608
|
}
|
|
568
609
|
|
|
569
610
|
// Used to actually do the rehashing when we grow/shrink a hashtable
|
|
570
611
|
void copy_from(const dense_hashtable &ht, size_type min_buckets_wanted) {
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
// If we need to change the size of our table, do it now
|
|
574
|
-
const size_type resize_to = min_size(ht.size(), min_buckets_wanted);
|
|
575
|
-
if ( resize_to > bucket_count() ) { // we don't have enough buckets
|
|
576
|
-
typedef integral_constant<bool,
|
|
577
|
-
(has_trivial_copy<value_type>::value &&
|
|
578
|
-
has_trivial_destructor<value_type>::value)>
|
|
579
|
-
realloc_ok; // we pretend mv(x,y) == "x.~T(); new(x) T(y)"
|
|
580
|
-
expand_array(resize_to, realloc_ok());
|
|
581
|
-
num_buckets = resize_to;
|
|
582
|
-
reset_thresholds();
|
|
583
|
-
}
|
|
612
|
+
clear_to_size(settings.min_buckets(ht.size(), min_buckets_wanted));
|
|
584
613
|
|
|
585
614
|
// We use a normal iterator to get non-deleted bcks from ht
|
|
586
615
|
// We could use insert() here, but since we know there are
|
|
@@ -594,41 +623,38 @@ class dense_hashtable {
|
|
|
594
623
|
!test_empty(bucknum); // not empty
|
|
595
624
|
bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
|
|
596
625
|
++num_probes;
|
|
597
|
-
assert(num_probes < bucket_count()
|
|
626
|
+
assert(num_probes < bucket_count()
|
|
627
|
+
&& "Hashtable is full: an error in key_equal<> or hash<>");
|
|
598
628
|
}
|
|
599
629
|
set_value(&table[bucknum], *it); // copies the value to here
|
|
600
630
|
num_elements++;
|
|
601
631
|
}
|
|
632
|
+
settings.inc_num_ht_copies();
|
|
602
633
|
}
|
|
603
634
|
|
|
604
635
|
// Required by the spec for hashed associative container
|
|
605
636
|
public:
|
|
606
637
|
// Though the docs say this should be num_buckets, I think it's much
|
|
607
|
-
// more useful as
|
|
638
|
+
// more useful as num_elements. As a special feature, calling with
|
|
608
639
|
// req_elements==0 will cause us to shrink if we can, saving space.
|
|
609
640
|
void resize(size_type req_elements) { // resize to this or larger
|
|
610
|
-
if ( consider_shrink || req_elements == 0 )
|
|
641
|
+
if ( settings.consider_shrink() || req_elements == 0 )
|
|
611
642
|
maybe_shrink();
|
|
612
643
|
if ( req_elements > num_elements )
|
|
613
|
-
|
|
644
|
+
resize_delta(req_elements - num_elements);
|
|
614
645
|
}
|
|
615
646
|
|
|
616
|
-
// Get and change the value of
|
|
617
|
-
//
|
|
618
|
-
//
|
|
619
|
-
//
|
|
647
|
+
// Get and change the value of shrink_factor and enlarge_factor. The
|
|
648
|
+
// description at the beginning of this file explains how to choose
|
|
649
|
+
// the values. Setting the shrink parameter to 0.0 ensures that the
|
|
650
|
+
// table never shrinks.
|
|
620
651
|
void get_resizing_parameters(float* shrink, float* grow) const {
|
|
621
|
-
*shrink =
|
|
622
|
-
*grow =
|
|
652
|
+
*shrink = settings.shrink_factor();
|
|
653
|
+
*grow = settings.enlarge_factor();
|
|
623
654
|
}
|
|
624
655
|
void set_resizing_parameters(float shrink, float grow) {
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
if (shrink > grow/2.0f)
|
|
628
|
-
shrink = grow / 2.0f; // otherwise we thrash hashtable size
|
|
629
|
-
shrink_resize_percent = shrink;
|
|
630
|
-
enlarge_resize_percent = grow;
|
|
631
|
-
reset_thresholds();
|
|
656
|
+
settings.set_resizing_parameters(shrink, grow);
|
|
657
|
+
settings.reset_thresholds(bucket_count());
|
|
632
658
|
}
|
|
633
659
|
|
|
634
660
|
// CONSTRUCTORS -- as required by the specs, we take a size,
|
|
@@ -639,105 +665,133 @@ class dense_hashtable {
|
|
|
639
665
|
const HashFcn& hf = HashFcn(),
|
|
640
666
|
const EqualKey& eql = EqualKey(),
|
|
641
667
|
const ExtractKey& ext = ExtractKey(),
|
|
642
|
-
const SetKey& set = SetKey()
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
668
|
+
const SetKey& set = SetKey(),
|
|
669
|
+
const Alloc& alloc = Alloc())
|
|
670
|
+
: settings(hf),
|
|
671
|
+
key_info(ext, set, eql),
|
|
672
|
+
allocator(alloc),
|
|
673
|
+
num_deleted(0),
|
|
674
|
+
num_elements(0),
|
|
675
|
+
num_buckets(expected_max_items_in_table == 0
|
|
676
|
+
? HT_DEFAULT_STARTING_BUCKETS
|
|
677
|
+
: settings.min_buckets(expected_max_items_in_table, 0)),
|
|
678
|
+
emptyval(),
|
|
679
|
+
table(NULL) {
|
|
651
680
|
// table is NULL until emptyval is set. However, we set num_buckets
|
|
652
681
|
// here so we know how much space to allocate once emptyval is set
|
|
653
|
-
reset_thresholds();
|
|
682
|
+
settings.reset_thresholds(bucket_count());
|
|
654
683
|
}
|
|
655
684
|
|
|
656
685
|
// As a convenience for resize(), we allow an optional second argument
|
|
657
686
|
// which lets you make this new hashtable a different size than ht
|
|
658
687
|
dense_hashtable(const dense_hashtable& ht,
|
|
659
688
|
size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
689
|
+
: settings(ht.settings),
|
|
690
|
+
key_info(ht.key_info),
|
|
691
|
+
allocator(ht.allocator),
|
|
692
|
+
num_deleted(0),
|
|
693
|
+
num_elements(0),
|
|
694
|
+
num_buckets(0),
|
|
695
|
+
emptyval(ht.emptyval),
|
|
696
|
+
table(NULL) {
|
|
697
|
+
if (!ht.settings.use_empty()) {
|
|
698
|
+
// If use_empty isn't set, copy_from will crash, so we do our own copying.
|
|
699
|
+
assert(ht.empty());
|
|
700
|
+
num_buckets = settings.min_buckets(ht.size(), min_buckets_wanted);
|
|
701
|
+
settings.reset_thresholds(bucket_count());
|
|
702
|
+
return;
|
|
703
|
+
}
|
|
704
|
+
settings.reset_thresholds(bucket_count());
|
|
668
705
|
copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries
|
|
669
706
|
}
|
|
670
707
|
|
|
671
708
|
dense_hashtable& operator= (const dense_hashtable& ht) {
|
|
672
709
|
if (&ht == this) return *this; // don't copy onto ourselves
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
710
|
+
if (!ht.settings.use_empty()) {
|
|
711
|
+
assert(ht.empty());
|
|
712
|
+
dense_hashtable empty_table(ht); // empty table with ht's thresholds
|
|
713
|
+
this->swap(empty_table);
|
|
714
|
+
return *this;
|
|
715
|
+
}
|
|
716
|
+
settings = ht.settings;
|
|
717
|
+
key_info = ht.key_info;
|
|
681
718
|
set_value(&emptyval, ht.emptyval);
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
719
|
+
// copy_from() calls clear and sets num_deleted to 0 too
|
|
720
|
+
copy_from(ht, HT_MIN_BUCKETS);
|
|
721
|
+
// we purposefully don't copy the allocator, which may not be copyable
|
|
685
722
|
return *this;
|
|
686
723
|
}
|
|
687
724
|
|
|
688
725
|
~dense_hashtable() {
|
|
689
726
|
if (table) {
|
|
690
727
|
destroy_buckets(0, num_buckets);
|
|
691
|
-
|
|
728
|
+
allocator.deallocate(table, num_buckets);
|
|
692
729
|
}
|
|
693
730
|
}
|
|
694
731
|
|
|
695
732
|
// Many STL algorithms use swap instead of copy constructors
|
|
696
733
|
void swap(dense_hashtable& ht) {
|
|
697
|
-
STL_NAMESPACE::swap(
|
|
698
|
-
STL_NAMESPACE::swap(
|
|
699
|
-
STL_NAMESPACE::swap(get_key, ht.get_key);
|
|
700
|
-
STL_NAMESPACE::swap(set_key, ht.set_key);
|
|
734
|
+
STL_NAMESPACE::swap(settings, ht.settings);
|
|
735
|
+
STL_NAMESPACE::swap(key_info, ht.key_info);
|
|
701
736
|
STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
|
|
702
|
-
STL_NAMESPACE::swap(
|
|
703
|
-
STL_NAMESPACE::swap(
|
|
704
|
-
STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent);
|
|
705
|
-
STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent);
|
|
706
|
-
STL_NAMESPACE::swap(delkey, ht.delkey);
|
|
737
|
+
STL_NAMESPACE::swap(num_elements, ht.num_elements);
|
|
738
|
+
STL_NAMESPACE::swap(num_buckets, ht.num_buckets);
|
|
707
739
|
{ value_type tmp; // for annoying reasons, swap() doesn't work
|
|
708
740
|
set_value(&tmp, emptyval);
|
|
709
741
|
set_value(&emptyval, ht.emptyval);
|
|
710
742
|
set_value(&ht.emptyval, tmp);
|
|
711
743
|
}
|
|
712
744
|
STL_NAMESPACE::swap(table, ht.table);
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
ht.reset_thresholds();
|
|
745
|
+
settings.reset_thresholds(bucket_count()); // this also resets consider_shrink
|
|
746
|
+
ht.settings.reset_thresholds(bucket_count());
|
|
747
|
+
// we purposefully don't swap the allocator, which may not be swap-able
|
|
717
748
|
}
|
|
718
749
|
|
|
719
|
-
|
|
720
|
-
void
|
|
721
|
-
if (table)
|
|
750
|
+
private:
|
|
751
|
+
void clear_to_size(size_type new_num_buckets) {
|
|
752
|
+
if (!table) {
|
|
753
|
+
table = allocator.allocate(new_num_buckets);
|
|
754
|
+
} else {
|
|
722
755
|
destroy_buckets(0, num_buckets);
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
756
|
+
if (new_num_buckets != num_buckets) { // resize, if necessary
|
|
757
|
+
typedef integral_constant<bool,
|
|
758
|
+
is_same<value_alloc_type,
|
|
759
|
+
libc_allocator_with_realloc<value_type> >::value>
|
|
760
|
+
realloc_ok;
|
|
761
|
+
resize_table(num_buckets, new_num_buckets, realloc_ok());
|
|
762
|
+
}
|
|
763
|
+
}
|
|
726
764
|
assert(table);
|
|
727
|
-
fill_range_with_empty(table, table +
|
|
765
|
+
fill_range_with_empty(table, table + new_num_buckets);
|
|
728
766
|
num_elements = 0;
|
|
729
767
|
num_deleted = 0;
|
|
768
|
+
num_buckets = new_num_buckets; // our new size
|
|
769
|
+
settings.reset_thresholds(bucket_count());
|
|
770
|
+
}
|
|
771
|
+
|
|
772
|
+
public:
|
|
773
|
+
// It's always nice to be able to clear a table without deallocating it
|
|
774
|
+
void clear() {
|
|
775
|
+
// If the table is already empty, and the number of buckets is
|
|
776
|
+
// already as we desire, there's nothing to do.
|
|
777
|
+
const size_type new_num_buckets = settings.min_buckets(0, 0);
|
|
778
|
+
if (num_elements == 0 && new_num_buckets == num_buckets) {
|
|
779
|
+
return;
|
|
780
|
+
}
|
|
781
|
+
clear_to_size(new_num_buckets);
|
|
730
782
|
}
|
|
731
783
|
|
|
732
784
|
// Clear the table without resizing it.
|
|
733
785
|
// Mimicks the stl_hashtable's behaviour when clear()-ing in that it
|
|
734
786
|
// does not modify the bucket count
|
|
735
787
|
void clear_no_resize() {
|
|
736
|
-
if (
|
|
737
|
-
|
|
788
|
+
if (num_elements > 0) {
|
|
789
|
+
assert(table);
|
|
790
|
+
destroy_buckets(0, num_buckets);
|
|
791
|
+
fill_range_with_empty(table, table + num_buckets);
|
|
738
792
|
}
|
|
739
793
|
// don't consider to shrink before another erase()
|
|
740
|
-
reset_thresholds();
|
|
794
|
+
settings.reset_thresholds(bucket_count());
|
|
741
795
|
num_elements = 0;
|
|
742
796
|
num_deleted = 0;
|
|
743
797
|
}
|
|
@@ -770,7 +824,8 @@ class dense_hashtable {
|
|
|
770
824
|
}
|
|
771
825
|
++num_probes; // we're doing another probe
|
|
772
826
|
bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
|
|
773
|
-
assert(num_probes < bucket_count()
|
|
827
|
+
assert(num_probes < bucket_count()
|
|
828
|
+
&& "Hashtable is full: an error in key_equal<> or hash<>");
|
|
774
829
|
}
|
|
775
830
|
}
|
|
776
831
|
|
|
@@ -829,36 +884,63 @@ class dense_hashtable {
|
|
|
829
884
|
|
|
830
885
|
// INSERTION ROUTINES
|
|
831
886
|
private:
|
|
887
|
+
// Private method used by insert_noresize and find_or_insert.
|
|
888
|
+
iterator insert_at(const_reference obj, size_type pos) {
|
|
889
|
+
if (size() >= max_size())
|
|
890
|
+
throw std::length_error("insert overflow");
|
|
891
|
+
if ( test_deleted(pos) ) { // just replace if it's been del.
|
|
892
|
+
// shrug: shouldn't need to be const.
|
|
893
|
+
const_iterator delpos(this, table + pos, table + num_buckets, false);
|
|
894
|
+
clear_deleted(delpos);
|
|
895
|
+
assert( num_deleted > 0);
|
|
896
|
+
--num_deleted; // used to be, now it isn't
|
|
897
|
+
} else {
|
|
898
|
+
++num_elements; // replacing an empty bucket
|
|
899
|
+
}
|
|
900
|
+
set_value(&table[pos], obj);
|
|
901
|
+
return iterator(this, table + pos, table + num_buckets, false);
|
|
902
|
+
}
|
|
903
|
+
|
|
832
904
|
// If you know *this is big enough to hold obj, use this routine
|
|
833
|
-
pair<iterator, bool> insert_noresize(
|
|
905
|
+
pair<iterator, bool> insert_noresize(const_reference obj) {
|
|
834
906
|
// First, double-check we're not inserting delkey or emptyval
|
|
835
|
-
assert(!use_empty || !equals(get_key(obj), get_key(emptyval)))
|
|
836
|
-
|
|
907
|
+
assert((!settings.use_empty() || !equals(get_key(obj), get_key(emptyval)))
|
|
908
|
+
&& "Inserting the empty key");
|
|
909
|
+
assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
|
|
910
|
+
&& "Inserting the deleted key");
|
|
837
911
|
const pair<size_type,size_type> pos = find_position(get_key(obj));
|
|
838
912
|
if ( pos.first != ILLEGAL_BUCKET) { // object was already there
|
|
839
913
|
return pair<iterator,bool>(iterator(this, table + pos.first,
|
|
840
914
|
table + num_buckets, false),
|
|
841
915
|
false); // false: we didn't insert
|
|
842
916
|
} else { // pos.second says where to put it
|
|
843
|
-
|
|
844
|
-
const_iterator delpos(this, table + pos.second, // shrug:
|
|
845
|
-
table + num_buckets, false);// shouldn't need const
|
|
846
|
-
clear_deleted(delpos);
|
|
847
|
-
assert( num_deleted > 0);
|
|
848
|
-
--num_deleted; // used to be, now it isn't
|
|
849
|
-
} else {
|
|
850
|
-
++num_elements; // replacing an empty bucket
|
|
851
|
-
}
|
|
852
|
-
set_value(&table[pos.second], obj);
|
|
853
|
-
return pair<iterator,bool>(iterator(this, table + pos.second,
|
|
854
|
-
table + num_buckets, false),
|
|
855
|
-
true); // true: we did insert
|
|
917
|
+
return pair<iterator,bool>(insert_at(obj, pos.second), true);
|
|
856
918
|
}
|
|
857
919
|
}
|
|
858
920
|
|
|
921
|
+
// Specializations of insert(it, it) depending on the power of the iterator:
|
|
922
|
+
// (1) Iterator supports operator-, resize before inserting
|
|
923
|
+
template <class ForwardIterator>
|
|
924
|
+
void insert(ForwardIterator f, ForwardIterator l, STL_NAMESPACE::forward_iterator_tag) {
|
|
925
|
+
size_t dist = STL_NAMESPACE::distance(f, l);
|
|
926
|
+
if (dist >= (std::numeric_limits<size_type>::max)())
|
|
927
|
+
throw std::length_error("insert-range overflow");
|
|
928
|
+
resize_delta(static_cast<size_type>(dist));
|
|
929
|
+
for ( ; dist > 0; --dist, ++f) {
|
|
930
|
+
insert_noresize(*f);
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
|
|
934
|
+
// (2) Arbitrary iterator, can't tell how much to resize
|
|
935
|
+
template <class InputIterator>
|
|
936
|
+
void insert(InputIterator f, InputIterator l, STL_NAMESPACE::input_iterator_tag) {
|
|
937
|
+
for ( ; f != l; ++f)
|
|
938
|
+
insert(*f);
|
|
939
|
+
}
|
|
940
|
+
|
|
859
941
|
public:
|
|
860
942
|
// This is the normal insert routine, used by the outside world
|
|
861
|
-
pair<iterator, bool> insert(
|
|
943
|
+
pair<iterator, bool> insert(const_reference obj) {
|
|
862
944
|
resize_delta(1); // adding an object, grow if need be
|
|
863
945
|
return insert_noresize(obj);
|
|
864
946
|
}
|
|
@@ -870,59 +952,80 @@ class dense_hashtable {
|
|
|
870
952
|
insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
|
|
871
953
|
}
|
|
872
954
|
|
|
873
|
-
//
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
955
|
+
// This is public only because dense_hash_map::operator[] uses it.
|
|
956
|
+
// It does the minimal amount of work to implement operator[].
|
|
957
|
+
template <class DataType>
|
|
958
|
+
DataType& find_or_insert(const key_type& key) {
|
|
959
|
+
// First, double-check we're not inserting emptykey or delkey
|
|
960
|
+
assert((!settings.use_empty() || !equals(key, get_key(emptyval)))
|
|
961
|
+
&& "Inserting the empty key");
|
|
962
|
+
assert((!settings.use_deleted() || !equals(key, key_info.delkey))
|
|
963
|
+
&& "Inserting the deleted key");
|
|
964
|
+
const pair<size_type,size_type> pos = find_position(key);
|
|
965
|
+
if ( pos.first != ILLEGAL_BUCKET) { // object was already there
|
|
966
|
+
return table[pos.first].second;
|
|
967
|
+
} else if (resize_delta(1)) { // needed to rehash to make room
|
|
968
|
+
// Since we resized, we can't use pos, so recalculate where to insert.
|
|
969
|
+
return insert_noresize(value_type(key, DataType())).first->second;
|
|
970
|
+
} else { // no need to rehash, insert right here
|
|
971
|
+
return insert_at(value_type(key, DataType()), pos.second)->second;
|
|
972
|
+
}
|
|
889
973
|
}
|
|
890
974
|
|
|
891
|
-
|
|
892
975
|
// DELETION ROUTINES
|
|
893
976
|
size_type erase(const key_type& key) {
|
|
894
|
-
// First, double-check we're not trying to erase delkey or emptyval
|
|
895
|
-
assert(!use_empty || !equals(key, get_key(emptyval)))
|
|
896
|
-
|
|
977
|
+
// First, double-check we're not trying to erase delkey or emptyval.
|
|
978
|
+
assert((!settings.use_empty() || !equals(key, get_key(emptyval)))
|
|
979
|
+
&& "Erasing the empty key");
|
|
980
|
+
assert((!settings.use_deleted() || !equals(key, key_info.delkey))
|
|
981
|
+
&& "Erasing the deleted key");
|
|
897
982
|
const_iterator pos = find(key); // shrug: shouldn't need to be const
|
|
898
983
|
if ( pos != end() ) {
|
|
899
984
|
assert(!test_deleted(pos)); // or find() shouldn't have returned it
|
|
900
985
|
set_deleted(pos);
|
|
901
986
|
++num_deleted;
|
|
902
|
-
|
|
987
|
+
settings.set_consider_shrink(true); // will think about shrink after next insert
|
|
903
988
|
return 1; // because we deleted one thing
|
|
904
989
|
} else {
|
|
905
990
|
return 0; // because we deleted nothing
|
|
906
991
|
}
|
|
907
992
|
}
|
|
908
993
|
|
|
909
|
-
//
|
|
910
|
-
|
|
911
|
-
// Since that's a moot issue for deleted keys, we allow const_iterators
|
|
912
|
-
void erase(const_iterator pos) {
|
|
994
|
+
// We return the iterator past the deleted item.
|
|
995
|
+
void erase(iterator pos) {
|
|
913
996
|
if ( pos == end() ) return; // sanity check
|
|
914
997
|
if ( set_deleted(pos) ) { // true if object has been newly deleted
|
|
915
998
|
++num_deleted;
|
|
916
|
-
|
|
999
|
+
settings.set_consider_shrink(true); // will think about shrink after next insert
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
|
|
1003
|
+
void erase(iterator f, iterator l) {
|
|
1004
|
+
for ( ; f != l; ++f) {
|
|
1005
|
+
if ( set_deleted(f) ) // should always be true
|
|
1006
|
+
++num_deleted;
|
|
917
1007
|
}
|
|
1008
|
+
settings.set_consider_shrink(true); // will think about shrink after next insert
|
|
918
1009
|
}
|
|
919
1010
|
|
|
1011
|
+
// We allow you to erase a const_iterator just like we allow you to
|
|
1012
|
+
// erase an iterator. This is in parallel to 'delete': you can delete
|
|
1013
|
+
// a const pointer just like a non-const pointer. The logic is that
|
|
1014
|
+
// you can't use the object after it's erased anyway, so it doesn't matter
|
|
1015
|
+
// if it's const or not.
|
|
1016
|
+
void erase(const_iterator pos) {
|
|
1017
|
+
if ( pos == end() ) return; // sanity check
|
|
1018
|
+
if ( set_deleted(pos) ) { // true if object has been newly deleted
|
|
1019
|
+
++num_deleted;
|
|
1020
|
+
settings.set_consider_shrink(true); // will think about shrink after next insert
|
|
1021
|
+
}
|
|
1022
|
+
}
|
|
920
1023
|
void erase(const_iterator f, const_iterator l) {
|
|
921
1024
|
for ( ; f != l; ++f) {
|
|
922
1025
|
if ( set_deleted(f) ) // should always be true
|
|
923
1026
|
++num_deleted;
|
|
924
1027
|
}
|
|
925
|
-
|
|
1028
|
+
settings.set_consider_shrink(true); // will think about shrink after next insert
|
|
926
1029
|
}
|
|
927
1030
|
|
|
928
1031
|
|
|
@@ -962,12 +1065,12 @@ class dense_hashtable {
|
|
|
962
1065
|
|
|
963
1066
|
bool read_metadata(FILE *fp) {
|
|
964
1067
|
num_deleted = 0; // since we got rid before writing
|
|
965
|
-
assert(use_empty)
|
|
966
|
-
if (table)
|
|
1068
|
+
assert(settings.use_empty() && "empty_key not set for read_metadata");
|
|
1069
|
+
if (table) allocator.deallocate(table, num_buckets); // we'll make our own
|
|
967
1070
|
// TODO: read magic number
|
|
968
1071
|
// TODO: read num_buckets
|
|
969
|
-
reset_thresholds();
|
|
970
|
-
table = (
|
|
1072
|
+
settings.reset_thresholds(bucket_count());
|
|
1073
|
+
table = allocator.allocate(num_buckets);
|
|
971
1074
|
assert(table);
|
|
972
1075
|
fill_range_with_empty(table, table + num_buckets);
|
|
973
1076
|
// TODO: read num_elements
|
|
@@ -1001,35 +1104,114 @@ class dense_hashtable {
|
|
|
1001
1104
|
}
|
|
1002
1105
|
|
|
1003
1106
|
private:
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1107
|
+
template <class A>
|
|
1108
|
+
class alloc_impl : public A {
|
|
1109
|
+
public:
|
|
1110
|
+
typedef typename A::pointer pointer;
|
|
1111
|
+
typedef typename A::size_type size_type;
|
|
1112
|
+
|
|
1113
|
+
// Convert a normal allocator to one that has realloc_or_die()
|
|
1114
|
+
alloc_impl(const A& a) : A(a) { }
|
|
1115
|
+
|
|
1116
|
+
// realloc_or_die should only be used when using the default
|
|
1117
|
+
// allocator (libc_allocator_with_realloc).
|
|
1118
|
+
pointer realloc_or_die(pointer ptr, size_type n) {
|
|
1119
|
+
fprintf(stderr, "realloc_or_die is only supported for "
|
|
1120
|
+
"libc_allocator_with_realloc");
|
|
1121
|
+
exit(1);
|
|
1122
|
+
return NULL;
|
|
1123
|
+
}
|
|
1124
|
+
};
|
|
1125
|
+
|
|
1126
|
+
// A template specialization of alloc_impl for
|
|
1127
|
+
// libc_allocator_with_realloc that can handle realloc_or_die.
|
|
1128
|
+
template <class A>
|
|
1129
|
+
class alloc_impl<libc_allocator_with_realloc<A> >
|
|
1130
|
+
: public libc_allocator_with_realloc<A> {
|
|
1131
|
+
public:
|
|
1132
|
+
typedef typename libc_allocator_with_realloc<A>::pointer pointer;
|
|
1133
|
+
typedef typename libc_allocator_with_realloc<A>::size_type size_type;
|
|
1134
|
+
|
|
1135
|
+
alloc_impl(const libc_allocator_with_realloc<A>& a)
|
|
1136
|
+
: libc_allocator_with_realloc<A>(a) { }
|
|
1137
|
+
|
|
1138
|
+
pointer realloc_or_die(pointer ptr, size_type n) {
|
|
1139
|
+
pointer retval = this->reallocate(ptr, n);
|
|
1140
|
+
if (retval == NULL) {
|
|
1141
|
+
// We really should use PRIuS here, but I don't want to have to add
|
|
1142
|
+
// a whole new configure option, with concomitant macro namespace
|
|
1143
|
+
// pollution, just to print this (unlikely) error message. So I cast.
|
|
1144
|
+
fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
|
|
1145
|
+
"%lu elements for ptr %p",
|
|
1146
|
+
static_cast<unsigned long>(n), ptr);
|
|
1147
|
+
exit(1);
|
|
1148
|
+
}
|
|
1149
|
+
return retval;
|
|
1150
|
+
}
|
|
1151
|
+
};
|
|
1152
|
+
|
|
1153
|
+
// Package functors with another class to eliminate memory needed for
|
|
1154
|
+
// zero-size functors. Since ExtractKey and hasher's operator() might
|
|
1155
|
+
// have the same function signature, they must be packaged in
|
|
1156
|
+
// different classes.
|
|
1157
|
+
struct Settings :
|
|
1158
|
+
sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
|
|
1159
|
+
explicit Settings(const hasher& hf)
|
|
1160
|
+
: sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
|
|
1161
|
+
hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
|
|
1162
|
+
};
|
|
1163
|
+
|
|
1164
|
+
// Packages ExtractKey and SetKey functors.
|
|
1165
|
+
class KeyInfo : public ExtractKey, public SetKey, public key_equal {
|
|
1166
|
+
public:
|
|
1167
|
+
KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
|
|
1168
|
+
: ExtractKey(ek),
|
|
1169
|
+
SetKey(sk),
|
|
1170
|
+
key_equal(eq) {
|
|
1171
|
+
}
|
|
1172
|
+
const key_type get_key(const_reference v) const {
|
|
1173
|
+
return ExtractKey::operator()(v);
|
|
1174
|
+
}
|
|
1175
|
+
void set_key(pointer v, const key_type& k) const {
|
|
1176
|
+
SetKey::operator()(v, k);
|
|
1177
|
+
}
|
|
1178
|
+
bool equals(const key_type& a, const key_type& b) const {
|
|
1179
|
+
return key_equal::operator()(a, b);
|
|
1180
|
+
}
|
|
1181
|
+
|
|
1182
|
+
// Which key marks deleted entries.
|
|
1183
|
+
// TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
|
|
1184
|
+
typename remove_const<key_type>::type delkey;
|
|
1185
|
+
};
|
|
1023
1186
|
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1187
|
+
// Utility functions to access the templated operators
|
|
1188
|
+
size_type hash(const key_type& v) const {
|
|
1189
|
+
return settings.hash(v);
|
|
1190
|
+
}
|
|
1191
|
+
bool equals(const key_type& a, const key_type& b) const {
|
|
1192
|
+
return key_info.equals(a, b);
|
|
1193
|
+
}
|
|
1194
|
+
const key_type get_key(const_reference v) const {
|
|
1195
|
+
return key_info.get_key(v);
|
|
1030
1196
|
}
|
|
1197
|
+
void set_key(pointer v, const key_type& k) const {
|
|
1198
|
+
key_info.set_key(v, k);
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
private:
|
|
1202
|
+
// Actual data
|
|
1203
|
+
Settings settings;
|
|
1204
|
+
KeyInfo key_info;
|
|
1205
|
+
alloc_impl<value_alloc_type> allocator;
|
|
1206
|
+
|
|
1207
|
+
size_type num_deleted; // how many occupied buckets are marked deleted
|
|
1208
|
+
size_type num_elements;
|
|
1209
|
+
size_type num_buckets;
|
|
1210
|
+
value_type emptyval; // which key marks unused entries
|
|
1211
|
+
pointer table;
|
|
1031
1212
|
};
|
|
1032
1213
|
|
|
1214
|
+
|
|
1033
1215
|
// We need a global swap as well
|
|
1034
1216
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
1035
1217
|
inline void swap(dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
|
|
@@ -1041,7 +1223,7 @@ inline void swap(dense_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
|
|
|
1041
1223
|
|
|
1042
1224
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
1043
1225
|
const typename dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
|
|
1044
|
-
dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;
|
|
1226
|
+
dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;
|
|
1045
1227
|
|
|
1046
1228
|
// How full we let the table get before we resize. Knuth says .8 is
|
|
1047
1229
|
// good -- higher causes us to probe too much, though saves memory.
|
|
@@ -1049,13 +1231,14 @@ dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;
|
|
|
1049
1231
|
// more space (a trade-off densehashtable explicitly chooses to make).
|
|
1050
1232
|
// Feel free to play around with different values, though.
|
|
1051
1233
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
1052
|
-
const
|
|
1234
|
+
const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50;
|
|
1053
1235
|
|
|
1054
1236
|
// How empty we let the table get before we resize lower.
|
|
1055
|
-
// It should be less than
|
|
1237
|
+
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
|
|
1056
1238
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
1057
|
-
const
|
|
1058
|
-
|
|
1239
|
+
const int dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
|
|
1240
|
+
= static_cast<int>(0.4 *
|
|
1241
|
+
dense_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);
|
|
1059
1242
|
|
|
1060
1243
|
_END_GOOGLE_NAMESPACE_
|
|
1061
1244
|
|