google_hash 0.6.2 → 0.7.0
- data/README +61 -27
- data/Rakefile +4 -1
- data/TODO +5 -0
- data/VERSION +1 -1
- data/changelog +3 -0
- data/ext/extconf.rb +10 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/AUTHORS +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/COPYING +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/ChangeLog +47 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/INSTALL +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/Makefile.am +29 -14
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/Makefile.in +77 -42
- data/ext/sparsehash-1.8.1/NEWS +71 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/README +0 -0
- data/ext/{sparsehash-1.5.2/README.windows → sparsehash-1.8.1/README_windows.txt} +25 -25
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/TODO +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/aclocal.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/compile +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/config.guess +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/config.sub +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/configure +3690 -4560
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/configure.ac +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/depcomp +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/dense_hash_map.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/dense_hash_set.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/designstyle.css +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/implementation.html +11 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/index.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/performance.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparse_hash_map.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparse_hash_set.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparsetable.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/Makefile +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/README +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/example.c +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/libchash.c +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/libchash.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/google-sparsehash.sln +17 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/install-sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/acx_pthread.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/google_namespace.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/namespaces.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_hash.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_hash_fun.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_namespace.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/missing +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/mkinstalldirs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb.sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/README +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/changelog +24 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/compat +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/control +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/copyright +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/docs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/rules +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/sparsehash.dirs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/sparsehash.install +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/rpm.sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/rpm/rpm.spec +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/config.h.in +3 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/config.h.include +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/dense_hash_map +43 -27
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/dense_hash_set +40 -19
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparse_hash_map +32 -23
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparse_hash_set +31 -21
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsehash/densehashtable.h +481 -298
- data/ext/sparsehash-1.8.1/src/google/sparsehash/hashtable-common.h +178 -0
- data/ext/sparsehash-1.8.1/src/google/sparsehash/libc_allocator_with_realloc.h +121 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsehash/sparsehashtable.h +404 -233
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsetable +173 -83
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/type_traits.h +3 -29
- data/ext/sparsehash-1.8.1/src/hash_test_interface.h +1011 -0
- data/ext/sparsehash-1.8.1/src/hashtable_test.cc +1733 -0
- data/ext/sparsehash-1.8.1/src/libc_allocator_with_realloc_test.cc +129 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/simple_test.cc +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/sparsetable_unittest.cc +202 -6
- data/ext/sparsehash-1.8.1/src/testutil.h +251 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/time_hash_map.cc +128 -54
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/type_traits_unittest.cc +30 -20
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/config.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/google/sparsehash/sparseconfig.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/port.cc +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/port.h +0 -0
- data/ext/sparsehash-1.8.1/vsprojects/hashtable_test/hashtable_test.vcproj +197 -0
- data/ext/{sparsehash-1.5.2/vsprojects/hashtable_unittest/hashtable_unittest.vcproj → sparsehash-1.8.1/vsprojects/simple_test/simple_test.vcproj} +9 -8
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj +0 -2
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/time_hash_map/time_hash_map.vcproj +3 -2
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/type_traits_unittest/type_traits_unittest.vcproj +0 -2
- data/ext/template/google_hash.cpp.erb +2 -1
- data/ext/template/main.cpp.erb +1 -1
- data/results.txt +6 -22
- data/spec/benchmark.rb +57 -0
- data/spec/spec.google_hash.rb +1 -8
- metadata +140 -130
- data/ext/benchmark.rb +0 -47
- data/ext/sparsehash-1.5.2/NEWS +0 -0
- data/ext/sparsehash-1.5.2/src/hashtable_unittest.cc +0 -1375
- data/ext/sparsehash-1.5.2/src/words +0 -8944
- data/types.txt +0 -18
data/ext/sparsehash-1.8.1/src/google/sparsehash/hashtable-common.h (new file):

@@ -0,0 +1,178 @@
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Giao Nguyen

#ifndef UTIL_GTL_HASHTABLE_COMMON_H_
#define UTIL_GTL_HASHTABLE_COMMON_H_

#include <assert.h>

// Settings contains parameters for growing and shrinking the table.
// It also packages zero-size functor (ie. hasher).

template<typename Key, typename HashFunc,
         typename SizeType, int HT_MIN_BUCKETS>
class sh_hashtable_settings : public HashFunc {
 public:
  typedef Key key_type;
  typedef HashFunc hasher;
  typedef SizeType size_type;

 public:
  sh_hashtable_settings(const hasher& hf,
                        const float ht_occupancy_flt,
                        const float ht_empty_flt)
    : hasher(hf),
      enlarge_threshold_(0),
      shrink_threshold_(0),
      consider_shrink_(false),
      use_empty_(false),
      use_deleted_(false),
      num_ht_copies_(0) {
    set_enlarge_factor(ht_occupancy_flt);
    set_shrink_factor(ht_empty_flt);
  }

  size_type hash(const key_type& v) const {
    return hasher::operator()(v);
  }

  float enlarge_factor() const {
    return enlarge_factor_;
  }
  void set_enlarge_factor(float f) {
    enlarge_factor_ = f;
  }
  float shrink_factor() const {
    return shrink_factor_;
  }
  void set_shrink_factor(float f) {
    shrink_factor_ = f;
  }

  size_type enlarge_threshold() const {
    return enlarge_threshold_;
  }
  void set_enlarge_threshold(size_type t) {
    enlarge_threshold_ = t;
  }
  size_type shrink_threshold() const {
    return shrink_threshold_;
  }
  void set_shrink_threshold(size_type t) {
    shrink_threshold_ = t;
  }

  size_type enlarge_size(size_type x) const {
    return static_cast<size_type>(x * enlarge_factor_);
  }
  size_type shrink_size(size_type x) const {
    return static_cast<size_type>(x * shrink_factor_);
  }

  bool consider_shrink() const {
    return consider_shrink_;
  }
  void set_consider_shrink(bool t) {
    consider_shrink_ = t;
  }

  bool use_empty() const {
    return use_empty_;
  }
  void set_use_empty(bool t) {
    use_empty_ = t;
  }

  bool use_deleted() const {
    return use_deleted_;
  }
  void set_use_deleted(bool t) {
    use_deleted_ = t;
  }

  size_type num_ht_copies() const {
    return static_cast<size_type>(num_ht_copies_);
  }
  void inc_num_ht_copies() {
    ++num_ht_copies_;
  }

  // Reset the enlarge and shrink thresholds
  void reset_thresholds(int num_buckets) {
    set_enlarge_threshold(enlarge_size(num_buckets));
    set_shrink_threshold(shrink_size(num_buckets));
    // whatever caused us to reset already considered
    set_consider_shrink(false);
  }

  // Caller is resposible for calling reset_threshold right after
  // set_resizing_parameters.
  void set_resizing_parameters(float shrink, float grow) {
    assert(shrink >= 0.0);
    assert(grow <= 1.0);
    if (shrink > grow/2.0f)
      shrink = grow / 2.0f;    // otherwise we thrash hashtable size
    set_shrink_factor(shrink);
    set_enlarge_factor(grow);
  }

  // This is the smallest size a hashtable can be without being too crowded
  // If you like, you can give a min #buckets as well as a min #elts
  size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) {
    float enlarge = enlarge_factor();
    size_type sz = HT_MIN_BUCKETS;             // min buckets allowed
    while ( sz < min_buckets_wanted ||
            num_elts >= static_cast<size_type>(sz * enlarge) ) {
      // This just prevents overflowing size_type, since sz can exceed
      // max_size() here.
      if (static_cast<size_type>(sz * 2) < sz) {
        throw std::length_error("resize overflow");  // protect against overflow
      }
      sz *= 2;
    }
    return sz;
  }

 private:
  size_type enlarge_threshold_;  // table.size() * enlarge_factor
  size_type shrink_threshold_;   // table.size() * shrink_factor
  float enlarge_factor_;         // how full before resize
  float shrink_factor_;          // how empty before resize
  // consider_shrink=true if we should try to shrink before next insert
  bool consider_shrink_;
  bool use_empty_;    // used only by densehashtable, not sparsehashtable
  bool use_deleted_;  // false until delkey has been set
  // num_ht_copies is a counter incremented every Copy/Move
  unsigned int num_ht_copies_;
};

#endif  // UTIL_GTL_HASHTABLE_COMMON_H_
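The growth arithmetic above (min_buckets plus the enlarge/shrink thresholds) can be exercised on its own. The following is a minimal sketch, not part of the gem or of sparsehash: it assumes the header just shown is reachable as a local include named "hashtable-common.h", and it borrows C++11's std::hash<int> as the packaged hasher; the 0.80/0.32 constants mirror the HT_OCCUPANCY_PCT and HT_EMPTY_PCT defaults quoted later in this diff.

// Illustrative sketch only (not shipped with google_hash): exercising
// sh_hashtable_settings from hashtable-common.h shown above. Assumes that
// header is on the include path and that C++11 std::hash is available.
#include <cstdio>
#include <functional>   // std::hash (assumption: C++11)
#include <stdexcept>    // std::length_error thrown by min_buckets()
#include "hashtable-common.h"   // hypothetical local copy of the header above

int main() {
  // 80% occupancy before growing, 32% (0.4 * 80%) emptiness before shrinking,
  // matching the HT_OCCUPANCY_PCT / HT_EMPTY_PCT defaults in this release.
  sh_hashtable_settings<int, std::hash<int>, size_t, 4>
      settings(std::hash<int>(), 0.80f, 0.32f);

  // min_buckets() keeps doubling from HT_MIN_BUCKETS (4) until the element
  // count fits under the enlarge factor: 100 elements at 80% should need 128.
  size_t buckets = settings.min_buckets(100, 0);

  // reset_thresholds() derives the grow/shrink trigger points from a bucket
  // count; enlarge_threshold becomes buckets * 0.80.
  settings.reset_thresholds(static_cast<int>(buckets));
  std::printf("buckets=%zu enlarge_at=%zu shrink_at=%zu\n",
              buckets, settings.enlarge_threshold(),
              settings.shrink_threshold());
  return 0;
}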
data/ext/sparsehash-1.8.1/src/google/sparsehash/libc_allocator_with_realloc.h (new file):

@@ -0,0 +1,121 @@
// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Guilin Chen

#ifndef UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
#define UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_

#include <google/sparsehash/sparseconfig.h>

#include <stdlib.h>           // for malloc/realloc/free
#include <stddef.h>           // for ptrdiff_t


_START_GOOGLE_NAMESPACE_

template<class T>
class libc_allocator_with_realloc {
 public:
  typedef T value_type;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;

  typedef T* pointer;
  typedef const T* const_pointer;
  typedef T& reference;
  typedef const T& const_reference;

  libc_allocator_with_realloc() {}
  libc_allocator_with_realloc(const libc_allocator_with_realloc&) {}
  ~libc_allocator_with_realloc() {}

  pointer address(reference r) const  { return &r; }
  const_pointer address(const_reference r) const  { return &r; }

  pointer allocate(size_type n, const_pointer = 0) {
    return static_cast<pointer>(malloc(n * sizeof(value_type)));
  }
  void deallocate(pointer p, size_type) {
    free(p);
  }
  pointer reallocate(pointer p, size_type n) {
    return static_cast<pointer>(realloc(p, n * sizeof(value_type)));
  }

  size_type max_size() const  {
    return static_cast<size_type>(-1) / sizeof(value_type);
  }

  void construct(pointer p, const value_type& val) {
    new(p) value_type(val);
  }
  void destroy(pointer p) { p->~value_type(); }

  template <class U>
  libc_allocator_with_realloc(const libc_allocator_with_realloc<U>&) {}

  template<class U>
  struct rebind {
    typedef libc_allocator_with_realloc<U> other;
  };
};

// libc_allocator_with_realloc<void> specialization.
template<>
class libc_allocator_with_realloc<void> {
 public:
  typedef void value_type;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef void* pointer;
  typedef const void* const_pointer;

  template<class U>
  struct rebind {
    typedef libc_allocator_with_realloc<U> other;
  };
};

template<class T>
inline bool operator==(const libc_allocator_with_realloc<T>&,
                       const libc_allocator_with_realloc<T>&) {
  return true;
}

template<class T>
inline bool operator!=(const libc_allocator_with_realloc<T>&,
                       const libc_allocator_with_realloc<T>&) {
  return false;
}

_END_GOOGLE_NAMESPACE_

#endif  // UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
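A short usage sketch for the allocator above, again not part of the gem: it assumes a configured sparsehash install where the header is reachable as <google/sparsehash/libc_allocator_with_realloc.h> and where _START_GOOGLE_NAMESPACE_ expands to namespace google (the usual default). The member that distinguishes it from std::allocator is reallocate(), which lets a container resize a block with realloc() instead of allocate-copy-free.

// Illustrative sketch only: manual use of libc_allocator_with_realloc.
// Include path and the `google` namespace are assumptions about a
// configured sparsehash build, not something this diff states.
#include <cstdio>
#include <new>   // placement new used by construct()
#include <google/sparsehash/libc_allocator_with_realloc.h>

int main() {
  google::libc_allocator_with_realloc<int> alloc;

  // allocate() is a thin wrapper around malloc(); no constructors run yet.
  int* p = alloc.allocate(4);
  for (int i = 0; i < 4; ++i)
    alloc.construct(p + i, i * i);   // placement-new each slot

  // reallocate() is the point of this allocator: realloc() may grow the
  // block in place, which the standard allocator interface cannot express.
  // (Safe here because int is trivially copyable.)
  p = alloc.reallocate(p, 8);
  std::printf("p[3] = %d\n", p[3]);  // existing values survive the realloc

  for (int i = 0; i < 4; ++i)
    alloc.destroy(p + i);            // explicit destructor calls (no-op for int)
  alloc.deallocate(p, 8);
  return 0;
}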
data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsehash/sparsehashtable.h:

@@ -59,20 +59,20 @@
 // <google/sparse_hash_table> or <google/sparse_hash_set> instead.
 //
 // You can modify the following, below:
-//
-//
+// HT_OCCUPANCY_PCT      -- how full before we double size
+// HT_EMPTY_PCT          -- how empty before we halve size
 // HT_MIN_BUCKETS        -- smallest bucket size
 // HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time
 //
-// You can also change
-//
-//
+// You can also change enlarge_factor (which defaults to
+// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
+// HT_EMPTY_PCT) with set_resizing_parameters().
 //
 // How to decide what values to use?
-//
+// shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good.
 // HT_MIN_BUCKETS is probably unnecessary since you can specify
 // (indirectly) the starting number of buckets at construct-time.
-// For
+// For enlarge_factor, you can use this chart to try to trade-off
 // expected lookup time to the space taken up.  By default, this
 // code uses quadratic probing, though you can change it to linear
 // via _JUMP below if you really want to.
@@ -82,7 +82,7 @@
 // Quadratic collision resolution   1 - ln(1-L) - L/2    1/(1-L) - L - ln(1-L)
 // Linear collision resolution     [1+1/(1-L)]/2         [1+1/(1-L)2]/2
 //
-// --
+// -- enlarge_factor --           0.10  0.50  0.60  0.75  0.80  0.90  0.99
 // QUADRATIC COLLISION RES.
 //    probes/successful lookup    1.05  1.44  1.62  2.01  2.21  2.85  5.11
 //    probes/unsuccessful lookup  1.11  2.19  2.82  4.64  5.81  11.4  103.6
@@ -103,21 +103,29 @@
 // The probing method
 // Linear probing
 // #define JUMP_(key, num_probes)    ( 1 )
-// Quadratic
+// Quadratic probing
 #define JUMP_(key, num_probes)    ( num_probes )
 
-
 #include <google/sparsehash/sparseconfig.h>
 #include <assert.h>
 #include <algorithm>                 // For swap(), eg
+#include <stdexcept>                 // For length_error
 #include <iterator>                  // for facts about iterator tags
+#include <limits>                    // for numeric_limits<>
 #include <utility>                   // for pair<>
+#include <google/sparsehash/hashtable-common.h>
 #include <google/sparsetable>        // Since that's basically what we are
 
 _START_GOOGLE_NAMESPACE_
 
 using STL_NAMESPACE::pair;
 
+// The smaller this is, the faster lookup is (because the group bitmap is
+// smaller) and the faster insert is, because there's less to move.
+// On the other hand, there are more groups.  Since group::size_type is
+// a short, this number should be of the form 32*x + 16 to avoid waste.
+static const u_int16_t DEFAULT_GROUP_SIZE = 48;   // fits in 1.5 words
+
 // Hashtable class, used to implement the hashed associative containers
 // hash_set and hash_map.
 //
@@ -131,7 +139,7 @@ using STL_NAMESPACE::pair;
 // with key == deleted_key.
 //  EqualKey: Given two Keys, says whether they are the same (that is,
 //           if they are both associated with the same Value).
-//  Alloc: STL allocator to use to allocate memory.
+//  Alloc: STL allocator to use to allocate memory.
 
 template <class Value, class Key, class HashFcn,
           class ExtractKey, class SetKey, class EqualKey, class Alloc>
@@ -147,17 +155,21 @@ struct sparse_hashtable_const_iterator;
 // that skips over deleted elements.
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct sparse_hashtable_iterator {
+ private:
+  typedef typename A::template rebind<V>::other value_alloc_type;
+
  public:
  typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
  typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
-  typedef typename sparsetable<V>::nonempty_iterator
+  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::nonempty_iterator
+      st_iterator;
 
   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
   typedef V value_type;
-  typedef
-  typedef
-  typedef
-  typedef
+  typedef typename value_alloc_type::difference_type difference_type;
+  typedef typename value_alloc_type::size_type size_type;
+  typedef typename value_alloc_type::reference reference;
+  typedef typename value_alloc_type::pointer pointer;
 
   // "Real" constructor and default constructor
   sparse_hashtable_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
@@ -195,17 +207,21 @@ struct sparse_hashtable_iterator {
 // Now do it all again, but with const-ness!
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct sparse_hashtable_const_iterator {
+ private:
+  typedef typename A::template rebind<V>::other value_alloc_type;
+
  public:
   typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A>       iterator;
   typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
-  typedef typename sparsetable<V>::const_nonempty_iterator
+  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::const_nonempty_iterator
+      st_iterator;
 
   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
   typedef V value_type;
-  typedef
-  typedef
-  typedef
-  typedef
+  typedef typename value_alloc_type::difference_type difference_type;
+  typedef typename value_alloc_type::size_type size_type;
+  typedef typename value_alloc_type::const_reference reference;
+  typedef typename value_alloc_type::const_pointer pointer;
 
   // "Real" constructor and default constructor
   sparse_hashtable_const_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
@@ -246,16 +262,20 @@ struct sparse_hashtable_const_iterator {
 // And once again, but this time freeing up memory as we iterate
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 struct sparse_hashtable_destructive_iterator {
+ private:
+  typedef typename A::template rebind<V>::other value_alloc_type;
+
  public:
   typedef sparse_hashtable_destructive_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
-  typedef typename sparsetable<V>::destructive_iterator
+  typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::destructive_iterator
+      st_iterator;
 
   typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
   typedef V value_type;
-  typedef
-  typedef
-  typedef
-  typedef
+  typedef typename value_alloc_type::difference_type difference_type;
+  typedef typename value_alloc_type::size_type size_type;
+  typedef typename value_alloc_type::reference reference;
+  typedef typename value_alloc_type::pointer pointer;
 
   // "Real" constructor and default constructor
   sparse_hashtable_destructive_iterator(const
@@ -295,18 +315,22 @@ struct sparse_hashtable_destructive_iterator {
 template <class Value, class Key, class HashFcn,
           class ExtractKey, class SetKey, class EqualKey, class Alloc>
 class sparse_hashtable {
+ private:
+  typedef typename Alloc::template rebind<Value>::other value_alloc_type;
+
  public:
   typedef Key key_type;
   typedef Value value_type;
   typedef HashFcn hasher;
   typedef EqualKey key_equal;
-
-
-  typedef
-  typedef
-  typedef
-  typedef
-  typedef
+  typedef Alloc allocator_type;
+
+  typedef typename value_alloc_type::size_type size_type;
+  typedef typename value_alloc_type::difference_type difference_type;
+  typedef typename value_alloc_type::reference reference;
+  typedef typename value_alloc_type::const_reference const_reference;
+  typedef typename value_alloc_type::pointer pointer;
+  typedef typename value_alloc_type::const_pointer const_pointer;
   typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey,
                                     SetKey, EqualKey, Alloc>
   iterator;
@@ -326,22 +350,23 @@ class sparse_hashtable {
   // How full we let the table get before we resize, by default.
   // Knuth says .8 is good -- higher causes us to probe too much,
   // though it saves memory.
-  static const
+  static const int HT_OCCUPANCY_PCT; // = 80 (out of 100);
 
   // How empty we let the table get before we resize lower, by default.
-  //
-
+  // (0.0 means never resize lower.)
+  // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
+  static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;
 
   // Minimum size we're willing to let hashtables be.
   // Must be a power of two, and at least 4.
-  // Note, however, that for a given hashtable, the
-  //
-  static const
+  // Note, however, that for a given hashtable, the initial size is a
+  // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
+  static const size_type HT_MIN_BUCKETS = 4;
 
   // By default, if you don't specify a hashtable size at
   // construction-time, we use this size.  Must be a power of two, and
   // at least HT_MIN_BUCKETS.
-  static const
+  static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
 
   // ITERATOR FUNCTIONS
   iterator begin()             { return iterator(this, table.nonempty_begin(),
@@ -399,8 +424,12 @@ class sparse_hashtable {
 
 
   // ACCESSOR FUNCTIONS for the things we templatize on, basically
-  hasher hash_funct() const
-  key_equal key_eq() const
+  hasher hash_funct() const { return settings; }
+  key_equal key_eq() const  { return key_info; }
+  allocator_type get_allocator() const { return table.get_allocator(); }
+
+  // Accessor function for statistics gathering.
+  int num_table_copies() const { return settings.num_ht_copies(); }
 
  private:
   // We need to copy values when we set the special marker for deleted
@@ -408,7 +437,7 @@ class sparse_hashtable {
   // operator because value_type might not be assignable (it's often
   // pair<const X, Y>).  We use explicit destructor invocation and
   // placement new to get around this.  Arg.
-  void set_value(
+  void set_value(pointer dst, const_reference src) {
     dst->~value_type();   // delete the old value, if any
     new(dst) value_type(src);
   }
@@ -419,7 +448,6 @@ class sparse_hashtable {
   // can't do a destructive copy, we make the typename private.
   enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};
 
-
   // DELETE HELPER FUNCTIONS
   // This lets the user describe a key that will indicate deleted
   // table entries.  This key should be an "impossible" entry --
@@ -435,59 +463,88 @@ class sparse_hashtable {
     assert(num_deleted == 0);
   }
 
+  bool test_deleted_key(const key_type& key) const {
+    // The num_deleted test is crucial for read(): after read(), the ht values
+    // are garbage, and we don't want to think some of them are deleted.
+    // Invariant: !use_deleted implies num_deleted is 0.
+    assert(settings.use_deleted() || num_deleted == 0);
+    return num_deleted > 0 && equals(key_info.delkey, key);
+  }
+
  public:
   void set_deleted_key(const key_type &key) {
     // It's only safe to change what "deleted" means if we purge deleted guys
     squash_deleted();
-
-    delkey = key;
+    settings.set_use_deleted(true);
+    key_info.delkey = key;
   }
   void clear_deleted_key() {
     squash_deleted();
-
+    settings.set_use_deleted(false);
+  }
+  key_type deleted_key() const {
+    assert(settings.use_deleted()
+           && "Must set deleted key before calling deleted_key");
+    return key_info.delkey;
   }
 
   // These are public so the iterators can use them
   // True if the item at position bucknum is "deleted" marker
   bool test_deleted(size_type bucknum) const {
-
-
-    return (use_deleted && num_deleted > 0 && table.test(bucknum) &&
-            equals(delkey, get_key(table.unsafe_get(bucknum))));
+    if (num_deleted == 0 || !table.test(bucknum)) return false;
+    return test_deleted_key(get_key(table.unsafe_get(bucknum)));
   }
   bool test_deleted(const iterator &it) const {
-
-
+    if (!settings.use_deleted()) return false;
+    return test_deleted_key(get_key(*it));
   }
   bool test_deleted(const const_iterator &it) const {
-
-
+    if (!settings.use_deleted()) return false;
+    return test_deleted_key(get_key(*it));
   }
   bool test_deleted(const destructive_iterator &it) const {
-
-
+    if (!settings.use_deleted()) return false;
+    return test_deleted_key(get_key(*it));
+  }
+
+ private:
+  // Set it so test_deleted is true.  true if object didn't used to be deleted.
+  // TODO(csilvers): make these private (also in densehashtable.h)
+  bool set_deleted(iterator &it) {
+    assert(settings.use_deleted());
+    bool retval = !test_deleted(it);
+    // &* converts from iterator to value-type.
+    set_key(&(*it), key_info.delkey);
+    return retval;
   }
-  // Set it so test_deleted is
-
+  // Set it so test_deleted is false.  true if object used to be deleted.
+  bool clear_deleted(iterator &it) {
+    assert(settings.use_deleted());
+    // Happens automatically when we assign something else in its place.
+    return test_deleted(it);
+  }
+
+  // We also allow to set/clear the deleted bit on a const iterator.
+  // We allow a const_iterator for the same reason you can delete a
+  // const pointer: it's convenient, and semantically you can't use
+  // 'it' after it's been deleted anyway, so its const-ness doesn't
+  // really matter.
   bool set_deleted(const_iterator &it) {
-    assert(use_deleted);
+    assert(settings.use_deleted());  // bad if set_deleted_key() wasn't called
     bool retval = !test_deleted(it);
-
-    set_key(const_cast<value_type*>(&(*it)), delkey);
+    set_key(const_cast<pointer>(&(*it)), key_info.delkey);
     return retval;
   }
-  // Set it so test_deleted is false.  true if object used to be deleted
+  // Set it so test_deleted is false.  true if object used to be deleted.
   bool clear_deleted(const_iterator &it) {
-    assert(use_deleted);
-    // happens automatically when we assign something else in its place
+    assert(settings.use_deleted());  // bad if set_deleted_key() wasn't called
     return test_deleted(it);
   }
 
-
   // FUNCTIONS CONCERNING SIZE
+ public:
   size_type size() const      { return table.num_nonempty() - num_deleted; }
-
-  size_type max_size() const          { return (size_type(-1) >> 1U) + 1; }
+  size_type max_size() const          { return table.max_size(); }
   bool empty() const          { return size() == 0; }
   size_type bucket_count() const      { return table.size(); }
   size_type max_bucket_count() const  { return max_size(); }
@@ -497,54 +554,57 @@ class sparse_hashtable {
     return begin(i) == end(i) ? 0 : 1;
   }
 
-
  private:
   // Because of the above, size_type(-1) is never legal; use it for errors
   static const size_type ILLEGAL_BUCKET = size_type(-1);
 
-
-  //
-  //
-
-    size_type sz = HT_MIN_BUCKETS;
-    while ( sz < min_buckets_wanted || num_elts >= sz * enlarge_resize_percent )
-      sz *= 2;
-    return sz;
-  }
-
-  // Used after a string of deletes
-  void maybe_shrink() {
+  // Used after a string of deletes.  Returns true if we actually shrunk.
+  // TODO(csilvers): take a delta so we can take into account inserts
+  // done after shrinking.  Maybe make part of the Settings class?
+  bool maybe_shrink() {
     assert(table.num_nonempty() >= num_deleted);
     assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
    assert(bucket_count() >= HT_MIN_BUCKETS);
+    bool retval = false;
 
     // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
     // we'll never shrink until you get relatively big, and we'll never
     // shrink below HT_DEFAULT_STARTING_BUCKETS.  Otherwise, something
     // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
     // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
-
-
-
+    const size_type num_remain = table.num_nonempty() - num_deleted;
+    const size_type shrink_threshold = settings.shrink_threshold();
+    if (shrink_threshold > 0 && num_remain < shrink_threshold &&
+        bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
+      const float shrink_factor = settings.shrink_factor();
      size_type sz = bucket_count() / 2;    // find how much we should shrink
-      while (
-
-              shrink_resize_percent )
+      while (sz > HT_DEFAULT_STARTING_BUCKETS &&
+             num_remain < static_cast<size_type>(sz * shrink_factor)) {
        sz /= 2;                            // stay a power of 2
+      }
       sparse_hashtable tmp(MoveDontCopy, *this, sz);
       swap(tmp);                            // now we are tmp
+      retval = true;
     }
-
+    settings.set_consider_shrink(false);   // because we just considered it
+    return retval;
   }
 
   // We'll let you resize a hashtable -- though this makes us copy all!
   // When you resize, you say, "make it big enough for this many more elements"
-
-
-
+  // Returns true if we actually resized, false if size was already ok.
+  bool resize_delta(size_type delta) {
+    bool did_resize = false;
+    if ( settings.consider_shrink() ) {  // see if lots of deletes happened
+      if ( maybe_shrink() )
+        did_resize = true;
+    }
+    if (table.num_nonempty() >=
+        (STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
+      throw std::length_error("resize overflow");
     if ( bucket_count() >= HT_MIN_BUCKETS &&
-         (table.num_nonempty() + delta) <= enlarge_threshold )
-      return;
+         (table.num_nonempty() + delta) <= settings.enlarge_threshold() )
+      return did_resize;                       // we're ok as we are
 
     // Sometimes, we need to resize just to get rid of all the
     // "deleted" buckets that are clogging up the hashtable.  So when
@@ -552,13 +612,34 @@ class sparse_hashtable {
     // are currently taking up room).  But later, when we decide what
     // size to resize to, *don't* count deleted buckets, since they
     // get discarded during the resize.
-    const size_type needed_size =
-
-
-
-
-
+    const size_type needed_size =
+        settings.min_buckets(table.num_nonempty() + delta, 0);
+    if ( needed_size <= bucket_count() )      // we have enough buckets
+      return did_resize;
+
+    size_type resize_to =
+        settings.min_buckets(table.num_nonempty() - num_deleted + delta,
+                             bucket_count());
+    if (resize_to < needed_size &&    // may double resize_to
+        resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
+      // This situation means that we have enough deleted elements,
+      // that once we purge them, we won't actually have needed to
+      // grow.  But we may want to grow anyway: if we just purge one
+      // element, say, we'll have to grow anyway next time we
+      // insert.  Might as well grow now, since we're already going
+      // through the trouble of copying (in order to purge the
+      // deleted elements).
+      const size_type target =
+          static_cast<size_type>(settings.shrink_size(resize_to*2));
+      if (table.num_nonempty() - num_deleted + delta >= target) {
+        // Good, we won't be below the shrink threshhold even if we double.
+        resize_to *= 2;
+      }
     }
+
+    sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
+    swap(tmp);                             // now we are tmp
+    return true;
   }
 
   // Used to actually do the rehashing when we grow/shrink a hashtable
@@ -566,16 +647,17 @@ class sparse_hashtable {
     clear();            // clear table, set num_deleted to 0
 
     // If we need to change the size of our table, do it now
-    const size_type resize_to =
+    const size_type resize_to =
+        settings.min_buckets(ht.size(), min_buckets_wanted);
     if ( resize_to > bucket_count() ) { // we don't have enough buckets
       table.resize(resize_to);               // sets the number of buckets
-      reset_thresholds();
+      settings.reset_thresholds(bucket_count());
     }
 
     // We use a normal iterator to get non-deleted bcks from ht
     // We could use insert() here, but since we know there are
     // no duplicates and no deleted items, we can be more efficient
-    assert(
+    assert((bucket_count() & (bucket_count()-1)) == 0);      // a power of two
     for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
       size_type num_probes = 0;              // how many times we've probed
       size_type bucknum;
@@ -584,10 +666,12 @@ class sparse_hashtable {
            table.test(bucknum);                          // not empty
            bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
         ++num_probes;
-        assert(num_probes < bucket_count()
+        assert(num_probes < bucket_count()
+               && "Hashtable is full: an error in key_equal<> or hash<>");
       }
       table.set(bucknum, *it);               // copies the value to here
     }
+    settings.inc_num_ht_copies();
   }
 
   // Implementation is like copy_from, but it destroys the table of the
@@ -598,14 +682,14 @@ class sparse_hashtable {
     clear();            // clear table, set num_deleted to 0
 
     // If we need to change the size of our table, do it now
-
+    size_type resize_to;
     if ( mover == MoveDontGrow )
       resize_to = ht.bucket_count();       // keep same size as old ht
     else                                     // MoveDontCopy
-      resize_to =
+      resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
     if ( resize_to > bucket_count() ) { // we don't have enough buckets
       table.resize(resize_to);               // sets the number of buckets
-      reset_thresholds();
+      settings.reset_thresholds(bucket_count());
     }
 
     // We use a normal iterator to get non-deleted bcks from ht
@@ -621,10 +705,12 @@ class sparse_hashtable {
            table.test(bucknum);                          // not empty
            bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count()-1) ) {
         ++num_probes;
-        assert(num_probes < bucket_count()
+        assert(num_probes < bucket_count()
+               && "Hashtable is full: an error in key_equal<> or hash<>");
       }
       table.set(bucknum, *it);               // copies the value to here
     }
+    settings.inc_num_ht_copies();
   }
 
 
@@ -634,28 +720,23 @@ class sparse_hashtable {
   // more useful as num_elements.  As a special feature, calling with
   // req_elements==0 will cause us to shrink if we can, saving space.
   void resize(size_type req_elements) {       // resize to this or larger
-    if ( consider_shrink || req_elements == 0 )
+    if ( settings.consider_shrink() || req_elements == 0 )
       maybe_shrink();
     if ( req_elements > table.num_nonempty() )    // we only grow
       resize_delta(req_elements - table.num_nonempty());
   }
 
-  // Get and change the value of
-  //
-  //
-  //
+  // Get and change the value of shrink_factor and enlarge_factor.  The
+  // description at the beginning of this file explains how to choose
+  // the values.  Setting the shrink parameter to 0.0 ensures that the
+  // table never shrinks.
   void get_resizing_parameters(float* shrink, float* grow) const {
-    *shrink =
-    *grow =
+    *shrink = settings.shrink_factor();
+    *grow = settings.enlarge_factor();
   }
   void set_resizing_parameters(float shrink, float grow) {
-
-
-    if (shrink > grow/2.0f)
-      shrink = grow / 2.0f;    // otherwise we thrash hashtable size
-    shrink_resize_percent = shrink;
-    enlarge_resize_percent = grow;
-    reset_thresholds();
+    settings.set_resizing_parameters(shrink, grow);
+    settings.reset_thresholds(bucket_count());
   }
 
   // CONSTRUCTORS -- as required by the specs, we take a size,
@@ -665,15 +746,17 @@ class sparse_hashtable {
   explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
                             const HashFcn& hf = HashFcn(),
                             const EqualKey& eql = EqualKey(),
+                            const ExtractKey& ext = ExtractKey(),
                             const SetKey& set = SetKey(),
-                            const
-
-
-
-
-
-
-
+                            const Alloc& alloc = Alloc())
+      : settings(hf),
+        key_info(ext, set, eql),
+        num_deleted(0),
+        table((expected_max_items_in_table == 0
+               ? HT_DEFAULT_STARTING_BUCKETS
+               : settings.min_buckets(expected_max_items_in_table, 0)),
+              alloc) {
+    settings.reset_thresholds(bucket_count());
   }
 
   // As a convenience for resize(), we allow an optional second argument
@@ -682,63 +765,51 @@ class sparse_hashtable {
   // into us instead of copying.
   sparse_hashtable(const sparse_hashtable& ht,
                    size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
-
-
-
-
-
-      table() {
-    reset_thresholds();
+      : settings(ht.settings),
+        key_info(ht.key_info),
+        num_deleted(0),
+        table(0, ht.get_allocator()) {
+    settings.reset_thresholds(bucket_count());
     copy_from(ht, min_buckets_wanted);   // copy_from() ignores deleted entries
   }
   sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
                    size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
-
-
-
-
-
-    reset_thresholds();
+      : settings(ht.settings),
+        key_info(ht.key_info),
+        num_deleted(0),
+        table(0, ht.get_allocator()) {
+    settings.reset_thresholds(bucket_count());
     move_from(mover, ht, min_buckets_wanted);  // ignores deleted entries
   }
 
   sparse_hashtable& operator= (const sparse_hashtable& ht) {
     if (&ht == this) return *this;        // don't copy onto ourselves
-
-
-
-
-
-
-    delkey = ht.delkey;
-    copy_from(ht, HT_MIN_BUCKETS);        // sets num_deleted to 0 too
+    settings = ht.settings;
+    key_info = ht.key_info;
+    num_deleted = ht.num_deleted;
+    // copy_from() calls clear and sets num_deleted to 0 too
+    copy_from(ht, HT_MIN_BUCKETS);
+    // we purposefully don't copy the allocator, which may not be copyable
     return *this;
   }
 
   // Many STL algorithms use swap instead of copy constructors
   void swap(sparse_hashtable& ht) {
-    STL_NAMESPACE::swap(
-    STL_NAMESPACE::swap(
-    STL_NAMESPACE::swap(get_key, ht.get_key);
-    STL_NAMESPACE::swap(set_key, ht.set_key);
+    STL_NAMESPACE::swap(settings, ht.settings);
+    STL_NAMESPACE::swap(key_info, ht.key_info);
     STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
-    STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
-    STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent);
-    STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent);
-    STL_NAMESPACE::swap(delkey, ht.delkey);
     table.swap(ht.table);
-    reset_thresholds();
-    ht.reset_thresholds();
   }
 
   // It's always nice to be able to clear a table without deallocating it
   void clear() {
-
-
+    if (!empty() || (num_deleted != 0)) {
+      table.clear();
+    }
+    settings.reset_thresholds(bucket_count());
     num_deleted = 0;
   }
 
-
   // LOOKUP ROUTINES
  private:
   // Returns a pair of positions: 1st where the object is, 2nd where
@@ -770,7 +841,8 @@ class sparse_hashtable {
       }
       ++num_probes;                        // we're doing another probe
       bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
-      assert(num_probes < bucket_count()
+      assert(num_probes < bucket_count()
+             && "Hashtable is full: an error in key_equal<> or hash<>");
     }
   }
 
@@ -830,32 +902,58 @@ class sparse_hashtable {
 
   // INSERTION ROUTINES
  private:
+  // Private method used by insert_noresize and find_or_insert.
+  iterator insert_at(const_reference obj, size_type pos) {
+    if (size() >= max_size())
+      throw std::length_error("insert overflow");
+    if ( test_deleted(pos) ) {      // just replace if it's been deleted
+      // The set() below will undelete this object.  We just worry about stats
+      assert(num_deleted > 0);
+      --num_deleted;                // used to be, now it isn't
+    }
+    table.set(pos, obj);
+    return iterator(this, table.get_iter(pos), table.nonempty_end());
+  }
+
   // If you know *this is big enough to hold obj, use this routine
-  pair<iterator, bool> insert_noresize(
+  pair<iterator, bool> insert_noresize(const_reference obj) {
     // First, double-check we're not inserting delkey
-    assert(!use_deleted || !equals(get_key(obj), delkey))
+    assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
+           && "Inserting the deleted key");
     const pair<size_type,size_type> pos = find_position(get_key(obj));
     if ( pos.first != ILLEGAL_BUCKET) {      // object was already there
       return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
                                           table.nonempty_end()),
                                  false);          // false: we didn't insert
     } else {                                 // pos.second says where to put it
-
-      // The set() below will undelete this object.  We just worry about stats
-        assert(num_deleted > 0);
-        --num_deleted;                // used to be, now it isn't
-      }
-      table.set(pos.second, obj);
-      return pair<iterator,bool>(iterator(this, table.get_iter(pos.second),
-                                          table.nonempty_end()),
-                                 true);           // true: we did insert
+      return pair<iterator,bool>(insert_at(obj, pos.second), true);
     }
   }
 
+  // Specializations of insert(it, it) depending on the power of the iterator:
+  // (1) Iterator supports operator-, resize before inserting
+  template <class ForwardIterator>
+  void insert(ForwardIterator f, ForwardIterator l, STL_NAMESPACE::forward_iterator_tag) {
+    size_t dist = STL_NAMESPACE::distance(f, l);
+    if (dist >= (std::numeric_limits<size_type>::max)())
+      throw std::length_error("insert-range overflow");
+    resize_delta(static_cast<size_type>(dist));
+    for ( ; dist > 0; --dist, ++f) {
+      insert_noresize(*f);
+    }
+  }
+
+  // (2) Arbitrary iterator, can't tell how much to resize
+  template <class InputIterator>
+  void insert(InputIterator f, InputIterator l, STL_NAMESPACE::input_iterator_tag) {
+    for ( ; f != l; ++f)
+      insert(*f);
+  }
+
  public:
   // This is the normal insert routine, used by the outside world
-  pair<iterator, bool> insert(
-    resize_delta(1);
+  pair<iterator, bool> insert(const_reference obj) {
+    resize_delta(1);                      // adding an object, grow if need be
     return insert_noresize(obj);
   }
 
@@ -866,66 +964,102 @@ class sparse_hashtable {
     insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
   }
 
-  //
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  // This is public only because sparse_hash_map::operator[] uses it.
+  // It does the minimal amount of work to implement operator[].
+  template <class DataType>
+  DataType& find_or_insert(const key_type& key) {
+    // First, double-check we're not inserting delkey
+    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
+           && "Inserting the deleted key");
+    const pair<size_type,size_type> pos = find_position(key);
+    if ( pos.first != ILLEGAL_BUCKET) {  // object was already there
+      return table.get_iter(pos.first)->second;
+    } else if (resize_delta(1)) {        // needed to rehash to make room
+      // Since we resized, we can't use pos, so recalculate where to insert.
+      return insert_noresize(value_type(key, DataType())).first->second;
+    } else {                             // no need to rehash, insert right here
+      return insert_at(value_type(key, DataType()), pos.second)->second;
+    }
   }
 
-
   // DELETION ROUTINES
   size_type erase(const key_type& key) {
-    // First, double-check we're not erasing delkey
-    assert(!use_deleted || !equals(key, delkey))
+    // First, double-check we're not erasing delkey.
+    assert((!settings.use_deleted() || !equals(key, key_info.delkey))
+           && "Erasing the deleted key");
+    assert(!settings.use_deleted() || !equals(key, key_info.delkey));
     const_iterator pos = find(key);   // shrug: shouldn't need to be const
     if ( pos != end() ) {
       assert(!test_deleted(pos));  // or find() shouldn't have returned it
       set_deleted(pos);
       ++num_deleted;
-
+      // will think about shrink after next insert
+      settings.set_consider_shrink(true);
       return 1;                    // because we deleted one thing
     } else {
       return 0;                    // because we deleted nothing
     }
   }
 
-  //
-
-  // Since that's a moot issue for deleted keys, we allow const_iterators
-  void erase(const_iterator pos) {
+  // We return the iterator past the deleted item.
+  void erase(iterator pos) {
     if ( pos == end() ) return;    // sanity check
     if ( set_deleted(pos) ) {      // true if object has been newly deleted
       ++num_deleted;
-
+      // will think about shrink after next insert
+      settings.set_consider_shrink(true);
+    }
+  }
+
+  void erase(iterator f, iterator l) {
+    for ( ; f != l; ++f) {
+      if ( set_deleted(f) )  // should always be true
+        ++num_deleted;
     }
+    // will think about shrink after next insert
+    settings.set_consider_shrink(true);
   }
 
+  // We allow you to erase a const_iterator just like we allow you to
+  // erase an iterator.  This is in parallel to 'delete': you can delete
+  // a const pointer just like a non-const pointer.  The logic is that
+  // you can't use the object after it's erased anyway, so it doesn't matter
+  // if it's const or not.
+  void erase(const_iterator pos) {
+    if ( pos == end() ) return;    // sanity check
+    if ( set_deleted(pos) ) {      // true if object has been newly deleted
+      ++num_deleted;
+      // will think about shrink after next insert
+      settings.set_consider_shrink(true);
+    }
+  }
   void erase(const_iterator f, const_iterator l) {
     for ( ; f != l; ++f) {
       if ( set_deleted(f) )  // should always be true
         ++num_deleted;
     }
-
+    // will think about shrink after next insert
+    settings.set_consider_shrink(true);
   }
 
 
   // COMPARISON
   bool operator==(const sparse_hashtable& ht) const {
-
-
-
+    if (size() != ht.size()) {
+      return false;
+    } else if (this == &ht) {
+      return true;
+    } else {
+      // Iterate through the elements in "this" and see if the
+      // corresponding element is in ht
+      for ( const_iterator it = begin(); it != end(); ++it ) {
+        const_iterator it2 = ht.find(get_key(*it));
+        if ((it2 == ht.end()) || (*it != *it2)) {
+          return false;
+        }
+      }
+      return true;
+    }
   }
   bool operator!=(const sparse_hashtable& ht) const {
     return !(*this == ht);
@@ -946,7 +1080,7 @@ class sparse_hashtable {
   bool read_metadata(FILE *fp) {
     num_deleted = 0;            // since we got rid before writing
     bool result = table.read_metadata(fp);
-    reset_thresholds();
+    settings.reset_thresholds(bucket_count());
     return result;
   }
 
@@ -961,31 +1095,67 @@ class sparse_hashtable {
   }
 
  private:
-  //
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+  // Table is the main storage class.
+  typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table;
+
+  // Package templated functors with the other types to eliminate memory
+  // needed for storing these zero-size operators.  Since ExtractKey and
+  // hasher's operator() might have the same function signature, they
+  // must be packaged in different classes.
+  struct Settings :
+      sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
+    explicit Settings(const hasher& hf)
+        : sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
+            hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
+  };
+
+  // KeyInfo stores delete key and packages zero-size functors:
+  // ExtractKey and SetKey.
+  class KeyInfo : public ExtractKey, public SetKey, public key_equal {
+   public:
+    KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
+        : ExtractKey(ek),
+          SetKey(sk),
+          key_equal(eq) {
+    }
+    const key_type get_key(const_reference v) const {
+      return ExtractKey::operator()(v);
+    }
+    void set_key(pointer v, const key_type& k) const {
+      SetKey::operator()(v, k);
+    }
+    bool equals(const key_type& a, const key_type& b) const {
+      return key_equal::operator()(a, b);
+    }
+
+    // Which key marks deleted entries.
+    // TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
+    typename remove_const<key_type>::type delkey;
+  };
+
+  // Utility functions to access the templated operators
+  size_type hash(const key_type& v) const {
+    return settings.hash(v);
   }
+  bool equals(const key_type& a, const key_type& b) const {
+    return key_info.equals(a, b);
+  }
+  const key_type get_key(const_reference v) const {
+    return key_info.get_key(v);
+  }
+  void set_key(pointer v, const key_type& k) const {
+    key_info.set_key(v, k);
+  }
+
+ private:
+  // Actual data
+  Settings settings;
+  KeyInfo key_info;
+  size_type num_deleted;  // how many occupied buckets are marked deleted
+  Table table;            // holds num_buckets and num_elements too
 };
 
+
 // We need a global swap as well
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
 inline void swap(sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
@@ -1002,13 +1172,14 @@ const typename sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
 // How full we let the table get before we resize.  Knuth says .8 is
 // good -- higher causes us to probe too much, though saves memory
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
-const
+const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 80;
 
 // How empty we let the table get before we resize lower.
-// It should be less than
+// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
 template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
-const
-
+const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
+    = static_cast<int>(0.4 *
+                       sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);
 
 _END_GOOGLE_NAMESPACE_
 
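The find, copy_from, and move_from loops in this file all advance with bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one, and JUMP_ is defined near the top of the file as ( num_probes ), i.e. quadratic (triangular-number) probing over a power-of-two table. A standalone sketch of that probe sequence follows; the starting bucket 7 and the bucket count 32 are arbitrary values chosen for illustration, not anything this diff specifies.

// Illustrative sketch only: the probe sequence used by sparsehashtable.h.
// JUMP_(key, num_probes) == num_probes gives quadratic probing, and the
// bitmask works because the bucket count is kept at a power of two.
#include <cstdio>
#include <cstddef>

int main() {
  const size_t bucket_count = 32;               // must be a power of two
  const size_t bucket_count_minus_one = bucket_count - 1;
  size_t bucknum = 7 & bucket_count_minus_one;  // stands in for hash(key) & mask

  for (size_t num_probes = 0; num_probes < 6; ) {
    std::printf("probe %zu -> bucket %zu\n", num_probes, bucknum);
    ++num_probes;                               // same order as the loop above
    bucknum = (bucknum + num_probes) & bucket_count_minus_one;  // JUMP_ = num_probes
  }
  return 0;  // visits buckets 7, 8, 10, 13, 17, 22
}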