google_hash 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +61 -27
- data/Rakefile +4 -1
- data/TODO +5 -0
- data/VERSION +1 -1
- data/changelog +3 -0
- data/ext/extconf.rb +10 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/AUTHORS +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/COPYING +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/ChangeLog +47 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/INSTALL +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/Makefile.am +29 -14
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/Makefile.in +77 -42
- data/ext/sparsehash-1.8.1/NEWS +71 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/README +0 -0
- data/ext/{sparsehash-1.5.2/README.windows → sparsehash-1.8.1/README_windows.txt} +25 -25
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/TODO +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/aclocal.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/compile +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/config.guess +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/config.sub +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/configure +3690 -4560
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/configure.ac +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/depcomp +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/dense_hash_map.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/dense_hash_set.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/designstyle.css +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/implementation.html +11 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/index.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/performance.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparse_hash_map.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparse_hash_set.html +65 -5
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/doc/sparsetable.html +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/Makefile +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/README +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/example.c +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/libchash.c +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/experimental/libchash.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/google-sparsehash.sln +17 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/install-sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/acx_pthread.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/google_namespace.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/namespaces.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_hash.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_hash_fun.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/m4/stl_namespace.m4 +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/missing +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/mkinstalldirs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb.sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/README +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/changelog +24 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/compat +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/control +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/copyright +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/docs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/rules +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/sparsehash.dirs +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/deb/sparsehash.install +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/rpm.sh +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/packages/rpm/rpm.spec +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/config.h.in +3 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/config.h.include +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/dense_hash_map +43 -27
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/dense_hash_set +40 -19
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparse_hash_map +32 -23
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparse_hash_set +31 -21
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsehash/densehashtable.h +481 -298
- data/ext/sparsehash-1.8.1/src/google/sparsehash/hashtable-common.h +178 -0
- data/ext/sparsehash-1.8.1/src/google/sparsehash/libc_allocator_with_realloc.h +121 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsehash/sparsehashtable.h +404 -233
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/sparsetable +173 -83
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/google/type_traits.h +3 -29
- data/ext/sparsehash-1.8.1/src/hash_test_interface.h +1011 -0
- data/ext/sparsehash-1.8.1/src/hashtable_test.cc +1733 -0
- data/ext/sparsehash-1.8.1/src/libc_allocator_with_realloc_test.cc +129 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/simple_test.cc +1 -1
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/sparsetable_unittest.cc +202 -6
- data/ext/sparsehash-1.8.1/src/testutil.h +251 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/time_hash_map.cc +128 -54
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/type_traits_unittest.cc +30 -20
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/config.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/google/sparsehash/sparseconfig.h +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/port.cc +0 -0
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/src/windows/port.h +0 -0
- data/ext/sparsehash-1.8.1/vsprojects/hashtable_test/hashtable_test.vcproj +197 -0
- data/ext/{sparsehash-1.5.2/vsprojects/hashtable_unittest/hashtable_unittest.vcproj → sparsehash-1.8.1/vsprojects/simple_test/simple_test.vcproj} +9 -8
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/sparsetable_unittest/sparsetable_unittest.vcproj +0 -2
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/time_hash_map/time_hash_map.vcproj +3 -2
- data/ext/{sparsehash-1.5.2 → sparsehash-1.8.1}/vsprojects/type_traits_unittest/type_traits_unittest.vcproj +0 -2
- data/ext/template/google_hash.cpp.erb +2 -1
- data/ext/template/main.cpp.erb +1 -1
- data/results.txt +6 -22
- data/spec/benchmark.rb +57 -0
- data/spec/spec.google_hash.rb +1 -8
- metadata +140 -130
- data/ext/benchmark.rb +0 -47
- data/ext/sparsehash-1.5.2/NEWS +0 -0
- data/ext/sparsehash-1.5.2/src/hashtable_unittest.cc +0 -1375
- data/ext/sparsehash-1.5.2/src/words +0 -8944
- data/types.txt +0 -18
|
// Copyright (c) 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Giao Nguyen

#ifndef UTIL_GTL_HASHTABLE_COMMON_H_
#define UTIL_GTL_HASHTABLE_COMMON_H_

#include <assert.h>
#include <stdexcept>   // BUGFIX: needed for std::length_error thrown by min_buckets()

// Settings contains parameters for growing and shrinking the table.
// It also packages the zero-size hash functor (by inheriting from it,
// so the empty-base optimization keeps this object small).

template<typename Key, typename HashFunc,
         typename SizeType, int HT_MIN_BUCKETS>
class sh_hashtable_settings : public HashFunc {
 public:
  typedef Key key_type;
  typedef HashFunc hasher;
  typedef SizeType size_type;

 public:
  // enlarge/shrink thresholds start at 0; the owning hashtable is
  // expected to call reset_thresholds() once it knows its bucket count.
  sh_hashtable_settings(const hasher& hf,
                        const float ht_occupancy_flt,
                        const float ht_empty_flt)
      : hasher(hf),
        enlarge_threshold_(0),
        shrink_threshold_(0),
        consider_shrink_(false),
        use_empty_(false),
        use_deleted_(false),
        num_ht_copies_(0) {
    set_enlarge_factor(ht_occupancy_flt);
    set_shrink_factor(ht_empty_flt);
  }

  // Delegates to the inherited hash functor.
  size_type hash(const key_type& v) const {
    return hasher::operator()(v);
  }

  // enlarge_factor: how full the table may get before it is doubled.
  float enlarge_factor() const {
    return enlarge_factor_;
  }
  void set_enlarge_factor(float f) {
    enlarge_factor_ = f;
  }
  // shrink_factor: how empty the table may get before it is halved.
  float shrink_factor() const {
    return shrink_factor_;
  }
  void set_shrink_factor(float f) {
    shrink_factor_ = f;
  }

  // Thresholds are the factors above pre-multiplied by the bucket count,
  // so the hot insert/erase paths compare integers, not floats.
  size_type enlarge_threshold() const {
    return enlarge_threshold_;
  }
  void set_enlarge_threshold(size_type t) {
    enlarge_threshold_ = t;
  }
  size_type shrink_threshold() const {
    return shrink_threshold_;
  }
  void set_shrink_threshold(size_type t) {
    shrink_threshold_ = t;
  }

  // Number of occupied buckets that triggers a grow, for a table of x buckets.
  size_type enlarge_size(size_type x) const {
    return static_cast<size_type>(x * enlarge_factor_);
  }
  // Number of occupied buckets below which a shrink is considered.
  size_type shrink_size(size_type x) const {
    return static_cast<size_type>(x * shrink_factor_);
  }

  // consider_shrink: true if we should try to shrink before the next insert.
  bool consider_shrink() const {
    return consider_shrink_;
  }
  void set_consider_shrink(bool t) {
    consider_shrink_ = t;
  }

  // use_empty: used only by densehashtable, not sparsehashtable.
  bool use_empty() const {
    return use_empty_;
  }
  void set_use_empty(bool t) {
    use_empty_ = t;
  }

  // use_deleted: false until the deleted-key has been set.
  bool use_deleted() const {
    return use_deleted_;
  }
  void set_use_deleted(bool t) {
    use_deleted_ = t;
  }

  // num_ht_copies: statistics counter incremented on every Copy/Move.
  size_type num_ht_copies() const {
    return static_cast<size_type>(num_ht_copies_);
  }
  void inc_num_ht_copies() {
    ++num_ht_copies_;
  }

  // Reset the enlarge and shrink thresholds for a table of num_buckets.
  void reset_thresholds(int num_buckets) {
    set_enlarge_threshold(enlarge_size(num_buckets));
    set_shrink_threshold(shrink_size(num_buckets));
    // whatever caused us to reset already considered shrinking
    set_consider_shrink(false);
  }

  // Caller is responsible for calling reset_thresholds right after
  // set_resizing_parameters.
  void set_resizing_parameters(float shrink, float grow) {
    assert(shrink >= 0.0);
    assert(grow <= 1.0);
    if (shrink > grow/2.0f)
      shrink = grow / 2.0f;    // otherwise we thrash hashtable size
    set_shrink_factor(shrink);
    set_enlarge_factor(grow);
  }

  // This is the smallest size a hashtable can be without being too crowded.
  // If you like, you can give a min #buckets as well as a min #elts.
  size_type min_buckets(size_type num_elts, size_type min_buckets_wanted) {
    float enlarge = enlarge_factor();
    size_type sz = HT_MIN_BUCKETS;             // min buckets allowed
    while ( sz < min_buckets_wanted ||
            num_elts >= static_cast<size_type>(sz * enlarge) ) {
      // This just prevents overflowing size_type, since sz can exceed
      // max_size() here.
      if (static_cast<size_type>(sz * 2) < sz) {
        throw std::length_error("resize overflow");  // protect against overflow
      }
      sz *= 2;
    }
    return sz;
  }

 private:
  size_type enlarge_threshold_;  // table.size() * enlarge_factor
  size_type shrink_threshold_;   // table.size() * shrink_factor
  float enlarge_factor_;         // how full before resize
  float shrink_factor_;          // how empty before resize
  // consider_shrink=true if we should try to shrink before next insert
  bool consider_shrink_;
  bool use_empty_;    // used only by densehashtable, not sparsehashtable
  bool use_deleted_;  // false until delkey has been set
  // num_ht_copies is a counter incremented every Copy/Move
  unsigned int num_ht_copies_;
};

#endif  // UTIL_GTL_HASHTABLE_COMMON_H_
|
// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Guilin Chen

#ifndef UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
#define UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_

#include <google/sparsehash/sparseconfig.h>

#include <stdlib.h>           // for malloc/realloc/free
#include <stddef.h>           // for ptrdiff_t
#include <new>                // BUGFIX: needed for placement new in construct()

_START_GOOGLE_NAMESPACE_

// An STL-style allocator backed directly by the C heap (malloc/free),
// extended with reallocate() so containers of trivially-relocatable
// elements can grow in place via realloc().
template<class T>
class libc_allocator_with_realloc {
 public:
  typedef T value_type;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;

  typedef T* pointer;
  typedef const T* const_pointer;
  typedef T& reference;
  typedef const T& const_reference;

  // The allocator is stateless, so all instances are interchangeable.
  libc_allocator_with_realloc() {}
  libc_allocator_with_realloc(const libc_allocator_with_realloc&) {}
  ~libc_allocator_with_realloc() {}

  pointer address(reference r) const { return &r; }
  const_pointer address(const_reference r) const { return &r; }

  // Allocates raw storage for n objects; returns NULL on failure.
  // The second (hint) argument is accepted for STL compatibility and ignored.
  pointer allocate(size_type n, const_pointer = 0) {
    if (n > max_size()) {
      return 0;  // BUGFIX: n * sizeof(value_type) would overflow size_t
    }
    return static_cast<pointer>(malloc(n * sizeof(value_type)));
  }
  void deallocate(pointer p, size_type) {
    free(p);
  }
  // Resizes the allocation at p to hold n objects, possibly in place.
  // Only safe for element types that may be moved with memcpy.
  pointer reallocate(pointer p, size_type n) {
    if (n > max_size()) {
      return 0;  // BUGFIX: n * sizeof(value_type) would overflow size_t
    }
    return static_cast<pointer>(realloc(p, n * sizeof(value_type)));
  }

  size_type max_size() const {
    return static_cast<size_type>(-1) / sizeof(value_type);
  }

  void construct(pointer p, const value_type& val) {
    new(p) value_type(val);
  }
  void destroy(pointer p) { p->~value_type(); }

  // Converting constructor required by the Allocator rebind protocol.
  template <class U>
  libc_allocator_with_realloc(const libc_allocator_with_realloc<U>&) {}

  template<class U>
  struct rebind {
    typedef libc_allocator_with_realloc<U> other;
  };
};

// libc_allocator_with_realloc<void> specialization.
template<>
class libc_allocator_with_realloc<void> {
 public:
  typedef void value_type;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef void* pointer;
  typedef const void* const_pointer;

  template<class U>
  struct rebind {
    typedef libc_allocator_with_realloc<U> other;
  };
};

// Stateless allocators always compare equal: memory from one instance
// may be freed through any other.
template<class T>
inline bool operator==(const libc_allocator_with_realloc<T>&,
                       const libc_allocator_with_realloc<T>&) {
  return true;
}

template<class T>
inline bool operator!=(const libc_allocator_with_realloc<T>&,
                       const libc_allocator_with_realloc<T>&) {
  return false;
}

_END_GOOGLE_NAMESPACE_

#endif  // UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_
|
@@ -59,20 +59,20 @@
|
|
|
59
59
|
// <google/sparse_hash_table> or <google/sparse_hash_set> instead.
|
|
60
60
|
//
|
|
61
61
|
// You can modify the following, below:
|
|
62
|
-
//
|
|
63
|
-
//
|
|
62
|
+
// HT_OCCUPANCY_PCT -- how full before we double size
|
|
63
|
+
// HT_EMPTY_PCT -- how empty before we halve size
|
|
64
64
|
// HT_MIN_BUCKETS -- smallest bucket size
|
|
65
65
|
// HT_DEFAULT_STARTING_BUCKETS -- default bucket size at construct-time
|
|
66
66
|
//
|
|
67
|
-
// You can also change
|
|
68
|
-
//
|
|
69
|
-
//
|
|
67
|
+
// You can also change enlarge_factor (which defaults to
|
|
68
|
+
// HT_OCCUPANCY_PCT), and shrink_factor (which defaults to
|
|
69
|
+
// HT_EMPTY_PCT) with set_resizing_parameters().
|
|
70
70
|
//
|
|
71
71
|
// How to decide what values to use?
|
|
72
|
-
//
|
|
72
|
+
// shrink_factor's default of .4 * OCCUPANCY_PCT, is probably good.
|
|
73
73
|
// HT_MIN_BUCKETS is probably unnecessary since you can specify
|
|
74
74
|
// (indirectly) the starting number of buckets at construct-time.
|
|
75
|
-
// For
|
|
75
|
+
// For enlarge_factor, you can use this chart to try to trade-off
|
|
76
76
|
// expected lookup time to the space taken up. By default, this
|
|
77
77
|
// code uses quadratic probing, though you can change it to linear
|
|
78
78
|
// via _JUMP below if you really want to.
|
|
@@ -82,7 +82,7 @@
|
|
|
82
82
|
// Quadratic collision resolution 1 - ln(1-L) - L/2 1/(1-L) - L - ln(1-L)
|
|
83
83
|
// Linear collision resolution [1+1/(1-L)]/2 [1+1/(1-L)2]/2
|
|
84
84
|
//
|
|
85
|
-
// --
|
|
85
|
+
// -- enlarge_factor -- 0.10 0.50 0.60 0.75 0.80 0.90 0.99
|
|
86
86
|
// QUADRATIC COLLISION RES.
|
|
87
87
|
// probes/successful lookup 1.05 1.44 1.62 2.01 2.21 2.85 5.11
|
|
88
88
|
// probes/unsuccessful lookup 1.11 2.19 2.82 4.64 5.81 11.4 103.6
|
|
@@ -103,21 +103,29 @@
|
|
|
103
103
|
// The probing method
|
|
104
104
|
// Linear probing
|
|
105
105
|
// #define JUMP_(key, num_probes) ( 1 )
|
|
106
|
-
// Quadratic
|
|
106
|
+
// Quadratic probing
|
|
107
107
|
#define JUMP_(key, num_probes) ( num_probes )
|
|
108
108
|
|
|
109
|
-
|
|
110
109
|
#include <google/sparsehash/sparseconfig.h>
|
|
111
110
|
#include <assert.h>
|
|
112
111
|
#include <algorithm> // For swap(), eg
|
|
112
|
+
#include <stdexcept> // For length_error
|
|
113
113
|
#include <iterator> // for facts about iterator tags
|
|
114
|
+
#include <limits> // for numeric_limits<>
|
|
114
115
|
#include <utility> // for pair<>
|
|
116
|
+
#include <google/sparsehash/hashtable-common.h>
|
|
115
117
|
#include <google/sparsetable> // Since that's basically what we are
|
|
116
118
|
|
|
117
119
|
_START_GOOGLE_NAMESPACE_
|
|
118
120
|
|
|
119
121
|
using STL_NAMESPACE::pair;
|
|
120
122
|
|
|
123
|
+
// The smaller this is, the faster lookup is (because the group bitmap is
|
|
124
|
+
// smaller) and the faster insert is, because there's less to move.
|
|
125
|
+
// On the other hand, there are more groups. Since group::size_type is
|
|
126
|
+
// a short, this number should be of the form 32*x + 16 to avoid waste.
|
|
127
|
+
static const u_int16_t DEFAULT_GROUP_SIZE = 48; // fits in 1.5 words
|
|
128
|
+
|
|
121
129
|
// Hashtable class, used to implement the hashed associative containers
|
|
122
130
|
// hash_set and hash_map.
|
|
123
131
|
//
|
|
@@ -131,7 +139,7 @@ using STL_NAMESPACE::pair;
|
|
|
131
139
|
// with key == deleted_key.
|
|
132
140
|
// EqualKey: Given two Keys, says whether they are the same (that is,
|
|
133
141
|
// if they are both associated with the same Value).
|
|
134
|
-
// Alloc: STL allocator to use to allocate memory.
|
|
142
|
+
// Alloc: STL allocator to use to allocate memory.
|
|
135
143
|
|
|
136
144
|
template <class Value, class Key, class HashFcn,
|
|
137
145
|
class ExtractKey, class SetKey, class EqualKey, class Alloc>
|
|
@@ -147,17 +155,21 @@ struct sparse_hashtable_const_iterator;
|
|
|
147
155
|
// that skips over deleted elements.
|
|
148
156
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
149
157
|
struct sparse_hashtable_iterator {
|
|
158
|
+
private:
|
|
159
|
+
typedef typename A::template rebind<V>::other value_alloc_type;
|
|
160
|
+
|
|
150
161
|
public:
|
|
151
162
|
typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
152
163
|
typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
|
|
153
|
-
typedef typename sparsetable<V>::nonempty_iterator
|
|
164
|
+
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::nonempty_iterator
|
|
165
|
+
st_iterator;
|
|
154
166
|
|
|
155
167
|
typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
|
|
156
168
|
typedef V value_type;
|
|
157
|
-
typedef
|
|
158
|
-
typedef
|
|
159
|
-
typedef
|
|
160
|
-
typedef
|
|
169
|
+
typedef typename value_alloc_type::difference_type difference_type;
|
|
170
|
+
typedef typename value_alloc_type::size_type size_type;
|
|
171
|
+
typedef typename value_alloc_type::reference reference;
|
|
172
|
+
typedef typename value_alloc_type::pointer pointer;
|
|
161
173
|
|
|
162
174
|
// "Real" constructor and default constructor
|
|
163
175
|
sparse_hashtable_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
|
|
@@ -195,17 +207,21 @@ struct sparse_hashtable_iterator {
|
|
|
195
207
|
// Now do it all again, but with const-ness!
|
|
196
208
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
197
209
|
struct sparse_hashtable_const_iterator {
|
|
210
|
+
private:
|
|
211
|
+
typedef typename A::template rebind<V>::other value_alloc_type;
|
|
212
|
+
|
|
198
213
|
public:
|
|
199
214
|
typedef sparse_hashtable_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
200
215
|
typedef sparse_hashtable_const_iterator<V,K,HF,ExK,SetK,EqK,A> const_iterator;
|
|
201
|
-
typedef typename sparsetable<V>::const_nonempty_iterator
|
|
216
|
+
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::const_nonempty_iterator
|
|
217
|
+
st_iterator;
|
|
202
218
|
|
|
203
219
|
typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
|
|
204
220
|
typedef V value_type;
|
|
205
|
-
typedef
|
|
206
|
-
typedef
|
|
207
|
-
typedef
|
|
208
|
-
typedef
|
|
221
|
+
typedef typename value_alloc_type::difference_type difference_type;
|
|
222
|
+
typedef typename value_alloc_type::size_type size_type;
|
|
223
|
+
typedef typename value_alloc_type::const_reference reference;
|
|
224
|
+
typedef typename value_alloc_type::const_pointer pointer;
|
|
209
225
|
|
|
210
226
|
// "Real" constructor and default constructor
|
|
211
227
|
sparse_hashtable_const_iterator(const sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> *h,
|
|
@@ -246,16 +262,20 @@ struct sparse_hashtable_const_iterator {
|
|
|
246
262
|
// And once again, but this time freeing up memory as we iterate
|
|
247
263
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
248
264
|
struct sparse_hashtable_destructive_iterator {
|
|
265
|
+
private:
|
|
266
|
+
typedef typename A::template rebind<V>::other value_alloc_type;
|
|
267
|
+
|
|
249
268
|
public:
|
|
250
269
|
typedef sparse_hashtable_destructive_iterator<V,K,HF,ExK,SetK,EqK,A> iterator;
|
|
251
|
-
typedef typename sparsetable<V>::destructive_iterator
|
|
270
|
+
typedef typename sparsetable<V,DEFAULT_GROUP_SIZE,A>::destructive_iterator
|
|
271
|
+
st_iterator;
|
|
252
272
|
|
|
253
273
|
typedef STL_NAMESPACE::forward_iterator_tag iterator_category;
|
|
254
274
|
typedef V value_type;
|
|
255
|
-
typedef
|
|
256
|
-
typedef
|
|
257
|
-
typedef
|
|
258
|
-
typedef
|
|
275
|
+
typedef typename value_alloc_type::difference_type difference_type;
|
|
276
|
+
typedef typename value_alloc_type::size_type size_type;
|
|
277
|
+
typedef typename value_alloc_type::reference reference;
|
|
278
|
+
typedef typename value_alloc_type::pointer pointer;
|
|
259
279
|
|
|
260
280
|
// "Real" constructor and default constructor
|
|
261
281
|
sparse_hashtable_destructive_iterator(const
|
|
@@ -295,18 +315,22 @@ struct sparse_hashtable_destructive_iterator {
|
|
|
295
315
|
template <class Value, class Key, class HashFcn,
|
|
296
316
|
class ExtractKey, class SetKey, class EqualKey, class Alloc>
|
|
297
317
|
class sparse_hashtable {
|
|
318
|
+
private:
|
|
319
|
+
typedef typename Alloc::template rebind<Value>::other value_alloc_type;
|
|
320
|
+
|
|
298
321
|
public:
|
|
299
322
|
typedef Key key_type;
|
|
300
323
|
typedef Value value_type;
|
|
301
324
|
typedef HashFcn hasher;
|
|
302
325
|
typedef EqualKey key_equal;
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
typedef
|
|
306
|
-
typedef
|
|
307
|
-
typedef
|
|
308
|
-
typedef
|
|
309
|
-
typedef
|
|
326
|
+
typedef Alloc allocator_type;
|
|
327
|
+
|
|
328
|
+
typedef typename value_alloc_type::size_type size_type;
|
|
329
|
+
typedef typename value_alloc_type::difference_type difference_type;
|
|
330
|
+
typedef typename value_alloc_type::reference reference;
|
|
331
|
+
typedef typename value_alloc_type::const_reference const_reference;
|
|
332
|
+
typedef typename value_alloc_type::pointer pointer;
|
|
333
|
+
typedef typename value_alloc_type::const_pointer const_pointer;
|
|
310
334
|
typedef sparse_hashtable_iterator<Value, Key, HashFcn, ExtractKey,
|
|
311
335
|
SetKey, EqualKey, Alloc>
|
|
312
336
|
iterator;
|
|
@@ -326,22 +350,23 @@ class sparse_hashtable {
|
|
|
326
350
|
// How full we let the table get before we resize, by default.
|
|
327
351
|
// Knuth says .8 is good -- higher causes us to probe too much,
|
|
328
352
|
// though it saves memory.
|
|
329
|
-
static const
|
|
353
|
+
static const int HT_OCCUPANCY_PCT; // = 80 (out of 100);
|
|
330
354
|
|
|
331
355
|
// How empty we let the table get before we resize lower, by default.
|
|
332
|
-
//
|
|
333
|
-
|
|
356
|
+
// (0.0 means never resize lower.)
|
|
357
|
+
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
|
|
358
|
+
static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;
|
|
334
359
|
|
|
335
360
|
// Minimum size we're willing to let hashtables be.
|
|
336
361
|
// Must be a power of two, and at least 4.
|
|
337
|
-
// Note, however, that for a given hashtable, the
|
|
338
|
-
//
|
|
339
|
-
static const
|
|
362
|
+
// Note, however, that for a given hashtable, the initial size is a
|
|
363
|
+
// function of the first constructor arg, and may be >HT_MIN_BUCKETS.
|
|
364
|
+
static const size_type HT_MIN_BUCKETS = 4;
|
|
340
365
|
|
|
341
366
|
// By default, if you don't specify a hashtable size at
|
|
342
367
|
// construction-time, we use this size. Must be a power of two, and
|
|
343
368
|
// at least HT_MIN_BUCKETS.
|
|
344
|
-
static const
|
|
369
|
+
static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
|
|
345
370
|
|
|
346
371
|
// ITERATOR FUNCTIONS
|
|
347
372
|
iterator begin() { return iterator(this, table.nonempty_begin(),
|
|
@@ -399,8 +424,12 @@ class sparse_hashtable {
|
|
|
399
424
|
|
|
400
425
|
|
|
401
426
|
// ACCESSOR FUNCTIONS for the things we templatize on, basically
|
|
402
|
-
hasher hash_funct() const
|
|
403
|
-
key_equal key_eq() const
|
|
427
|
+
hasher hash_funct() const { return settings; }
|
|
428
|
+
key_equal key_eq() const { return key_info; }
|
|
429
|
+
allocator_type get_allocator() const { return table.get_allocator(); }
|
|
430
|
+
|
|
431
|
+
// Accessor function for statistics gathering.
|
|
432
|
+
int num_table_copies() const { return settings.num_ht_copies(); }
|
|
404
433
|
|
|
405
434
|
private:
|
|
406
435
|
// We need to copy values when we set the special marker for deleted
|
|
@@ -408,7 +437,7 @@ class sparse_hashtable {
|
|
|
408
437
|
// operator because value_type might not be assignable (it's often
|
|
409
438
|
// pair<const X, Y>). We use explicit destructor invocation and
|
|
410
439
|
// placement new to get around this. Arg.
|
|
411
|
-
void set_value(
|
|
440
|
+
void set_value(pointer dst, const_reference src) {
|
|
412
441
|
dst->~value_type(); // delete the old value, if any
|
|
413
442
|
new(dst) value_type(src);
|
|
414
443
|
}
|
|
@@ -419,7 +448,6 @@ class sparse_hashtable {
|
|
|
419
448
|
// can't do a destructive copy, we make the typename private.
|
|
420
449
|
enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};
|
|
421
450
|
|
|
422
|
-
|
|
423
451
|
// DELETE HELPER FUNCTIONS
|
|
424
452
|
// This lets the user describe a key that will indicate deleted
|
|
425
453
|
// table entries. This key should be an "impossible" entry --
|
|
@@ -435,59 +463,88 @@ class sparse_hashtable {
|
|
|
435
463
|
assert(num_deleted == 0);
|
|
436
464
|
}
|
|
437
465
|
|
|
466
|
+
bool test_deleted_key(const key_type& key) const {
|
|
467
|
+
// The num_deleted test is crucial for read(): after read(), the ht values
|
|
468
|
+
// are garbage, and we don't want to think some of them are deleted.
|
|
469
|
+
// Invariant: !use_deleted implies num_deleted is 0.
|
|
470
|
+
assert(settings.use_deleted() || num_deleted == 0);
|
|
471
|
+
return num_deleted > 0 && equals(key_info.delkey, key);
|
|
472
|
+
}
|
|
473
|
+
|
|
438
474
|
public:
|
|
439
475
|
void set_deleted_key(const key_type &key) {
|
|
440
476
|
// It's only safe to change what "deleted" means if we purge deleted guys
|
|
441
477
|
squash_deleted();
|
|
442
|
-
|
|
443
|
-
delkey = key;
|
|
478
|
+
settings.set_use_deleted(true);
|
|
479
|
+
key_info.delkey = key;
|
|
444
480
|
}
|
|
445
481
|
void clear_deleted_key() {
|
|
446
482
|
squash_deleted();
|
|
447
|
-
|
|
483
|
+
settings.set_use_deleted(false);
|
|
484
|
+
}
|
|
485
|
+
key_type deleted_key() const {
|
|
486
|
+
assert(settings.use_deleted()
|
|
487
|
+
&& "Must set deleted key before calling deleted_key");
|
|
488
|
+
return key_info.delkey;
|
|
448
489
|
}
|
|
449
490
|
|
|
450
491
|
// These are public so the iterators can use them
|
|
451
492
|
// True if the item at position bucknum is "deleted" marker
|
|
452
493
|
bool test_deleted(size_type bucknum) const {
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
return (use_deleted && num_deleted > 0 && table.test(bucknum) &&
|
|
456
|
-
equals(delkey, get_key(table.unsafe_get(bucknum))));
|
|
494
|
+
if (num_deleted == 0 || !table.test(bucknum)) return false;
|
|
495
|
+
return test_deleted_key(get_key(table.unsafe_get(bucknum)));
|
|
457
496
|
}
|
|
458
497
|
bool test_deleted(const iterator &it) const {
|
|
459
|
-
|
|
460
|
-
|
|
498
|
+
if (!settings.use_deleted()) return false;
|
|
499
|
+
return test_deleted_key(get_key(*it));
|
|
461
500
|
}
|
|
462
501
|
bool test_deleted(const const_iterator &it) const {
|
|
463
|
-
|
|
464
|
-
|
|
502
|
+
if (!settings.use_deleted()) return false;
|
|
503
|
+
return test_deleted_key(get_key(*it));
|
|
465
504
|
}
|
|
466
505
|
bool test_deleted(const destructive_iterator &it) const {
|
|
467
|
-
|
|
468
|
-
|
|
506
|
+
if (!settings.use_deleted()) return false;
|
|
507
|
+
return test_deleted_key(get_key(*it));
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
private:
|
|
511
|
+
// Set it so test_deleted is true. true if object didn't used to be deleted.
|
|
512
|
+
// TODO(csilvers): make these private (also in densehashtable.h)
|
|
513
|
+
bool set_deleted(iterator &it) {
|
|
514
|
+
assert(settings.use_deleted());
|
|
515
|
+
bool retval = !test_deleted(it);
|
|
516
|
+
// &* converts from iterator to value-type.
|
|
517
|
+
set_key(&(*it), key_info.delkey);
|
|
518
|
+
return retval;
|
|
469
519
|
}
|
|
470
|
-
// Set it so test_deleted is
|
|
471
|
-
|
|
520
|
+
// Set it so test_deleted is false. true if object used to be deleted.
|
|
521
|
+
bool clear_deleted(iterator &it) {
|
|
522
|
+
assert(settings.use_deleted());
|
|
523
|
+
// Happens automatically when we assign something else in its place.
|
|
524
|
+
return test_deleted(it);
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
// We also allow to set/clear the deleted bit on a const iterator.
|
|
528
|
+
// We allow a const_iterator for the same reason you can delete a
|
|
529
|
+
// const pointer: it's convenient, and semantically you can't use
|
|
530
|
+
// 'it' after it's been deleted anyway, so its const-ness doesn't
|
|
531
|
+
// really matter.
|
|
472
532
|
bool set_deleted(const_iterator &it) {
|
|
473
|
-
assert(use_deleted);
|
|
533
|
+
assert(settings.use_deleted()); // bad if set_deleted_key() wasn't called
|
|
474
534
|
bool retval = !test_deleted(it);
|
|
475
|
-
|
|
476
|
-
set_key(const_cast<value_type*>(&(*it)), delkey);
|
|
535
|
+
set_key(const_cast<pointer>(&(*it)), key_info.delkey);
|
|
477
536
|
return retval;
|
|
478
537
|
}
|
|
479
|
-
// Set it so test_deleted is false. true if object used to be deleted
|
|
538
|
+
// Set it so test_deleted is false. true if object used to be deleted.
|
|
480
539
|
bool clear_deleted(const_iterator &it) {
|
|
481
|
-
assert(use_deleted);
|
|
482
|
-
// happens automatically when we assign something else in its place
|
|
540
|
+
assert(settings.use_deleted()); // bad if set_deleted_key() wasn't called
|
|
483
541
|
return test_deleted(it);
|
|
484
542
|
}
|
|
485
543
|
|
|
486
|
-
|
|
487
544
|
// FUNCTIONS CONCERNING SIZE
|
|
545
|
+
public:
|
|
488
546
|
size_type size() const { return table.num_nonempty() - num_deleted; }
|
|
489
|
-
|
|
490
|
-
size_type max_size() const { return (size_type(-1) >> 1U) + 1; }
|
|
547
|
+
size_type max_size() const { return table.max_size(); }
|
|
491
548
|
bool empty() const { return size() == 0; }
|
|
492
549
|
size_type bucket_count() const { return table.size(); }
|
|
493
550
|
size_type max_bucket_count() const { return max_size(); }
|
|
@@ -497,54 +554,57 @@ class sparse_hashtable {
|
|
|
497
554
|
return begin(i) == end(i) ? 0 : 1;
|
|
498
555
|
}
|
|
499
556
|
|
|
500
|
-
|
|
501
557
|
private:
|
|
502
558
|
// Because of the above, size_type(-1) is never legal; use it for errors
|
|
503
559
|
static const size_type ILLEGAL_BUCKET = size_type(-1);
|
|
504
560
|
|
|
505
|
-
|
|
506
|
-
//
|
|
507
|
-
//
|
|
508
|
-
|
|
509
|
-
size_type sz = HT_MIN_BUCKETS;
|
|
510
|
-
while ( sz < min_buckets_wanted || num_elts >= sz * enlarge_resize_percent )
|
|
511
|
-
sz *= 2;
|
|
512
|
-
return sz;
|
|
513
|
-
}
|
|
514
|
-
|
|
515
|
-
// Used after a string of deletes
|
|
516
|
-
void maybe_shrink() {
|
|
561
|
+
// Used after a string of deletes. Returns true if we actually shrunk.
|
|
562
|
+
// TODO(csilvers): take a delta so we can take into account inserts
|
|
563
|
+
// done after shrinking. Maybe make part of the Settings class?
|
|
564
|
+
bool maybe_shrink() {
|
|
517
565
|
assert(table.num_nonempty() >= num_deleted);
|
|
518
566
|
assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
|
|
519
567
|
assert(bucket_count() >= HT_MIN_BUCKETS);
|
|
568
|
+
bool retval = false;
|
|
520
569
|
|
|
521
570
|
// If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
|
|
522
571
|
// we'll never shrink until you get relatively big, and we'll never
|
|
523
572
|
// shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
|
|
524
573
|
// like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
|
|
525
574
|
// shrink us down to HT_MIN_BUCKETS buckets, which is too small.
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
575
|
+
const size_type num_remain = table.num_nonempty() - num_deleted;
|
|
576
|
+
const size_type shrink_threshold = settings.shrink_threshold();
|
|
577
|
+
if (shrink_threshold > 0 && num_remain < shrink_threshold &&
|
|
578
|
+
bucket_count() > HT_DEFAULT_STARTING_BUCKETS) {
|
|
579
|
+
const float shrink_factor = settings.shrink_factor();
|
|
529
580
|
size_type sz = bucket_count() / 2; // find how much we should shrink
|
|
530
|
-
while (
|
|
531
|
-
|
|
532
|
-
shrink_resize_percent )
|
|
581
|
+
while (sz > HT_DEFAULT_STARTING_BUCKETS &&
|
|
582
|
+
num_remain < static_cast<size_type>(sz * shrink_factor)) {
|
|
533
583
|
sz /= 2; // stay a power of 2
|
|
584
|
+
}
|
|
534
585
|
sparse_hashtable tmp(MoveDontCopy, *this, sz);
|
|
535
586
|
swap(tmp); // now we are tmp
|
|
587
|
+
retval = true;
|
|
536
588
|
}
|
|
537
|
-
|
|
589
|
+
settings.set_consider_shrink(false); // because we just considered it
|
|
590
|
+
return retval;
|
|
538
591
|
}
|
|
539
592
|
|
|
540
593
|
// We'll let you resize a hashtable -- though this makes us copy all!
|
|
541
594
|
// When you resize, you say, "make it big enough for this many more elements"
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
595
|
+
// Returns true if we actually resized, false if size was already ok.
|
|
596
|
+
bool resize_delta(size_type delta) {
|
|
597
|
+
bool did_resize = false;
|
|
598
|
+
if ( settings.consider_shrink() ) { // see if lots of deletes happened
|
|
599
|
+
if ( maybe_shrink() )
|
|
600
|
+
did_resize = true;
|
|
601
|
+
}
|
|
602
|
+
if (table.num_nonempty() >=
|
|
603
|
+
(STL_NAMESPACE::numeric_limits<size_type>::max)() - delta)
|
|
604
|
+
throw std::length_error("resize overflow");
|
|
545
605
|
if ( bucket_count() >= HT_MIN_BUCKETS &&
|
|
546
|
-
(table.num_nonempty() + delta) <= enlarge_threshold )
|
|
547
|
-
return;
|
|
606
|
+
(table.num_nonempty() + delta) <= settings.enlarge_threshold() )
|
|
607
|
+
return did_resize; // we're ok as we are
|
|
548
608
|
|
|
549
609
|
// Sometimes, we need to resize just to get rid of all the
|
|
550
610
|
// "deleted" buckets that are clogging up the hashtable. So when
|
|
@@ -552,13 +612,34 @@ class sparse_hashtable {
|
|
|
552
612
|
// are currently taking up room). But later, when we decide what
|
|
553
613
|
// size to resize to, *don't* count deleted buckets, since they
|
|
554
614
|
// get discarded during the resize.
|
|
555
|
-
const size_type needed_size =
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
615
|
+
const size_type needed_size =
|
|
616
|
+
settings.min_buckets(table.num_nonempty() + delta, 0);
|
|
617
|
+
if ( needed_size <= bucket_count() ) // we have enough buckets
|
|
618
|
+
return did_resize;
|
|
619
|
+
|
|
620
|
+
size_type resize_to =
|
|
621
|
+
settings.min_buckets(table.num_nonempty() - num_deleted + delta,
|
|
622
|
+
bucket_count());
|
|
623
|
+
if (resize_to < needed_size && // may double resize_to
|
|
624
|
+
resize_to < (STL_NAMESPACE::numeric_limits<size_type>::max)() / 2) {
|
|
625
|
+
// This situation means that we have enough deleted elements,
|
|
626
|
+
// that once we purge them, we won't actually have needed to
|
|
627
|
+
// grow. But we may want to grow anyway: if we just purge one
|
|
628
|
+
// element, say, we'll have to grow anyway next time we
|
|
629
|
+
// insert. Might as well grow now, since we're already going
|
|
630
|
+
// through the trouble of copying (in order to purge the
|
|
631
|
+
// deleted elements).
|
|
632
|
+
const size_type target =
|
|
633
|
+
static_cast<size_type>(settings.shrink_size(resize_to*2));
|
|
634
|
+
if (table.num_nonempty() - num_deleted + delta >= target) {
|
|
635
|
+
// Good, we won't be below the shrink threshhold even if we double.
|
|
636
|
+
resize_to *= 2;
|
|
637
|
+
}
|
|
561
638
|
}
|
|
639
|
+
|
|
640
|
+
sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
|
|
641
|
+
swap(tmp); // now we are tmp
|
|
642
|
+
return true;
|
|
562
643
|
}
|
|
563
644
|
|
|
564
645
|
// Used to actually do the rehashing when we grow/shrink a hashtable
|
|
@@ -566,16 +647,17 @@ class sparse_hashtable {
|
|
|
566
647
|
clear(); // clear table, set num_deleted to 0
|
|
567
648
|
|
|
568
649
|
// If we need to change the size of our table, do it now
|
|
569
|
-
const size_type resize_to =
|
|
650
|
+
const size_type resize_to =
|
|
651
|
+
settings.min_buckets(ht.size(), min_buckets_wanted);
|
|
570
652
|
if ( resize_to > bucket_count() ) { // we don't have enough buckets
|
|
571
653
|
table.resize(resize_to); // sets the number of buckets
|
|
572
|
-
reset_thresholds();
|
|
654
|
+
settings.reset_thresholds(bucket_count());
|
|
573
655
|
}
|
|
574
656
|
|
|
575
657
|
// We use a normal iterator to get non-deleted bcks from ht
|
|
576
658
|
// We could use insert() here, but since we know there are
|
|
577
659
|
// no duplicates and no deleted items, we can be more efficient
|
|
578
|
-
assert(
|
|
660
|
+
assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two
|
|
579
661
|
for ( const_iterator it = ht.begin(); it != ht.end(); ++it ) {
|
|
580
662
|
size_type num_probes = 0; // how many times we've probed
|
|
581
663
|
size_type bucknum;
|
|
@@ -584,10 +666,12 @@ class sparse_hashtable {
|
|
|
584
666
|
table.test(bucknum); // not empty
|
|
585
667
|
bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one) {
|
|
586
668
|
++num_probes;
|
|
587
|
-
assert(num_probes < bucket_count()
|
|
669
|
+
assert(num_probes < bucket_count()
|
|
670
|
+
&& "Hashtable is full: an error in key_equal<> or hash<>");
|
|
588
671
|
}
|
|
589
672
|
table.set(bucknum, *it); // copies the value to here
|
|
590
673
|
}
|
|
674
|
+
settings.inc_num_ht_copies();
|
|
591
675
|
}
|
|
592
676
|
|
|
593
677
|
// Implementation is like copy_from, but it destroys the table of the
|
|
@@ -598,14 +682,14 @@ class sparse_hashtable {
|
|
|
598
682
|
clear(); // clear table, set num_deleted to 0
|
|
599
683
|
|
|
600
684
|
// If we need to change the size of our table, do it now
|
|
601
|
-
|
|
685
|
+
size_type resize_to;
|
|
602
686
|
if ( mover == MoveDontGrow )
|
|
603
687
|
resize_to = ht.bucket_count(); // keep same size as old ht
|
|
604
688
|
else // MoveDontCopy
|
|
605
|
-
resize_to =
|
|
689
|
+
resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
|
|
606
690
|
if ( resize_to > bucket_count() ) { // we don't have enough buckets
|
|
607
691
|
table.resize(resize_to); // sets the number of buckets
|
|
608
|
-
reset_thresholds();
|
|
692
|
+
settings.reset_thresholds(bucket_count());
|
|
609
693
|
}
|
|
610
694
|
|
|
611
695
|
// We use a normal iterator to get non-deleted bcks from ht
|
|
@@ -621,10 +705,12 @@ class sparse_hashtable {
|
|
|
621
705
|
table.test(bucknum); // not empty
|
|
622
706
|
bucknum = (bucknum + JUMP_(key, num_probes)) & (bucket_count()-1) ) {
|
|
623
707
|
++num_probes;
|
|
624
|
-
assert(num_probes < bucket_count()
|
|
708
|
+
assert(num_probes < bucket_count()
|
|
709
|
+
&& "Hashtable is full: an error in key_equal<> or hash<>");
|
|
625
710
|
}
|
|
626
711
|
table.set(bucknum, *it); // copies the value to here
|
|
627
712
|
}
|
|
713
|
+
settings.inc_num_ht_copies();
|
|
628
714
|
}
|
|
629
715
|
|
|
630
716
|
|
|
@@ -634,28 +720,23 @@ class sparse_hashtable {
|
|
|
634
720
|
// more useful as num_elements. As a special feature, calling with
|
|
635
721
|
// req_elements==0 will cause us to shrink if we can, saving space.
|
|
636
722
|
void resize(size_type req_elements) { // resize to this or larger
|
|
637
|
-
if ( consider_shrink || req_elements == 0 )
|
|
723
|
+
if ( settings.consider_shrink() || req_elements == 0 )
|
|
638
724
|
maybe_shrink();
|
|
639
725
|
if ( req_elements > table.num_nonempty() ) // we only grow
|
|
640
726
|
resize_delta(req_elements - table.num_nonempty());
|
|
641
727
|
}
|
|
642
728
|
|
|
643
|
-
// Get and change the value of
|
|
644
|
-
//
|
|
645
|
-
//
|
|
646
|
-
//
|
|
729
|
+
// Get and change the value of shrink_factor and enlarge_factor. The
|
|
730
|
+
// description at the beginning of this file explains how to choose
|
|
731
|
+
// the values. Setting the shrink parameter to 0.0 ensures that the
|
|
732
|
+
// table never shrinks.
|
|
647
733
|
void get_resizing_parameters(float* shrink, float* grow) const {
|
|
648
|
-
*shrink =
|
|
649
|
-
*grow =
|
|
734
|
+
*shrink = settings.shrink_factor();
|
|
735
|
+
*grow = settings.enlarge_factor();
|
|
650
736
|
}
|
|
651
737
|
void set_resizing_parameters(float shrink, float grow) {
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
if (shrink > grow/2.0f)
|
|
655
|
-
shrink = grow / 2.0f; // otherwise we thrash hashtable size
|
|
656
|
-
shrink_resize_percent = shrink;
|
|
657
|
-
enlarge_resize_percent = grow;
|
|
658
|
-
reset_thresholds();
|
|
738
|
+
settings.set_resizing_parameters(shrink, grow);
|
|
739
|
+
settings.reset_thresholds(bucket_count());
|
|
659
740
|
}
|
|
660
741
|
|
|
661
742
|
// CONSTRUCTORS -- as required by the specs, we take a size,
|
|
@@ -665,15 +746,17 @@ class sparse_hashtable {
|
|
|
665
746
|
explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
|
|
666
747
|
const HashFcn& hf = HashFcn(),
|
|
667
748
|
const EqualKey& eql = EqualKey(),
|
|
749
|
+
const ExtractKey& ext = ExtractKey(),
|
|
668
750
|
const SetKey& set = SetKey(),
|
|
669
|
-
const
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
751
|
+
const Alloc& alloc = Alloc())
|
|
752
|
+
: settings(hf),
|
|
753
|
+
key_info(ext, set, eql),
|
|
754
|
+
num_deleted(0),
|
|
755
|
+
table((expected_max_items_in_table == 0
|
|
756
|
+
? HT_DEFAULT_STARTING_BUCKETS
|
|
757
|
+
: settings.min_buckets(expected_max_items_in_table, 0)),
|
|
758
|
+
alloc) {
|
|
759
|
+
settings.reset_thresholds(bucket_count());
|
|
677
760
|
}
|
|
678
761
|
|
|
679
762
|
// As a convenience for resize(), we allow an optional second argument
|
|
@@ -682,63 +765,51 @@ class sparse_hashtable {
|
|
|
682
765
|
// into us instead of copying.
|
|
683
766
|
sparse_hashtable(const sparse_hashtable& ht,
|
|
684
767
|
size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
table() {
|
|
691
|
-
reset_thresholds();
|
|
768
|
+
: settings(ht.settings),
|
|
769
|
+
key_info(ht.key_info),
|
|
770
|
+
num_deleted(0),
|
|
771
|
+
table(0, ht.get_allocator()) {
|
|
772
|
+
settings.reset_thresholds(bucket_count());
|
|
692
773
|
copy_from(ht, min_buckets_wanted); // copy_from() ignores deleted entries
|
|
693
774
|
}
|
|
694
775
|
sparse_hashtable(MoveDontCopyT mover, sparse_hashtable& ht,
|
|
695
776
|
size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
reset_thresholds();
|
|
777
|
+
: settings(ht.settings),
|
|
778
|
+
key_info(ht.key_info),
|
|
779
|
+
num_deleted(0),
|
|
780
|
+
table(0, ht.get_allocator()) {
|
|
781
|
+
settings.reset_thresholds(bucket_count());
|
|
702
782
|
move_from(mover, ht, min_buckets_wanted); // ignores deleted entries
|
|
703
783
|
}
|
|
704
784
|
|
|
705
785
|
sparse_hashtable& operator= (const sparse_hashtable& ht) {
|
|
706
786
|
if (&ht == this) return *this; // don't copy onto ourselves
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
delkey = ht.delkey;
|
|
714
|
-
copy_from(ht, HT_MIN_BUCKETS); // sets num_deleted to 0 too
|
|
787
|
+
settings = ht.settings;
|
|
788
|
+
key_info = ht.key_info;
|
|
789
|
+
num_deleted = ht.num_deleted;
|
|
790
|
+
// copy_from() calls clear and sets num_deleted to 0 too
|
|
791
|
+
copy_from(ht, HT_MIN_BUCKETS);
|
|
792
|
+
// we purposefully don't copy the allocator, which may not be copyable
|
|
715
793
|
return *this;
|
|
716
794
|
}
|
|
717
795
|
|
|
718
796
|
// Many STL algorithms use swap instead of copy constructors
|
|
719
797
|
void swap(sparse_hashtable& ht) {
|
|
720
|
-
STL_NAMESPACE::swap(
|
|
721
|
-
STL_NAMESPACE::swap(
|
|
722
|
-
STL_NAMESPACE::swap(get_key, ht.get_key);
|
|
723
|
-
STL_NAMESPACE::swap(set_key, ht.set_key);
|
|
798
|
+
STL_NAMESPACE::swap(settings, ht.settings);
|
|
799
|
+
STL_NAMESPACE::swap(key_info, ht.key_info);
|
|
724
800
|
STL_NAMESPACE::swap(num_deleted, ht.num_deleted);
|
|
725
|
-
STL_NAMESPACE::swap(use_deleted, ht.use_deleted);
|
|
726
|
-
STL_NAMESPACE::swap(enlarge_resize_percent, ht.enlarge_resize_percent);
|
|
727
|
-
STL_NAMESPACE::swap(shrink_resize_percent, ht.shrink_resize_percent);
|
|
728
|
-
STL_NAMESPACE::swap(delkey, ht.delkey);
|
|
729
801
|
table.swap(ht.table);
|
|
730
|
-
reset_thresholds();
|
|
731
|
-
ht.reset_thresholds();
|
|
732
802
|
}
|
|
733
803
|
|
|
734
804
|
// It's always nice to be able to clear a table without deallocating it
|
|
735
805
|
void clear() {
|
|
736
|
-
|
|
737
|
-
|
|
806
|
+
if (!empty() || (num_deleted != 0)) {
|
|
807
|
+
table.clear();
|
|
808
|
+
}
|
|
809
|
+
settings.reset_thresholds(bucket_count());
|
|
738
810
|
num_deleted = 0;
|
|
739
811
|
}
|
|
740
812
|
|
|
741
|
-
|
|
742
813
|
// LOOKUP ROUTINES
|
|
743
814
|
private:
|
|
744
815
|
// Returns a pair of positions: 1st where the object is, 2nd where
|
|
@@ -770,7 +841,8 @@ class sparse_hashtable {
|
|
|
770
841
|
}
|
|
771
842
|
++num_probes; // we're doing another probe
|
|
772
843
|
bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
|
|
773
|
-
assert(num_probes < bucket_count()
|
|
844
|
+
assert(num_probes < bucket_count()
|
|
845
|
+
&& "Hashtable is full: an error in key_equal<> or hash<>");
|
|
774
846
|
}
|
|
775
847
|
}
|
|
776
848
|
|
|
@@ -830,32 +902,58 @@ class sparse_hashtable {
|
|
|
830
902
|
|
|
831
903
|
// INSERTION ROUTINES
|
|
832
904
|
private:
|
|
905
|
+
// Private method used by insert_noresize and find_or_insert.
|
|
906
|
+
iterator insert_at(const_reference obj, size_type pos) {
|
|
907
|
+
if (size() >= max_size())
|
|
908
|
+
throw std::length_error("insert overflow");
|
|
909
|
+
if ( test_deleted(pos) ) { // just replace if it's been deleted
|
|
910
|
+
// The set() below will undelete this object. We just worry about stats
|
|
911
|
+
assert(num_deleted > 0);
|
|
912
|
+
--num_deleted; // used to be, now it isn't
|
|
913
|
+
}
|
|
914
|
+
table.set(pos, obj);
|
|
915
|
+
return iterator(this, table.get_iter(pos), table.nonempty_end());
|
|
916
|
+
}
|
|
917
|
+
|
|
833
918
|
// If you know *this is big enough to hold obj, use this routine
|
|
834
|
-
pair<iterator, bool> insert_noresize(
|
|
919
|
+
pair<iterator, bool> insert_noresize(const_reference obj) {
|
|
835
920
|
// First, double-check we're not inserting delkey
|
|
836
|
-
assert(!use_deleted || !equals(get_key(obj), delkey))
|
|
921
|
+
assert((!settings.use_deleted() || !equals(get_key(obj), key_info.delkey))
|
|
922
|
+
&& "Inserting the deleted key");
|
|
837
923
|
const pair<size_type,size_type> pos = find_position(get_key(obj));
|
|
838
924
|
if ( pos.first != ILLEGAL_BUCKET) { // object was already there
|
|
839
925
|
return pair<iterator,bool>(iterator(this, table.get_iter(pos.first),
|
|
840
926
|
table.nonempty_end()),
|
|
841
927
|
false); // false: we didn't insert
|
|
842
928
|
} else { // pos.second says where to put it
|
|
843
|
-
|
|
844
|
-
// The set() below will undelete this object. We just worry about stats
|
|
845
|
-
assert(num_deleted > 0);
|
|
846
|
-
--num_deleted; // used to be, now it isn't
|
|
847
|
-
}
|
|
848
|
-
table.set(pos.second, obj);
|
|
849
|
-
return pair<iterator,bool>(iterator(this, table.get_iter(pos.second),
|
|
850
|
-
table.nonempty_end()),
|
|
851
|
-
true); // true: we did insert
|
|
929
|
+
return pair<iterator,bool>(insert_at(obj, pos.second), true);
|
|
852
930
|
}
|
|
853
931
|
}
|
|
854
932
|
|
|
933
|
+
// Specializations of insert(it, it) depending on the power of the iterator:
|
|
934
|
+
// (1) Iterator supports operator-, resize before inserting
|
|
935
|
+
template <class ForwardIterator>
|
|
936
|
+
void insert(ForwardIterator f, ForwardIterator l, STL_NAMESPACE::forward_iterator_tag) {
|
|
937
|
+
size_t dist = STL_NAMESPACE::distance(f, l);
|
|
938
|
+
if (dist >= (std::numeric_limits<size_type>::max)())
|
|
939
|
+
throw std::length_error("insert-range overflow");
|
|
940
|
+
resize_delta(static_cast<size_type>(dist));
|
|
941
|
+
for ( ; dist > 0; --dist, ++f) {
|
|
942
|
+
insert_noresize(*f);
|
|
943
|
+
}
|
|
944
|
+
}
|
|
945
|
+
|
|
946
|
+
// (2) Arbitrary iterator, can't tell how much to resize
|
|
947
|
+
template <class InputIterator>
|
|
948
|
+
void insert(InputIterator f, InputIterator l, STL_NAMESPACE::input_iterator_tag) {
|
|
949
|
+
for ( ; f != l; ++f)
|
|
950
|
+
insert(*f);
|
|
951
|
+
}
|
|
952
|
+
|
|
855
953
|
public:
|
|
856
954
|
// This is the normal insert routine, used by the outside world
|
|
857
|
-
pair<iterator, bool> insert(
|
|
858
|
-
resize_delta(1);
|
|
955
|
+
pair<iterator, bool> insert(const_reference obj) {
|
|
956
|
+
resize_delta(1); // adding an object, grow if need be
|
|
859
957
|
return insert_noresize(obj);
|
|
860
958
|
}
|
|
861
959
|
|
|
@@ -866,66 +964,102 @@ class sparse_hashtable {
|
|
|
866
964
|
insert(f, l, typename STL_NAMESPACE::iterator_traits<InputIterator>::iterator_category());
|
|
867
965
|
}
|
|
868
966
|
|
|
869
|
-
//
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
967
|
+
// This is public only because sparse_hash_map::operator[] uses it.
|
|
968
|
+
// It does the minimal amount of work to implement operator[].
|
|
969
|
+
template <class DataType>
|
|
970
|
+
DataType& find_or_insert(const key_type& key) {
|
|
971
|
+
// First, double-check we're not inserting delkey
|
|
972
|
+
assert((!settings.use_deleted() || !equals(key, key_info.delkey))
|
|
973
|
+
&& "Inserting the deleted key");
|
|
974
|
+
const pair<size_type,size_type> pos = find_position(key);
|
|
975
|
+
if ( pos.first != ILLEGAL_BUCKET) { // object was already there
|
|
976
|
+
return table.get_iter(pos.first)->second;
|
|
977
|
+
} else if (resize_delta(1)) { // needed to rehash to make room
|
|
978
|
+
// Since we resized, we can't use pos, so recalculate where to insert.
|
|
979
|
+
return insert_noresize(value_type(key, DataType())).first->second;
|
|
980
|
+
} else { // no need to rehash, insert right here
|
|
981
|
+
return insert_at(value_type(key, DataType()), pos.second)->second;
|
|
982
|
+
}
|
|
885
983
|
}
|
|
886
984
|
|
|
887
|
-
|
|
888
985
|
// DELETION ROUTINES
|
|
889
986
|
size_type erase(const key_type& key) {
|
|
890
|
-
// First, double-check we're not erasing delkey
|
|
891
|
-
assert(!use_deleted || !equals(key, delkey))
|
|
987
|
+
// First, double-check we're not erasing delkey.
|
|
988
|
+
assert((!settings.use_deleted() || !equals(key, key_info.delkey))
|
|
989
|
+
&& "Erasing the deleted key");
|
|
990
|
+
assert(!settings.use_deleted() || !equals(key, key_info.delkey));
|
|
892
991
|
const_iterator pos = find(key); // shrug: shouldn't need to be const
|
|
893
992
|
if ( pos != end() ) {
|
|
894
993
|
assert(!test_deleted(pos)); // or find() shouldn't have returned it
|
|
895
994
|
set_deleted(pos);
|
|
896
995
|
++num_deleted;
|
|
897
|
-
|
|
996
|
+
// will think about shrink after next insert
|
|
997
|
+
settings.set_consider_shrink(true);
|
|
898
998
|
return 1; // because we deleted one thing
|
|
899
999
|
} else {
|
|
900
1000
|
return 0; // because we deleted nothing
|
|
901
1001
|
}
|
|
902
1002
|
}
|
|
903
1003
|
|
|
904
|
-
//
|
|
905
|
-
|
|
906
|
-
// Since that's a moot issue for deleted keys, we allow const_iterators
|
|
907
|
-
void erase(const_iterator pos) {
|
|
1004
|
+
// We return the iterator past the deleted item.
|
|
1005
|
+
void erase(iterator pos) {
|
|
908
1006
|
if ( pos == end() ) return; // sanity check
|
|
909
1007
|
if ( set_deleted(pos) ) { // true if object has been newly deleted
|
|
910
1008
|
++num_deleted;
|
|
911
|
-
|
|
1009
|
+
// will think about shrink after next insert
|
|
1010
|
+
settings.set_consider_shrink(true);
|
|
1011
|
+
}
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
void erase(iterator f, iterator l) {
|
|
1015
|
+
for ( ; f != l; ++f) {
|
|
1016
|
+
if ( set_deleted(f) ) // should always be true
|
|
1017
|
+
++num_deleted;
|
|
912
1018
|
}
|
|
1019
|
+
// will think about shrink after next insert
|
|
1020
|
+
settings.set_consider_shrink(true);
|
|
913
1021
|
}
|
|
914
1022
|
|
|
1023
|
+
// We allow you to erase a const_iterator just like we allow you to
|
|
1024
|
+
// erase an iterator. This is in parallel to 'delete': you can delete
|
|
1025
|
+
// a const pointer just like a non-const pointer. The logic is that
|
|
1026
|
+
// you can't use the object after it's erased anyway, so it doesn't matter
|
|
1027
|
+
// if it's const or not.
|
|
1028
|
+
void erase(const_iterator pos) {
|
|
1029
|
+
if ( pos == end() ) return; // sanity check
|
|
1030
|
+
if ( set_deleted(pos) ) { // true if object has been newly deleted
|
|
1031
|
+
++num_deleted;
|
|
1032
|
+
// will think about shrink after next insert
|
|
1033
|
+
settings.set_consider_shrink(true);
|
|
1034
|
+
}
|
|
1035
|
+
}
|
|
915
1036
|
void erase(const_iterator f, const_iterator l) {
|
|
916
1037
|
for ( ; f != l; ++f) {
|
|
917
1038
|
if ( set_deleted(f) ) // should always be true
|
|
918
1039
|
++num_deleted;
|
|
919
1040
|
}
|
|
920
|
-
|
|
1041
|
+
// will think about shrink after next insert
|
|
1042
|
+
settings.set_consider_shrink(true);
|
|
921
1043
|
}
|
|
922
1044
|
|
|
923
1045
|
|
|
924
1046
|
// COMPARISON
|
|
925
1047
|
bool operator==(const sparse_hashtable& ht) const {
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
1048
|
+
if (size() != ht.size()) {
|
|
1049
|
+
return false;
|
|
1050
|
+
} else if (this == &ht) {
|
|
1051
|
+
return true;
|
|
1052
|
+
} else {
|
|
1053
|
+
// Iterate through the elements in "this" and see if the
|
|
1054
|
+
// corresponding element is in ht
|
|
1055
|
+
for ( const_iterator it = begin(); it != end(); ++it ) {
|
|
1056
|
+
const_iterator it2 = ht.find(get_key(*it));
|
|
1057
|
+
if ((it2 == ht.end()) || (*it != *it2)) {
|
|
1058
|
+
return false;
|
|
1059
|
+
}
|
|
1060
|
+
}
|
|
1061
|
+
return true;
|
|
1062
|
+
}
|
|
929
1063
|
}
|
|
930
1064
|
bool operator!=(const sparse_hashtable& ht) const {
|
|
931
1065
|
return !(*this == ht);
|
|
@@ -946,7 +1080,7 @@ class sparse_hashtable {
|
|
|
946
1080
|
bool read_metadata(FILE *fp) {
|
|
947
1081
|
num_deleted = 0; // since we got rid before writing
|
|
948
1082
|
bool result = table.read_metadata(fp);
|
|
949
|
-
reset_thresholds();
|
|
1083
|
+
settings.reset_thresholds(bucket_count());
|
|
950
1084
|
return result;
|
|
951
1085
|
}
|
|
952
1086
|
|
|
@@ -961,31 +1095,67 @@ class sparse_hashtable {
|
|
|
961
1095
|
}
|
|
962
1096
|
|
|
963
1097
|
private:
|
|
964
|
-
//
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
1098
|
+
// Table is the main storage class.
|
|
1099
|
+
typedef sparsetable<value_type, DEFAULT_GROUP_SIZE, value_alloc_type> Table;
|
|
1100
|
+
|
|
1101
|
+
// Package templated functors with the other types to eliminate memory
|
|
1102
|
+
// needed for storing these zero-size operators. Since ExtractKey and
|
|
1103
|
+
// hasher's operator() might have the same function signature, they
|
|
1104
|
+
// must be packaged in different classes.
|
|
1105
|
+
struct Settings :
|
|
1106
|
+
sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS> {
|
|
1107
|
+
explicit Settings(const hasher& hf)
|
|
1108
|
+
: sh_hashtable_settings<key_type, hasher, size_type, HT_MIN_BUCKETS>(
|
|
1109
|
+
hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
|
|
1110
|
+
};
|
|
1111
|
+
|
|
1112
|
+
// KeyInfo stores delete key and packages zero-size functors:
|
|
1113
|
+
// ExtractKey and SetKey.
|
|
1114
|
+
class KeyInfo : public ExtractKey, public SetKey, public key_equal {
|
|
1115
|
+
public:
|
|
1116
|
+
KeyInfo(const ExtractKey& ek, const SetKey& sk, const key_equal& eq)
|
|
1117
|
+
: ExtractKey(ek),
|
|
1118
|
+
SetKey(sk),
|
|
1119
|
+
key_equal(eq) {
|
|
1120
|
+
}
|
|
1121
|
+
const key_type get_key(const_reference v) const {
|
|
1122
|
+
return ExtractKey::operator()(v);
|
|
1123
|
+
}
|
|
1124
|
+
void set_key(pointer v, const key_type& k) const {
|
|
1125
|
+
SetKey::operator()(v, k);
|
|
1126
|
+
}
|
|
1127
|
+
bool equals(const key_type& a, const key_type& b) const {
|
|
1128
|
+
return key_equal::operator()(a, b);
|
|
1129
|
+
}
|
|
1130
|
+
|
|
1131
|
+
// Which key marks deleted entries.
|
|
1132
|
+
// TODO(csilvers): make a pointer, and get rid of use_deleted (benchmark!)
|
|
1133
|
+
typename remove_const<key_type>::type delkey;
|
|
1134
|
+
};
|
|
1135
|
+
|
|
1136
|
+
// Utility functions to access the templated operators
|
|
1137
|
+
size_type hash(const key_type& v) const {
|
|
1138
|
+
return settings.hash(v);
|
|
986
1139
|
}
|
|
1140
|
+
bool equals(const key_type& a, const key_type& b) const {
|
|
1141
|
+
return key_info.equals(a, b);
|
|
1142
|
+
}
|
|
1143
|
+
const key_type get_key(const_reference v) const {
|
|
1144
|
+
return key_info.get_key(v);
|
|
1145
|
+
}
|
|
1146
|
+
void set_key(pointer v, const key_type& k) const {
|
|
1147
|
+
key_info.set_key(v, k);
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
private:
|
|
1151
|
+
// Actual data
|
|
1152
|
+
Settings settings;
|
|
1153
|
+
KeyInfo key_info;
|
|
1154
|
+
size_type num_deleted; // how many occupied buckets are marked deleted
|
|
1155
|
+
Table table; // holds num_buckets and num_elements too
|
|
987
1156
|
};
|
|
988
1157
|
|
|
1158
|
+
|
|
989
1159
|
// We need a global swap as well
|
|
990
1160
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
991
1161
|
inline void swap(sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
|
|
@@ -1002,13 +1172,14 @@ const typename sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
|
|
|
1002
1172
|
// How full we let the table get before we resize. Knuth says .8 is
|
|
1003
1173
|
// good -- higher causes us to probe too much, though saves memory
|
|
1004
1174
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
1005
|
-
const
|
|
1175
|
+
const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 80;
|
|
1006
1176
|
|
|
1007
1177
|
// How empty we let the table get before we resize lower.
|
|
1008
|
-
// It should be less than
|
|
1178
|
+
// It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
|
|
1009
1179
|
template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
|
|
1010
|
-
const
|
|
1011
|
-
|
|
1180
|
+
const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
|
|
1181
|
+
= static_cast<int>(0.4 *
|
|
1182
|
+
sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);
|
|
1012
1183
|
|
|
1013
1184
|
_END_GOOGLE_NAMESPACE_
|
|
1014
1185
|
|