sparsam 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ #ifndef __SERIALIZER_H__
2
+ #include <ruby.h>
3
+ #include <ruby/intern.h>
4
+ #ifndef NUM2SHORT
5
+ #define NUM2SHORT NUM2INT
6
+ #endif
7
+ #ifdef __cplusplus
8
+ extern "C" {
9
+ #endif
10
+
11
+ enum Proto {
12
+ compact = 0,
13
+ binary = 1,
14
+ };
15
+
16
+ enum TOType {
17
+ t_union = 0,
18
+ t_struct = 1,
19
+ };
20
+
21
+ enum ValidateStrictness { normal = 0, strict = 1, recursive = 2 };
22
+
23
+ void serializer_free(void *data);
24
+ void *serializer_create();
25
+ void serializer_init(void *serializer, int protocol, void *str_arg1,
26
+ uint32_t len);
27
+
28
+ VALUE serializer_readStruct(VALUE self, VALUE klass);
29
+ VALUE serializer_writeStruct(VALUE self, VALUE klass, VALUE data);
30
+
31
+ VALUE cache_fields(VALUE self, VALUE klass);
32
+
33
+ VALUE serializer_validate(VALUE self, VALUE klass, VALUE data,
34
+ VALUE strictness);
35
+
36
+ void initialize_constants();
37
+ void initialize_runtime_constants();
38
+
39
+ #ifdef __cplusplus
40
+ } // end extern "C"
41
+
42
+ #include <boost/shared_ptr.hpp>
43
+ #include <map>
44
+ #include <string>
45
+ #include <thrift/protocol/TProtocol.h>
46
+ #include <thrift/transport/TBufferTransports.h>
47
+ #include <unordered_set>
48
+ #include "third-party/sparsepp/sparsepp/spp.h"
49
+
50
+ using ::apache::thrift::protocol::TType;
51
+
52
+ typedef uint16_t FieldIdIndex;
53
+ typedef uint16_t KlassIndex;
54
+
55
+ typedef int16_t FieldID;
56
+
57
+ typedef struct FieldBegin {
58
+ TType ftype;
59
+ FieldID fid;
60
+ } FieldBegin;
61
+
62
+ typedef struct FieldInfo {
63
+ TType ftype;
64
+ VALUE klass; // set if ftype is a struct or union
65
+ ID ivarName; // set if field is on struct
66
+ VALUE symName; // set if field is on struct/union
67
+ bool isOptional;
68
+ bool isBinaryString;
69
+ FieldInfo *elementType; // element type of a list or set, or the value type of a map
70
+ FieldInfo *keyType; // type of key in maps
71
+ } FieldInfo;
72
+
73
+ typedef std::map<FieldID, FieldInfo *> FieldInfoMap;
74
+ typedef spp::sparse_hash_map<VALUE, FieldInfoMap *> KlassFieldsCache;
75
+
76
+ class ThriftSerializer {
77
+ public:
78
+ ThriftSerializer(){};
79
+ boost::shared_ptr< ::apache::thrift::protocol::TProtocol > tprot;
80
+ boost::shared_ptr< ::apache::thrift::transport::TMemoryBuffer > tmb;
81
+
82
+ VALUE readStruct(VALUE klass);
83
+ void writeStruct(VALUE klass, VALUE data);
84
+
85
+ private:
86
+ VALUE readUnion(VALUE klass);
87
+ VALUE readAny(TType ttype, FieldInfo *field_info);
88
+ void writeAny(TType ttype, FieldInfo *field_info, VALUE data);
89
+ void skip_n_type(uint32_t n, TType ttype);
90
+ void skip_n_pair(uint32_t n, TType type_a, TType type_b);
91
+ };
92
+
93
+ bool validateStruct(VALUE klass, VALUE data, bool validateContainerTypes,
94
+ bool recursive);
95
+ bool validateAny(FieldInfo *type, VALUE val, bool recursive);
96
+ FieldInfoMap *FindOrCreateFieldInfoMap(VALUE klass);
97
+ FieldInfo *CreateFieldInfo(VALUE field_map_entry);
98
+ FieldInfoMap *CreateFieldInfoMap(VALUE klass);
99
+
100
+ #endif
101
+ #endif
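
The header above only declares the pieces; for readers unfamiliar with Thrift's C++ runtime, the sketch below shows how a transport/protocol pair like the tprot/tmb members is typically wired up. Illustration only, not sparsam code: make_protocol is a hypothetical helper, the sketch assumes a Boost-era Apache Thrift (matching the boost::shared_ptr members above), and the 0/1 protocol values mirror the Proto enum at the top of the header.

    #include <cstdint>
    #include <boost/make_shared.hpp>
    #include <thrift/protocol/TBinaryProtocol.h>
    #include <thrift/protocol/TCompactProtocol.h>
    #include <thrift/transport/TBufferTransports.h>

    using ::apache::thrift::protocol::TProtocol;
    using ::apache::thrift::protocol::TBinaryProtocol;
    using ::apache::thrift::protocol::TCompactProtocol;
    using ::apache::thrift::transport::TMemoryBuffer;

    // Hypothetical helper: build the transport/protocol pair for either wire format.
    boost::shared_ptr<TProtocol> make_protocol(int proto,     // 0 = compact, 1 = binary
                                               uint8_t *buf,  // bytes to read, or NULL to write
                                               uint32_t len)
    {
        boost::shared_ptr<TMemoryBuffer> tmb = buf
            ? boost::make_shared<TMemoryBuffer>(buf, len)   // wrap an existing buffer for reading
            : boost::make_shared<TMemoryBuffer>();          // empty growable buffer for writing
        if (proto == 0)
            return boost::make_shared<TCompactProtocol>(tmb);
        return boost::make_shared<TBinaryProtocol>(tmb);
    }
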
@@ -0,0 +1,4347 @@
1
+ #if !defined(sparsepp_h_guard_)
2
+ #define sparsepp_h_guard_
3
+
4
+
5
+ // ----------------------------------------------------------------------
6
+ // Copyright (c) 2016, Gregory Popovitch - greg7mdp@gmail.com
7
+ // All rights reserved.
8
+ //
9
+ // This work is derived from Google's sparsehash library
10
+ //
11
+ // Copyright (c) 2005, Google Inc.
12
+ // All rights reserved.
13
+ //
14
+ // Redistribution and use in source and binary forms, with or without
15
+ // modification, are permitted provided that the following conditions are
16
+ // met:
17
+ //
18
+ // * Redistributions of source code must retain the above copyright
19
+ // notice, this list of conditions and the following disclaimer.
20
+ // * Redistributions in binary form must reproduce the above
21
+ // copyright notice, this list of conditions and the following disclaimer
22
+ // in the documentation and/or other materials provided with the
23
+ // distribution.
24
+ // * Neither the name of Google Inc. nor the names of its
25
+ // contributors may be used to endorse or promote products derived from
26
+ // this software without specific prior written permission.
27
+ //
28
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
+ // ----------------------------------------------------------------------
40
+
41
+
42
+ // some macros for portability
43
+ // ---------------------------
44
+ // includes
45
+ // --------
46
+ #include <cassert>
47
+ #include <cstring>
48
+ #include <string>
49
+ #include <limits> // for numeric_limits
50
+ #include <algorithm> // For swap(), eg
51
+ #include <iterator> // for iterator tags
52
+ #include <functional> // for equal_to<>, select1st<>, std::unary_function, etc
53
+ #include <memory> // for alloc, uninitialized_copy, uninitialized_fill
54
+ #include <cstdlib> // for malloc/realloc/free
55
+ #include <cstddef> // for ptrdiff_t
56
+ #include <new> // for placement new
57
+ #include <stdexcept> // For length_error
58
+ #include <utility> // for pair<>
59
+ #include <cstdio>
60
+ #include <iosfwd>
61
+ #include <ios>
62
+
63
+ #include <sparsepp/spp_stdint.h> // includes spp_config.h
64
+ #include <sparsepp/spp_traits.h>
65
+ #include <sparsepp/spp_utils.h>
66
+
67
+ #ifdef SPP_INCLUDE_SPP_ALLOC
68
+ #include <sparsepp/spp_dlalloc.h>
69
+ #endif
70
+
71
+ #if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST)
72
+ #include <initializer_list>
73
+ #endif
74
+
75
+ #if (SPP_GROUP_SIZE == 32)
76
+ #define SPP_SHIFT_ 5
77
+ #define SPP_MASK_ 0x1F
78
+ typedef uint32_t group_bm_type;
79
+ #elif (SPP_GROUP_SIZE == 64)
80
+ #define SPP_SHIFT_ 6
81
+ #define SPP_MASK_ 0x3F
82
+ typedef uint64_t group_bm_type;
83
+ #else
84
+ #error "SPP_GROUP_SIZE must be either 32 or 64"
85
+ #endif
86
+
87
+ namespace spp_ {
88
+
89
+ // ----------------------------------------------------------------------
90
+ // U T I L F U N C T I O N S
91
+ // ----------------------------------------------------------------------
92
+ template <class E>
93
+ inline void throw_exception(const E& exception)
94
+ {
95
+ #if !defined(SPP_NO_EXCEPTIONS)
96
+ throw exception;
97
+ #else
98
+ assert(0);
99
+ abort();
100
+ #endif
101
+ }
102
+
103
+ // ----------------------------------------------------------------------
104
+ // M U T A B L E P A I R H A C K
105
+ // turn std::pair<const K, V> into mutable std::pair<K, V>
106
+ // ----------------------------------------------------------------------
107
+ template <class T>
108
+ struct cvt
109
+ {
110
+ typedef T type;
111
+ };
112
+
113
+ template <class K, class V>
114
+ struct cvt<std::pair<const K, V> >
115
+ {
116
+ typedef std::pair<K, V> type;
117
+ };
118
+
119
+ template <class K, class V>
120
+ struct cvt<const std::pair<const K, V> >
121
+ {
122
+ typedef const std::pair<K, V> type;
123
+ };
124
+
125
+ // ----------------------------------------------------------------------
126
+ // M O V E I T E R A T O R
127
+ // ----------------------------------------------------------------------
128
+ #ifdef SPP_NO_CXX11_RVALUE_REFERENCES
129
+ #define MK_MOVE_IT(p) (p)
130
+ #else
131
+ #define MK_MOVE_IT(p) std::make_move_iterator(p)
132
+ #endif
133
+
134
+
135
+ // ----------------------------------------------------------------------
136
+ // I N T E R N A L S T U F F
137
+ // ----------------------------------------------------------------------
138
+ #ifdef SPP_NO_CXX11_STATIC_ASSERT
139
+ template <bool> struct SppCompileAssert { };
140
+ #define SPP_COMPILE_ASSERT(expr, msg) \
141
+ SPP_ATTRIBUTE_UNUSED typedef SppCompileAssert<(bool(expr))> spp_bogus_[bool(expr) ? 1 : -1]
142
+ #else
143
+ #define SPP_COMPILE_ASSERT static_assert
144
+ #endif
145
+
146
+ namespace sparsehash_internal
147
+ {
148
+
149
+ // Adaptor methods for reading/writing data from an INPUT or OUTPUT
150
+ // variable passed to serialize() or unserialize(). For now we
151
+ // have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note
152
+ // they are pointers, unlike typical use), or else a pointer to
153
+ // something that supports a Read()/Write() method.
154
+ //
155
+ // For technical reasons, we implement read_data/write_data in two
156
+ // stages. The actual work is done in *_data_internal, which takes
157
+ // the stream argument twice: once as a template type, and once with
158
+ // normal type information. (We only use the second version.) We do
159
+ // this because of how C++ picks what function overload to use. If we
160
+ // implemented this the naive way:
161
+ // bool read_data(istream* is, const void* data, size_t length);
162
+ // template<typename T> read_data(T* fp, const void* data, size_t length);
163
+ // C++ would prefer the second version for every stream type except
164
+ // istream. However, we want C++ to prefer the first version for
165
+ // streams that are *subclasses* of istream, such as istringstream.
166
+ // This is not possible given the way template types are resolved. So
167
+ // we split the stream argument in two, one of which is templated and
168
+ // one of which is not. The specialized functions (like the istream
169
+ // version above) ignore the template arg and use the second, 'type'
170
+ // arg, getting subclass matching as normal. The 'catch-all'
171
+ // functions (the second version above) use the template arg to deduce
172
+ // the type, and use a second, void* arg to achieve the desired
173
+ // 'catch-all' semantics.
174
+
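
A compilable toy version of the dispatch trick described above (illustration only, not sparsepp code): with a single argument the exact-match template wins for any istream subclass, so the argument is passed twice and the second copy, compared as istream* versus void*, decides the overload.

    #include <sstream>

    // Naive version: for an std::istringstream*, T* = istringstream* is an exact
    // match, so the catch-all template is chosen over the istream* overload.
    bool naive(std::istream*)            { return true;  }
    template <class T> bool naive(T*)    { return false; }   // wins for istringstream*

    // sparsepp-style version: both overloads are templates, the first parameter
    // soaks up the exact match, and the second parameter picks the winner
    // (derived-to-base istream* beats conversion to void*).
    template <class Ignored>
    bool probe(Ignored*, std::istream*)  { return true;  }   // any istream subclass
    template <class T>
    bool probe(T*, void*)                { return false; }   // FILE*, custom Read() types, ...

    template <class INPUT>
    bool is_istream_like(INPUT *fp)      { return probe(fp, fp); }

    // is_istream_like(&an_istringstream)            -> true
    // is_istream_like(a FILE* or a custom reader)   -> false
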
175
+ // ----- low-level I/O for FILE* ----
176
+
177
+ template<typename Ignored>
178
+ inline bool read_data_internal(Ignored* /*unused*/, FILE* fp,
179
+ void* data, size_t length)
180
+ {
181
+ return fread(data, length, 1, fp) == 1;
182
+ }
183
+
184
+ template<typename Ignored>
185
+ inline bool write_data_internal(Ignored* /*unused*/, FILE* fp,
186
+ const void* data, size_t length)
187
+ {
188
+ return fwrite(data, length, 1, fp) == 1;
189
+ }
190
+
191
+ // ----- low-level I/O for iostream ----
192
+
193
+ // We want the caller to be responsible for #including <iostream>, not
194
+ // us, because iostream is a big header! According to the standard,
195
+ // it's only legal to delay the instantiation the way we want to if
196
+ // the istream/ostream is a template type. So we jump through hoops.
197
+ template<typename ISTREAM>
198
+ inline bool read_data_internal_for_istream(ISTREAM* fp,
199
+ void* data, size_t length)
200
+ {
201
+ return fp->read(reinterpret_cast<char*>(data),
202
+ static_cast<std::streamsize>(length)).good();
203
+ }
204
+ template<typename Ignored>
205
+ inline bool read_data_internal(Ignored* /*unused*/, std::istream* fp,
206
+ void* data, size_t length)
207
+ {
208
+ return read_data_internal_for_istream(fp, data, length);
209
+ }
210
+
211
+ template<typename OSTREAM>
212
+ inline bool write_data_internal_for_ostream(OSTREAM* fp,
213
+ const void* data, size_t length)
214
+ {
215
+ return fp->write(reinterpret_cast<const char*>(data),
216
+ static_cast<std::streamsize>(length)).good();
217
+ }
218
+ template<typename Ignored>
219
+ inline bool write_data_internal(Ignored* /*unused*/, std::ostream* fp,
220
+ const void* data, size_t length)
221
+ {
222
+ return write_data_internal_for_ostream(fp, data, length);
223
+ }
224
+
225
+ // ----- low-level I/O for custom streams ----
226
+
227
+ // The INPUT type needs to support a Read() method that takes a
228
+ // buffer and a length and returns the number of bytes read.
229
+ template <typename INPUT>
230
+ inline bool read_data_internal(INPUT* fp, void* /*unused*/,
231
+ void* data, size_t length)
232
+ {
233
+ return static_cast<size_t>(fp->Read(data, length)) == length;
234
+ }
235
+
236
+ // The OUTPUT type needs to support a Write() operation that takes
237
+ // a buffer and a length and returns the number of bytes written.
238
+ template <typename OUTPUT>
239
+ inline bool write_data_internal(OUTPUT* fp, void* /*unused*/,
240
+ const void* data, size_t length)
241
+ {
242
+ return static_cast<size_t>(fp->Write(data, length)) == length;
243
+ }
244
+
245
+ // ----- low-level I/O: the public API ----
246
+
247
+ template <typename INPUT>
248
+ inline bool read_data(INPUT* fp, void* data, size_t length)
249
+ {
250
+ return read_data_internal(fp, fp, data, length);
251
+ }
252
+
253
+ template <typename OUTPUT>
254
+ inline bool write_data(OUTPUT* fp, const void* data, size_t length)
255
+ {
256
+ return write_data_internal(fp, fp, data, length);
257
+ }
258
+
259
+ // Uses read_data() and write_data() to read/write an integer.
260
+ // length is the number of bytes to read/write (which may differ
261
+ // from sizeof(IntType), allowing us to save on a 32-bit system
262
+ // and load on a 64-bit system). Excess bytes are taken to be 0.
263
+ // INPUT and OUTPUT must match legal inputs to read/write_data (above).
264
+ // --------------------------------------------------------------------
265
+ template <typename INPUT, typename IntType>
266
+ bool read_bigendian_number(INPUT* fp, IntType* value, size_t length)
267
+ {
268
+ *value = 0;
269
+ unsigned char byte;
270
+ // We require IntType to be unsigned or else the shifting gets all screwy.
271
+ SPP_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0), "serializing_int_requires_an_unsigned_type");
272
+ for (size_t i = 0; i < length; ++i)
273
+ {
274
+ if (!read_data(fp, &byte, sizeof(byte)))
275
+ return false;
276
+ *value |= static_cast<IntType>(byte) << ((length - 1 - i) * 8);
277
+ }
278
+ return true;
279
+ }
280
+
281
+ template <typename OUTPUT, typename IntType>
282
+ bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length)
283
+ {
284
+ unsigned char byte;
285
+ // We require IntType to be unsigned or else the shifting gets all screwy.
286
+ SPP_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0), "serializing_int_requires_an_unsigned_type");
287
+ for (size_t i = 0; i < length; ++i)
288
+ {
289
+ byte = (sizeof(value) <= length-1 - i)
290
+ ? static_cast<unsigned char>(0) : static_cast<unsigned char>((value >> ((length-1 - i) * 8)) & 255);
291
+ if (!write_data(fp, &byte, sizeof(byte))) return false;
292
+ }
293
+ return true;
294
+ }
295
+
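
A round-trip sketch for the two helpers above (illustration only; it calls the internal functions directly just to show the byte layout). The width argument is independent of sizeof(IntType), so a value written as 8 big-endian bytes from a 32-bit unsigned reads back correctly into a 64-bit unsigned, with the excess leading bytes stored as zero.

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    inline void bigendian_roundtrip_demo()
    {
        std::FILE *fp = std::tmpfile();
        if (!fp) return;

        uint32_t out = 0xDEADBEEFu;                                     // must be unsigned
        spp_::sparsehash_internal::write_bigendian_number(fp, out, 8);  // 00 00 00 00 DE AD BE EF

        std::rewind(fp);
        uint64_t in = 0;
        spp_::sparsehash_internal::read_bigendian_number(fp, &in, 8);

        assert(in == out);
        std::fclose(fp);
    }
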
296
+ // If your keys and values are simple enough, you can pass this
297
+ // serializer to serialize()/unserialize(). "Simple enough" means
298
+ // value_type is a POD type that contains no pointers. Note,
299
+ // however, we don't try to normalize endianness.
300
+ // This is the type used for NopointerSerializer.
301
+ // ---------------------------------------------------------------
302
+ template <typename value_type> struct pod_serializer
303
+ {
304
+ template <typename INPUT>
305
+ bool operator()(INPUT* fp, value_type* value) const
306
+ {
307
+ return read_data(fp, value, sizeof(*value));
308
+ }
309
+
310
+ template <typename OUTPUT>
311
+ bool operator()(OUTPUT* fp, const value_type& value) const
312
+ {
313
+ return write_data(fp, &value, sizeof(value));
314
+ }
315
+ };
316
+
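
A minimal sketch of what using pod_serializer amounts to (illustration only): each value is written/read verbatim, so it is only appropriate for pointer-free, trivially copyable value types, and the resulting file is not endian-neutral. The container-level serialize()/unserialize() members that accept such a functor appear further down in sparsepp.

    #include <cstdint>
    #include <cstdio>
    #include <utility>

    typedef std::pair<uint32_t, float> Rec;                  // POD, no pointers

    inline void pod_serializer_demo(std::FILE *fp)
    {
        spp_::sparsehash_internal::pod_serializer<Rec> ser;

        Rec out(7, 3.5f);
        ser(fp, out);            // write overload: raw bytes of the pair
        std::rewind(fp);

        Rec in;
        ser(fp, &in);            // read overload: fills the pair back in
    }
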
317
+
318
+ // Settings contains parameters for growing and shrinking the table.
319
+ // It also packages the zero-size functor (i.e. the hasher).
320
+ //
321
+ // It does some munging of the hash value for the cases where
322
+ // the original hash function is not very good.
323
+ // ---------------------------------------------------------------
324
+ template<typename Key, typename HashFunc, typename SizeType, int HT_MIN_BUCKETS>
325
+ class sh_hashtable_settings : public HashFunc
326
+ {
327
+ private:
328
+ #ifndef SPP_MIX_HASH
329
+ template <class T, int sz> struct Mixer
330
+ {
331
+ inline T operator()(T h) const { return h; }
332
+ };
333
+ #else
334
+ template <class T, int sz> struct Mixer
335
+ {
336
+ inline T operator()(T h) const;
337
+ };
338
+
339
+ template <class T> struct Mixer<T, 4>
340
+ {
341
+ inline T operator()(T h) const
342
+ {
343
+ // from Thomas Wang - https://gist.github.com/badboy/6267743
344
+ // ---------------------------------------------------------
345
+ h = (h ^ 61) ^ (h >> 16);
346
+ h = h + (h << 3);
347
+ h = h ^ (h >> 4);
348
+ h = h * 0x27d4eb2d;
349
+ h = h ^ (h >> 15);
350
+ return h;
351
+ }
352
+ };
353
+
354
+ template <class T> struct Mixer<T, 8>
355
+ {
356
+ inline T operator()(T h) const
357
+ {
358
+ // from Thomas Wang - https://gist.github.com/badboy/6267743
359
+ // ---------------------------------------------------------
360
+ h = (~h) + (h << 21); // h = (h << 21) - h - 1;
361
+ h = h ^ (h >> 24);
362
+ h = (h + (h << 3)) + (h << 8); // h * 265
363
+ h = h ^ (h >> 14);
364
+ h = (h + (h << 2)) + (h << 4); // h * 21
365
+ h = h ^ (h >> 28);
366
+ h = h + (h << 31);
367
+ return h;
368
+ }
369
+ };
370
+ #endif
371
+
372
+ public:
373
+ typedef Key key_type;
374
+ typedef HashFunc hasher;
375
+ typedef SizeType size_type;
376
+
377
+ public:
378
+ sh_hashtable_settings(const hasher& hf,
379
+ const float ht_occupancy_flt,
380
+ const float ht_empty_flt)
381
+ : hasher(hf),
382
+ enlarge_threshold_(0),
383
+ shrink_threshold_(0),
384
+ consider_shrink_(false),
385
+ num_ht_copies_(0)
386
+ {
387
+ set_enlarge_factor(ht_occupancy_flt);
388
+ set_shrink_factor(ht_empty_flt);
389
+ }
390
+
391
+ size_t hash(const key_type& v) const
392
+ {
393
+ size_t h = hasher::operator()(v);
394
+ Mixer<size_t, sizeof(size_t)> mixer;
395
+
396
+ return mixer(h);
397
+ }
398
+
399
+ float enlarge_factor() const { return enlarge_factor_; }
400
+ void set_enlarge_factor(float f) { enlarge_factor_ = f; }
401
+ float shrink_factor() const { return shrink_factor_; }
402
+ void set_shrink_factor(float f) { shrink_factor_ = f; }
403
+
404
+ size_type enlarge_threshold() const { return enlarge_threshold_; }
405
+ void set_enlarge_threshold(size_type t) { enlarge_threshold_ = t; }
406
+ size_type shrink_threshold() const { return shrink_threshold_; }
407
+ void set_shrink_threshold(size_type t) { shrink_threshold_ = t; }
408
+
409
+ size_type enlarge_size(size_type x) const { return static_cast<size_type>(x * enlarge_factor_); }
410
+ size_type shrink_size(size_type x) const { return static_cast<size_type>(x * shrink_factor_); }
411
+
412
+ bool consider_shrink() const { return consider_shrink_; }
413
+ void set_consider_shrink(bool t) { consider_shrink_ = t; }
414
+
415
+ unsigned int num_ht_copies() const { return num_ht_copies_; }
416
+ void inc_num_ht_copies() { ++num_ht_copies_; }
417
+
418
+ // Reset the enlarge and shrink thresholds
419
+ void reset_thresholds(size_type num_buckets)
420
+ {
421
+ set_enlarge_threshold(enlarge_size(num_buckets));
422
+ set_shrink_threshold(shrink_size(num_buckets));
423
+ // whatever caused us to reset has already considered shrinking
424
+ set_consider_shrink(false);
425
+ }
426
+
427
+ // Caller is responsible for calling reset_thresholds() right after
428
+ // set_resizing_parameters.
429
+ // ------------------------------------------------------------
430
+ void set_resizing_parameters(float shrink, float grow)
431
+ {
432
+ assert(shrink >= 0);
433
+ assert(grow <= 1);
434
+ if (shrink > grow/2.0f)
435
+ shrink = grow / 2.0f; // otherwise we thrash hashtable size
436
+ set_shrink_factor(shrink);
437
+ set_enlarge_factor(grow);
438
+ }
439
+
440
+ // This is the smallest size a hashtable can be without being too crowded
441
+ // If you like, you can give a min #buckets as well as a min #elts
442
+ // ----------------------------------------------------------------------
443
+ size_type min_buckets(size_type num_elts, size_type min_buckets_wanted)
444
+ {
445
+ float enlarge = enlarge_factor();
446
+ size_type sz = HT_MIN_BUCKETS; // min buckets allowed
447
+ while (sz < min_buckets_wanted ||
448
+ num_elts >= static_cast<size_type>(sz * enlarge))
449
+ {
450
+ // This just prevents overflowing size_type, since sz can exceed
451
+ // max_size() here.
452
+ // -------------------------------------------------------------
453
+ if (static_cast<size_type>(sz * 2) < sz)
454
+ throw_exception(std::length_error("resize overflow")); // protect against overflow
455
+ sz *= 2;
456
+ }
457
+ return sz;
458
+ }
459
+
460
+ private:
461
+ size_type enlarge_threshold_; // table.size() * enlarge_factor
462
+ size_type shrink_threshold_; // table.size() * shrink_factor
463
+ float enlarge_factor_; // how full before resize
464
+ float shrink_factor_; // how empty before resize
465
+ bool consider_shrink_; // if we should try to shrink before next insert
466
+
467
+ unsigned int num_ht_copies_; // num_ht_copies is a counter incremented every Copy/Move
468
+ };
469
+
470
+ } // namespace sparsehash_internal
471
+
472
+ #undef SPP_COMPILE_ASSERT
473
+
474
+ // ----------------------------------------------------------------------
475
+ // S P A R S E T A B L E
476
+ // ----------------------------------------------------------------------
477
+ //
478
+ // A sparsetable is a random access container that implements a sparse array,
479
+ // that is, an array that uses very little memory to store unassigned
480
+ // indices (in this case, between 1-2 bits per unassigned index). For
481
+ // instance, if you allocate an array of size 5 and assign a[2] = <big
482
+ // struct>, then a[2] will take up a lot of memory but a[0], a[1],
483
+ // a[3], and a[4] will not. Array elements that have a value are
484
+ // called "assigned". Array elements that have no value yet, or have
485
+ // had their value cleared using erase() or clear(), are called
486
+ // "unassigned".
487
+ //
488
+ // Unassigned values seem to have the default value of T (see below).
489
+ // Nevertheless, there is a difference between an unassigned index and
490
+ // one explicitly assigned the value of T(). The latter is considered
491
+ // assigned.
492
+ //
493
+ // Access to an array element is constant time, as is insertion and
494
+ // deletion. Insertion and deletion may be fairly slow, however:
495
+ // because of this container's memory economy, each insert and delete
496
+ // causes a memory reallocation.
497
+ //
498
+ // NOTE: You should not test(), get(), or set() any index that is
499
+ // greater than sparsetable.size(). If you need to do that, call
500
+ // resize() first.
501
+ //
502
+ // --- Template parameters
503
+ // PARAMETER DESCRIPTION DEFAULT
504
+ // T The value of the array: the type of --
505
+ // object that is stored in the array.
506
+ //
507
+ // Alloc: Allocator to use to allocate memory.
508
+ //
509
+ // --- Model of
510
+ // Random Access Container
511
+ //
512
+ // --- Type requirements
513
+ // T must be Copy Constructible. It need not be Assignable.
514
+ //
515
+ // --- Public base classes
516
+ // None.
517
+ //
518
+ // --- Members
519
+ //
520
+ // [*] All iterators are const in a sparsetable (though nonempty_iterators
521
+ // may not be). Use get() and set() to assign values, not iterators.
522
+ //
523
+ // [+] iterators are random-access iterators. nonempty_iterators are
524
+ // bidirectional iterators.
525
+
526
+ // [*] If you shrink a sparsetable using resize(), assigned elements
527
+ // past the end of the table are removed using erase(). If you grow
528
+ // a sparsetable, new unassigned indices are created.
529
+ //
530
+ // [+] Note that operator[] returns a const reference. You must use
531
+ // set() to change the value of a table element.
532
+ //
533
+ // [!] Unassignment also calls the destructor.
534
+ //
535
+ // Iterators are invalidated whenever an item is inserted or
536
+ // deleted (ie set() or erase() is used) or when the size of
537
+ // the table changes (ie resize() or clear() is used).
538
+
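
A toy model of the assigned/unassigned semantics documented above (illustration only, using std::map to stand in for the sparse storage; the real spp_::sparsetable follows later in this header): get() on an unassigned index returns T(), but only test() can tell a default-looking value from an unassigned one, and erase()/resize() move indices back to the unassigned state.

    #include <cstddef>
    #include <map>

    template <class T>
    struct toy_sparsetable
    {
        std::size_t              size_;      // logical size, as set by resize()
        std::map<std::size_t, T> assigned_;  // only assigned buckets consume memory

        explicit toy_sparsetable(std::size_t n = 0) : size_(n) {}

        void resize(std::size_t n)               // shrinking erases assigned buckets past the end
        {
            size_ = n;
            assigned_.erase(assigned_.lower_bound(n), assigned_.end());
        }
        bool test(std::size_t i) const      { return assigned_.count(i) != 0; }
        void set(std::size_t i, const T &v) { assigned_[i] = v; }        // index becomes "assigned"
        void erase(std::size_t i)           { assigned_.erase(i); }      // back to "unassigned"
        T    get(std::size_t i) const                                    // default T() if unassigned
        {
            typename std::map<std::size_t, T>::const_iterator it = assigned_.find(i);
            return it == assigned_.end() ? T() : it->second;
        }
    };

    // toy_sparsetable<int> t;  t.resize(5);
    // t.get(2) == 0 and t.test(2) == false            (unassigned)
    // t.set(2, 0);  now t.get(2) == 0, t.test(2) == true   (assigned, even though == int())
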
539
+
540
+
541
+ // ---------------------------------------------------------------------------
542
+ // Our iterator is as simple as iterators can be: basically it's just
543
+ // the index into our table. Dereference, the only complicated
544
+ // thing, we punt to the table class. This just goes to show how
545
+ // much machinery STL requires to do even the most trivial tasks.
546
+ //
547
+ // A NOTE ON ASSIGNING:
548
+ // A sparse table does not actually allocate memory for entries
549
+ // that are not filled. Because of this, it becomes complicated
550
+ // to have a non-const iterator: we don't know, if the iterator points
551
+ // to a not-filled bucket, whether you plan to fill it with something
552
+ // or whether you plan to read its value (in which case you'll get
553
+ // the default bucket value). Therefore, while we can define const
554
+ // operations in a pretty 'normal' way, for non-const operations, we
555
+ // define something that returns a helper object with operator= and
556
+ // operator& that allocate a bucket lazily. We use this for table[]
557
+ // and also for regular table iterators.
558
+
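
A bare-bones sketch of the helper object described above (illustration only, not the real sparsepp proxy, which also overloads operator& and is threaded through the iterators): reading through the proxy just looks the value up, while assigning through it is what actually creates the bucket.

    #include <cstddef>
    #include <map>

    template <class T>
    class lazy_ref
    {
    public:
        lazy_ref(std::map<std::size_t, T> *buckets, std::size_t i) : b_(buckets), i_(i) {}

        operator T() const                       // read path: never inserts
        {
            typename std::map<std::size_t, T>::const_iterator it = b_->find(i_);
            return it == b_->end() ? T() : it->second;
        }
        lazy_ref& operator=(const T &v)          // write path: this is what allocates
        {
            (*b_)[i_] = v;
            return *this;
        }

    private:
        std::map<std::size_t, T> *b_;
        std::size_t               i_;
    };
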
559
+ // ---------------------------------------------------------------------------
560
+ // ---------------------------------------------------------------------------
561
+ // Our iterator is as simple as iterators can be: basically it's just
562
+ // the index into our table. Dereference, the only complicated
563
+ // thing, we punt to the table class. This just goes to show how
564
+ // much machinery STL requires to do even the most trivial tasks.
565
+ //
566
+ // By templatizing over tabletype, we have one iterator type which
567
+ // we can use for both sparsetables and sparsebins. In fact it
568
+ // works on any class that allows size() and operator[] (eg vector),
569
+ // as long as it does the standard STL typedefs too (eg value_type).
570
+
571
+ // ---------------------------------------------------------------------------
572
+ // ---------------------------------------------------------------------------
573
+ template <class tabletype>
574
+ class table_iterator
575
+ {
576
+ public:
577
+ typedef table_iterator iterator;
578
+
579
+ typedef std::random_access_iterator_tag iterator_category;
580
+ typedef typename tabletype::value_type value_type;
581
+ typedef typename tabletype::difference_type difference_type;
582
+ typedef typename tabletype::size_type size_type;
583
+
584
+ explicit table_iterator(tabletype *tbl = 0, size_type p = 0) :
585
+ table(tbl), pos(p)
586
+ { }
587
+
588
+ // Helper function to assert things are ok; eg pos is still in range
589
+ void check() const
590
+ {
591
+ assert(table);
592
+ assert(pos <= table->size());
593
+ }
594
+
595
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
596
+ // do bounds checking, since STL doesn't consider that its job. :-)
597
+ iterator& operator+=(size_type t) { pos += t; check(); return *this; }
598
+ iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
599
+ iterator& operator++() { ++pos; check(); return *this; }
600
+ iterator& operator--() { --pos; check(); return *this; }
601
+ iterator operator++(int)
602
+ {
603
+ iterator tmp(*this); // for x++
604
+ ++pos; check(); return tmp;
605
+ }
606
+
607
+ iterator operator--(int)
608
+ {
609
+ iterator tmp(*this); // for x--
610
+ --pos; check(); return tmp;
611
+ }
612
+
613
+ iterator operator+(difference_type i) const
614
+ {
615
+ iterator tmp(*this);
616
+ tmp += i; return tmp;
617
+ }
618
+
619
+ iterator operator-(difference_type i) const
620
+ {
621
+ iterator tmp(*this);
622
+ tmp -= i; return tmp;
623
+ }
624
+
625
+ difference_type operator-(iterator it) const
626
+ {
627
+ // for "x = it2 - it"
628
+ assert(table == it.table);
629
+ return pos - it.pos;
630
+ }
631
+
632
+ // Comparisons.
633
+ bool operator==(const iterator& it) const
634
+ {
635
+ return table == it.table && pos == it.pos;
636
+ }
637
+
638
+ bool operator<(const iterator& it) const
639
+ {
640
+ assert(table == it.table); // life is bad bad bad otherwise
641
+ return pos < it.pos;
642
+ }
643
+
644
+ bool operator!=(const iterator& it) const { return !(*this == it); }
645
+ bool operator<=(const iterator& it) const { return !(it < *this); }
646
+ bool operator>(const iterator& it) const { return it < *this; }
647
+ bool operator>=(const iterator& it) const { return !(*this < it); }
648
+
649
+ // Here's the info we actually need to be an iterator
650
+ tabletype *table; // so we can dereference and bounds-check
651
+ size_type pos; // index into the table
652
+ };
653
+
654
+ // ---------------------------------------------------------------------------
655
+ // ---------------------------------------------------------------------------
656
+ template <class tabletype>
657
+ class const_table_iterator
658
+ {
659
+ public:
660
+ typedef table_iterator<tabletype> iterator;
661
+ typedef const_table_iterator const_iterator;
662
+
663
+ typedef std::random_access_iterator_tag iterator_category;
664
+ typedef typename tabletype::value_type value_type;
665
+ typedef typename tabletype::difference_type difference_type;
666
+ typedef typename tabletype::size_type size_type;
667
+ typedef typename tabletype::const_reference reference; // we're const-only
668
+ typedef typename tabletype::const_pointer pointer;
669
+
670
+ // The "real" constructor
671
+ const_table_iterator(const tabletype *tbl, size_type p)
672
+ : table(tbl), pos(p) { }
673
+
674
+ // The default constructor, used when I define vars of type table::iterator
675
+ const_table_iterator() : table(NULL), pos(0) { }
676
+
677
+ // The copy constructor, for when I say table::iterator foo = tbl.begin()
678
+ // Also converts normal iterators to const iterators // not explicit on purpose
679
+ const_table_iterator(const iterator &from)
680
+ : table(from.table), pos(from.pos) { }
681
+
682
+ // The default destructor is fine; we don't define one
683
+ // The default operator= is fine; we don't define one
684
+
685
+ // The main thing our iterator does is dereference. If the table entry
686
+ // we point to is empty, we return the default value type.
687
+ reference operator*() const { return (*table)[pos]; }
688
+ pointer operator->() const { return &(operator*()); }
689
+
690
+ // Helper function to assert things are ok; eg pos is still in range
691
+ void check() const
692
+ {
693
+ assert(table);
694
+ assert(pos <= table->size());
695
+ }
696
+
697
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
698
+ // do bounds checking, since STL doesn't consider that its job. :-)
699
+ const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
700
+ const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
701
+ const_iterator& operator++() { ++pos; check(); return *this; }
702
+ const_iterator& operator--() { --pos; check(); return *this; }
703
+ const_iterator operator++(int)
704
+ {
705
+ const_iterator tmp(*this); // for x++
706
+ ++pos; check();
707
+ return tmp;
708
+ }
709
+ const_iterator operator--(int)
710
+ {
711
+ const_iterator tmp(*this); // for x--
712
+ --pos; check();
713
+ return tmp;
714
+ }
715
+ const_iterator operator+(difference_type i) const
716
+ {
717
+ const_iterator tmp(*this);
718
+ tmp += i;
719
+ return tmp;
720
+ }
721
+ const_iterator operator-(difference_type i) const
722
+ {
723
+ const_iterator tmp(*this);
724
+ tmp -= i;
725
+ return tmp;
726
+ }
727
+ difference_type operator-(const_iterator it) const
728
+ {
729
+ // for "x = it2 - it"
730
+ assert(table == it.table);
731
+ return pos - it.pos;
732
+ }
733
+ reference operator[](difference_type n) const
734
+ {
735
+ return *(*this + n); // simple though not totally efficient
736
+ }
737
+
738
+ // Comparisons.
739
+ bool operator==(const const_iterator& it) const
740
+ {
741
+ return table == it.table && pos == it.pos;
742
+ }
743
+
744
+ bool operator<(const const_iterator& it) const
745
+ {
746
+ assert(table == it.table); // life is bad bad bad otherwise
747
+ return pos < it.pos;
748
+ }
749
+ bool operator!=(const const_iterator& it) const { return !(*this == it); }
750
+ bool operator<=(const const_iterator& it) const { return !(it < *this); }
751
+ bool operator>(const const_iterator& it) const { return it < *this; }
752
+ bool operator>=(const const_iterator& it) const { return !(*this < it); }
753
+
754
+ // Here's the info we actually need to be an iterator
755
+ const tabletype *table; // so we can dereference and bounds-check
756
+ size_type pos; // index into the table
757
+ };
758
+
759
+ // ---------------------------------------------------------------------------
760
+ // This is a 2-D iterator. You specify a begin and end over a list
761
+ // of *containers*. We iterate over each container by iterating over
762
+ // it. It's actually simple:
763
+ // VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---,
764
+ // | ________________________________________________/
765
+ // | \_> VECTOR[1].begin() --------> VECTOR[1].end() -,
766
+ // | ___________________________________________________/
767
+ // v \_> ......
768
+ // VECTOR.end()
769
+ //
770
+ // It's impossible to do random access on one of these things in constant
771
+ // time, so it's just a bidirectional iterator.
772
+ //
773
+ // Unfortunately, because we need to use this for a non-empty iterator,
774
+ // we use ne_begin() and ne_end() instead of begin() and end()
775
+ // (though only going across, not down).
776
+ // ---------------------------------------------------------------------------
777
+
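
The same traversal written against plain std::vector (illustration only), to make the picture above concrete: the column iterator is advanced, and whenever it falls off the end of a row the row iterator moves on, which is also why random access cannot be constant time.

    #include <cstdio>
    #include <vector>

    inline void flat_print(const std::vector<std::vector<int> > &rows)
    {
        std::vector<std::vector<int> >::const_iterator row = rows.begin();
        std::vector<int>::const_iterator               col;
        if (row != rows.end())
            col = row->begin();

        while (row != rows.end())
        {
            while (col == row->end())        // advance_past_end(): skip finished/empty rows
            {
                ++row;
                if (row == rows.end())
                    return;
                col = row->begin();
            }
            std::printf("%d\n", *col);       // visit one element of the flattened sequence
            ++col;
        }
    }
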
778
+ // ---------------------------------------------------------------------------
779
+ // ---------------------------------------------------------------------------
780
+ template <class T, class row_it, class col_it, class iter_type>
781
+ class Two_d_iterator : public std::iterator<iter_type, T>
782
+ {
783
+ public:
784
+ typedef Two_d_iterator iterator;
785
+ typedef T value_type;
786
+
787
+ explicit Two_d_iterator(row_it curr) : row_current(curr), col_current(0)
788
+ {
789
+ if (row_current && !row_current->is_marked())
790
+ {
791
+ col_current = row_current->ne_begin();
792
+ advance_past_end(); // in case cur->begin() == cur->end()
793
+ }
794
+ }
795
+
796
+ explicit Two_d_iterator(row_it curr, col_it col) : row_current(curr), col_current(col)
797
+ {
798
+ assert(col);
799
+ }
800
+
801
+ // The default constructor
802
+ Two_d_iterator() : row_current(0), col_current(0) { }
803
+
804
+ // Need this explicitly so we can convert normal iterators <=> const iterators
805
+ // not explicit on purpose
806
+ // ---------------------------------------------------------------------------
807
+ template <class T2, class row_it2, class col_it2, class iter_type2>
808
+ Two_d_iterator(const Two_d_iterator<T2, row_it2, col_it2, iter_type2>& it) :
809
+ row_current (*(row_it *)&it.row_current),
810
+ col_current (*(col_it *)&it.col_current)
811
+ { }
812
+
813
+ // The default destructor is fine; we don't define one
814
+ // The default operator= is fine; we don't define one
815
+
816
+ value_type& operator*() const { return *(col_current); }
817
+ value_type* operator->() const { return &(operator*()); }
818
+
819
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
820
+ // do bounds checking, since STL doesn't consider that its job. :-)
821
+ // NOTE: this is not amortized constant time! What do we do about it?
822
+ // ------------------------------------------------------------------
823
+ void advance_past_end()
824
+ {
825
+ // used when col_current points to end()
826
+ while (col_current == row_current->ne_end())
827
+ {
828
+ // end of current row
829
+ // ------------------
830
+ ++row_current; // go to beginning of next
831
+ if (!row_current->is_marked()) // col is irrelevant at end
832
+ col_current = row_current->ne_begin();
833
+ else
834
+ break; // don't go past row_end
835
+ }
836
+ }
837
+
838
+ friend size_t operator-(iterator l, iterator f)
839
+ {
840
+ if (f.row_current->is_marked())
841
+ return 0;
842
+
843
+ size_t diff(0);
844
+ while (f != l)
845
+ {
846
+ ++diff;
847
+ ++f;
848
+ }
849
+ return diff;
850
+ }
851
+
852
+ iterator& operator++()
853
+ {
854
+ // assert(!row_current->is_marked()); // how to ++ from there?
855
+ ++col_current;
856
+ advance_past_end(); // in case col_current is at end()
857
+ return *this;
858
+ }
859
+
860
+ iterator& operator--()
861
+ {
862
+ while (row_current->is_marked() ||
863
+ col_current == row_current->ne_begin())
864
+ {
865
+ --row_current;
866
+ col_current = row_current->ne_end(); // this is 1 too far
867
+ }
868
+ --col_current;
869
+ return *this;
870
+ }
871
+ iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
872
+ iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }
873
+
874
+
875
+ // Comparisons.
876
+ bool operator==(const iterator& it) const
877
+ {
878
+ return (row_current == it.row_current &&
879
+ (!row_current || row_current->is_marked() || col_current == it.col_current));
880
+ }
881
+
882
+ bool operator!=(const iterator& it) const { return !(*this == it); }
883
+
884
+ // Here's the info we actually need to be an iterator
885
+ // These need to be public so we convert from iterator to const_iterator
886
+ // ---------------------------------------------------------------------
887
+ row_it row_current;
888
+ col_it col_current;
889
+ };
890
+
891
+
892
+ // ---------------------------------------------------------------------------
893
+ // ---------------------------------------------------------------------------
894
+ template <class T, class row_it, class col_it, class iter_type, class Alloc>
895
+ class Two_d_destructive_iterator : public Two_d_iterator<T, row_it, col_it, iter_type>
896
+ {
897
+ public:
898
+ typedef Two_d_destructive_iterator iterator;
899
+
900
+ Two_d_destructive_iterator(Alloc &alloc, row_it curr) :
901
+ _alloc(alloc)
902
+ {
903
+ this->row_current = curr;
904
+ this->col_current = 0;
905
+ if (this->row_current && !this->row_current->is_marked())
906
+ {
907
+ this->col_current = this->row_current->ne_begin();
908
+ advance_past_end(); // in case cur->begin() == cur->end()
909
+ }
910
+ }
911
+
912
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
913
+ // do bounds checking, since STL doesn't consider that its job. :-)
914
+ // NOTE: this is not amortized constant time! What do we do about it?
915
+ // ------------------------------------------------------------------
916
+ void advance_past_end()
917
+ {
918
+ // used when col_current points to end()
919
+ while (this->col_current == this->row_current->ne_end())
920
+ {
921
+ this->row_current->clear(_alloc, true); // This is what differs from non-destructive iterators above
922
+
923
+ // end of current row
924
+ // ------------------
925
+ ++this->row_current; // go to beginning of next
926
+ if (!this->row_current->is_marked()) // col is irrelevant at end
927
+ this->col_current = this->row_current->ne_begin();
928
+ else
929
+ break; // don't go past row_end
930
+ }
931
+ }
932
+
933
+ iterator& operator++()
934
+ {
935
+ // assert(!this->row_current->is_marked()); // how to ++ from there?
936
+ ++this->col_current;
937
+ advance_past_end(); // in case col_current is at end()
938
+ return *this;
939
+ }
940
+
941
+ private:
942
+ Two_d_destructive_iterator& operator=(const Two_d_destructive_iterator &o);
943
+
944
+ Alloc &_alloc;
945
+ };
946
+
947
+
948
+ // ---------------------------------------------------------------------------
949
+ // ---------------------------------------------------------------------------
950
+ #if defined(SPP_POPCNT_CHECK)
951
+ static inline bool spp_popcount_check()
952
+ {
953
+ int cpuInfo[4] = { -1 };
954
+ spp_cpuid(cpuInfo, 1);
955
+ if (cpuInfo[2] & (1 << 23))
956
+ return true; // means SPP_POPCNT supported
957
+ return false;
958
+ }
959
+ #endif
960
+
961
+ #if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT)
962
+
963
+ static inline uint32_t spp_popcount(uint32_t i)
964
+ {
965
+ static const bool s_ok = spp_popcount_check();
966
+ return s_ok ? SPP_POPCNT(i) : s_spp_popcount_default(i);
967
+ }
968
+
969
+ #else
970
+
971
+ static inline uint32_t spp_popcount(uint32_t i)
972
+ {
973
+ #if defined(SPP_POPCNT)
974
+ return static_cast<uint32_t>(SPP_POPCNT(i));
975
+ #else
976
+ return s_spp_popcount_default(i);
977
+ #endif
978
+ }
979
+
980
+ #endif
981
+
982
+ #if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT64)
983
+
984
+ static inline uint32_t spp_popcount(uint64_t i)
985
+ {
986
+ static const bool s_ok = spp_popcount_check();
987
+ return s_ok ? (uint32_t)SPP_POPCNT64(i) : s_spp_popcount_default(i);
988
+ }
989
+
990
+ #else
991
+
992
+ static inline uint32_t spp_popcount(uint64_t i)
993
+ {
994
+ #if defined(SPP_POPCNT64)
995
+ return static_cast<uint32_t>(SPP_POPCNT64(i));
996
+ #elif 1
997
+ return s_spp_popcount_default(i);
998
+ #endif
999
+ }
1000
+
1001
+ #endif
1002
+
1003
+ // ---------------------------------------------------------------------------
1004
+ // SPARSE-TABLE
1005
+ // ------------
1006
+ // The idea is that a table with (logically) t buckets is divided
1007
+ // into t/M *groups* of M buckets each. (M is a constant, typically
1008
+ // 32.) Each group is stored sparsely.
1009
+ // Thus, inserting into the table causes some array to grow, which is
1010
+ // slow but still constant time. Lookup involves doing a
1011
+ // logical-position-to-sparse-position lookup, which is also slow but
1012
+ // constant time. The larger M is, the slower these operations are
1013
+ // but the less overhead (slightly).
1014
+ //
1015
+ // To store the sparse array, we store a bitmap B, where B[i] = 1 iff
1016
+ // bucket i is non-empty. Then to look up bucket i we really look up
1017
+ // array[# of 1s before i in B]. This is constant time for fixed M.
1018
+ //
1019
+ // Terminology: the position of an item in the overall table (from
1020
+ // 1 .. t) is called its "location." The logical position in a group
1021
+ // (from 1 .. M) is called its "position." The actual location in
1022
+ // the array (from 1 .. # of non-empty buckets in the group) is
1023
+ // called its "offset."
1024
+ // ---------------------------------------------------------------------------
1025
+
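
A worked example of the bitmap lookup described above (illustration only, with a portable loop instead of the POPCNT intrinsics used later): for B = 0x2D (binary 101101) buckets 0, 2, 3 and 5 are non-empty, and the item logically at position 5 lives at offset 3, i.e. the number of set bits below position 5.

    #include <cstdint>

    inline unsigned pos_to_offset_demo(uint32_t bm, unsigned pos)
    {
        unsigned offset = 0;
        for (unsigned i = 0; i < pos; ++i)   // popcount of the bits below 'pos'
            offset += (bm >> i) & 1u;
        return offset;
    }

    // pos_to_offset_demo(0x2D, 5) == 3  -> stored as the 4th element of the dense array
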
1026
+ template <class T, class Alloc>
1027
+ class sparsegroup
1028
+ {
1029
+ public:
1030
+ // Basic types
1031
+ typedef T value_type;
1032
+ typedef Alloc allocator_type;
1033
+ typedef value_type& reference;
1034
+ typedef const value_type& const_reference;
1035
+ typedef value_type* pointer;
1036
+ typedef const value_type* const_pointer;
1037
+
1038
+ typedef uint8_t size_type; // max # of buckets
1039
+
1040
+ // These are our special iterators, that go over non-empty buckets in a
1041
+ // group. These aren't const-only because you can change non-empty buckets.
1042
+ // ---------------------------------------------------------------------
1043
+ typedef pointer ne_iterator;
1044
+ typedef const_pointer const_ne_iterator;
1045
+ typedef std::reverse_iterator<ne_iterator> reverse_ne_iterator;
1046
+ typedef std::reverse_iterator<const_ne_iterator> const_reverse_ne_iterator;
1047
+
1048
+ // We'll have versions for our special non-empty iterator too
1049
+ // ----------------------------------------------------------
1050
+ ne_iterator ne_begin() { return reinterpret_cast<pointer>(_group); }
1051
+ const_ne_iterator ne_begin() const { return reinterpret_cast<pointer>(_group); }
1052
+ const_ne_iterator ne_cbegin() const { return reinterpret_cast<pointer>(_group); }
1053
+ ne_iterator ne_end() { return reinterpret_cast<pointer>(_group + _num_items()); }
1054
+ const_ne_iterator ne_end() const { return reinterpret_cast<pointer>(_group + _num_items()); }
1055
+ const_ne_iterator ne_cend() const { return reinterpret_cast<pointer>(_group + _num_items()); }
1056
+ reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); }
1057
+ const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_cend()); }
1058
+ const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_cend()); }
1059
+ reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); }
1060
+ const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_cbegin()); }
1061
+ const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_cbegin()); }
1062
+
1063
+ private:
1064
+ // T can be std::pair<const K, V>, but sometimes we need to cast to a mutable type
1065
+ // ------------------------------------------------------------------------------
1066
+ typedef typename spp_::cvt<T>::type mutable_value_type;
1067
+ typedef mutable_value_type * mutable_pointer;
1068
+ typedef const mutable_value_type * const_mutable_pointer;
1069
+
1070
+ bool _bmtest(size_type i) const { return !!(_bitmap & (static_cast<group_bm_type>(1) << i)); }
1071
+ void _bmset(size_type i) { _bitmap |= static_cast<group_bm_type>(1) << i; }
1072
+ void _bmclear(size_type i) { _bitmap &= ~(static_cast<group_bm_type>(1) << i); }
1073
+
1074
+ bool _bme_test(size_type i) const { return !!(_bm_erased & (static_cast<group_bm_type>(1) << i)); }
1075
+ void _bme_set(size_type i) { _bm_erased |= static_cast<group_bm_type>(1) << i; }
1076
+ void _bme_clear(size_type i) { _bm_erased &= ~(static_cast<group_bm_type>(1) << i); }
1077
+
1078
+ bool _bmtest_strict(size_type i) const
1079
+ { return !!((_bitmap | _bm_erased) & (static_cast<group_bm_type>(1) << i)); }
1080
+
1081
+
1082
+ static uint32_t _sizing(uint32_t n)
1083
+ {
1084
+ #if !defined(SPP_ALLOC_SZ) || (SPP_ALLOC_SZ == 0)
1085
+ // aggressive allocation first, then decreasing as sparsegroups fill up
1086
+ // --------------------------------------------------------------------
1087
+ static uint8_t s_alloc_batch_sz[SPP_GROUP_SIZE] = { 0 };
1088
+ if (!s_alloc_batch_sz[0])
1089
+ {
1090
+ // 32 bit bitmap
1091
+ // ........ .... .... .. .. .. .. . . . . . . . .
1092
+ // 8 12 16 18 20 22 24 25 26 ... 32
1093
+ // ------------------------------------------------------
1094
+ uint8_t group_sz = SPP_GROUP_SIZE / 4;
1095
+ uint8_t group_start_alloc = SPP_GROUP_SIZE / 8; //4;
1096
+ uint8_t alloc_sz = group_start_alloc;
1097
+ for (int i=0; i<4; ++i)
1098
+ {
1099
+ for (int j=0; j<group_sz; ++j)
1100
+ {
1101
+ if (j && j % group_start_alloc == 0)
1102
+ alloc_sz += group_start_alloc;
1103
+ s_alloc_batch_sz[i * group_sz + j] = alloc_sz;
1104
+ }
1105
+ if (group_start_alloc > 2)
1106
+ group_start_alloc /= 2;
1107
+ alloc_sz += group_start_alloc;
1108
+ }
1109
+ }
1110
+
1111
+ return n ? static_cast<uint32_t>(s_alloc_batch_sz[n-1]) : 0; // more aggressive alloc at the beginning
1112
+
1113
+ #elif (SPP_ALLOC_SZ == 1)
1114
+ // use as little memory as possible - slowest insert/delete in table
1115
+ // -----------------------------------------------------------------
1116
+ return n;
1117
+ #else
1118
+ // decent compromise when SPP_ALLOC_SZ == 2
1119
+ // ----------------------------------------
1120
+ static size_type sz_minus_1 = SPP_ALLOC_SZ - 1;
1121
+ return (n + sz_minus_1) & ~sz_minus_1;
1122
+ #endif
1123
+ }
1124
+
1125
+ pointer _allocate_group(allocator_type &alloc, uint32_t n /* , bool tight = false */)
1126
+ {
1127
+ // ignore tight since we don't store num_alloc
1128
+ // num_alloc = (uint8_t)(tight ? n : _sizing(n));
1129
+
1130
+ uint32_t num_alloc = (uint8_t)_sizing(n);
1131
+ _set_num_alloc(num_alloc);
1132
+ pointer retval = alloc.allocate(static_cast<size_type>(num_alloc));
1133
+ if (retval == NULL)
1134
+ {
1135
+ // the allocator is supposed to throw an exception if the allocation fails.
1136
+ fprintf(stderr, "sparsehash FATAL ERROR: failed to allocate %d groups\n", num_alloc);
1137
+ exit(1);
1138
+ }
1139
+ return retval;
1140
+ }
1141
+
1142
+ void _free_group(allocator_type &alloc, uint32_t num_alloc)
1143
+ {
1144
+ if (_group)
1145
+ {
1146
+ uint32_t num_buckets = _num_items();
1147
+ if (num_buckets)
1148
+ {
1149
+ mutable_pointer end_it = (mutable_pointer)(_group + num_buckets);
1150
+ for (mutable_pointer p = (mutable_pointer)_group; p != end_it; ++p)
1151
+ p->~mutable_value_type();
1152
+ }
1153
+ alloc.deallocate(_group, (typename allocator_type::size_type)num_alloc);
1154
+ _group = NULL;
1155
+ }
1156
+ }
1157
+
1158
+ // private because should not be called - no allocator!
1159
+ sparsegroup &operator=(const sparsegroup& x);
1160
+
1161
+ static size_type _pos_to_offset(group_bm_type bm, size_type pos)
1162
+ {
1163
+ //return (size_type)((uint32_t)~((int32_t(-1) + pos) >> 31) & spp_popcount(bm << (SPP_GROUP_SIZE - pos)));
1164
+ //return (size_type)(pos ? spp_popcount(bm << (SPP_GROUP_SIZE - pos)) : 0);
1165
+ return static_cast<size_type>(spp_popcount(bm & ((static_cast<group_bm_type>(1) << pos) - 1)));
1166
+ }
1167
+
1168
+ public:
1169
+
1170
+ // get_iter() in sparsetable needs it
1171
+ size_type pos_to_offset(size_type pos) const
1172
+ {
1173
+ return _pos_to_offset(_bitmap, pos);
1174
+ }
1175
+
1176
+ #ifdef _MSC_VER
1177
+ #pragma warning(push)
1178
+ #pragma warning(disable : 4146)
1179
+ #endif
1180
+
1181
+ // Returns the (logical) position in the bm[] array, i, such that
1182
+ // bm[i] is the offset-th set bit in the array. It is the inverse
1183
+ // of pos_to_offset. get_pos() uses this function to find the index
1184
+ // of an ne_iterator in the table. Bit-twiddling from
1185
+ // http://hackersdelight.org/basics.pdf
1186
+ // -----------------------------------------------------------------
1187
+ static size_type offset_to_pos(group_bm_type bm, size_type offset)
1188
+ {
1189
+ for (; offset > 0; offset--)
1190
+ bm &= (bm-1); // remove right-most set bit
1191
+
1192
+ // Clear all bits to the left of the rightmost bit (the &),
1193
+ // and then clear the rightmost bit but set all bits to the
1194
+ // right of it (the -1).
1195
+ // --------------------------------------------------------
1196
+ bm = (bm & -bm) - 1;
1197
+ return static_cast<size_type>(spp_popcount(bm));
1198
+ }
1199
+
1200
+ #ifdef _MSC_VER
1201
+ #pragma warning(pop)
1202
+ #endif
1203
+
1204
+ size_type offset_to_pos(size_type offset) const
1205
+ {
1206
+ return offset_to_pos(_bitmap, offset);
1207
+ }
1208
+
1209
+ public:
1210
+ // Constructors -- default and copy -- and destructor
1211
+ explicit sparsegroup() :
1212
+ _group(0), _bitmap(0), _bm_erased(0)
1213
+ {
1214
+ _set_num_items(0);
1215
+ _set_num_alloc(0);
1216
+ }
1217
+
1218
+ sparsegroup(const sparsegroup& x) :
1219
+ _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased)
1220
+ {
1221
+ _set_num_items(0);
1222
+ _set_num_alloc(0);
1223
+ assert(_group == 0); if (_group) exit(1);
1224
+ }
1225
+
1226
+ sparsegroup(const sparsegroup& x, allocator_type& a) :
1227
+ _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased)
1228
+ {
1229
+ _set_num_items(0);
1230
+ _set_num_alloc(0);
1231
+
1232
+ uint32_t num_items = x._num_items();
1233
+ if (num_items)
1234
+ {
1235
+ _group = _allocate_group(a, num_items /* , true */);
1236
+ _set_num_items(num_items);
1237
+ std::uninitialized_copy(x._group, x._group + num_items, _group);
1238
+ }
1239
+ }
1240
+
1241
+ ~sparsegroup() { assert(_group == 0); if (_group) exit(1); }
1242
+
1243
+ void destruct(allocator_type& a) { _free_group(a, _num_alloc()); }
1244
+
1245
+ // Many STL algorithms use swap instead of copy constructors
1246
+ void swap(sparsegroup& x)
1247
+ {
1248
+ using std::swap;
1249
+
1250
+ swap(_group, x._group);
1251
+ swap(_bitmap, x._bitmap);
1252
+ swap(_bm_erased, x._bm_erased);
1253
+ #ifdef SPP_STORE_NUM_ITEMS
1254
+ swap(_num_buckets, x._num_buckets);
1255
+ swap(_num_allocated, x._num_allocated);
1256
+ #endif
1257
+ }
1258
+
1259
+ // It's always nice to be able to clear a table without deallocating it
1260
+ void clear(allocator_type &alloc, bool erased)
1261
+ {
1262
+ _free_group(alloc, _num_alloc());
1263
+ _bitmap = 0;
1264
+ if (erased)
1265
+ _bm_erased = 0;
1266
+ _set_num_items(0);
1267
+ _set_num_alloc(0);
1268
+ }
1269
+
1270
+ // Functions that tell you about size. Alas, these aren't so useful
1271
+ // because our table is always fixed size.
1272
+ size_type size() const { return static_cast<size_type>(SPP_GROUP_SIZE); }
1273
+ size_type max_size() const { return static_cast<size_type>(SPP_GROUP_SIZE); }
1274
+
1275
+ bool empty() const { return false; }
1276
+
1277
+ // We also may want to know how many *used* buckets there are
1278
+ size_type num_nonempty() const { return (size_type)_num_items(); }
1279
+
1280
+ // TODO(csilvers): make protected + friend
1281
+ // This is used by sparse_hashtable to get an element from the table
1282
+ // when we know it exists.
1283
+ reference unsafe_get(size_type i) const
1284
+ {
1285
+ // assert(_bmtest(i));
1286
+ return (reference)_group[pos_to_offset(i)];
1287
+ }
1288
+
1289
+ typedef std::pair<pointer, bool> SetResult;
1290
+
1291
+ private:
1292
+ //typedef spp_::integral_constant<bool, spp_::is_relocatable<value_type>::value> check_relocatable;
1293
+ typedef spp_::true_type realloc_ok_type;
1294
+ typedef spp_::false_type realloc_not_ok_type;
1295
+
1296
+ //typedef spp_::zero_type libc_reloc_type;
1297
+ //typedef spp_::one_type spp_reloc_type;
1298
+ //typedef spp_::two_type spp_not_reloc_type;
1299
+ //typedef spp_::three_type generic_alloc_type;
1300
+
1301
+ #if 1
1302
+ typedef typename if_<((spp_::is_same<allocator_type, libc_allocator<value_type> >::value ||
1303
+ spp_::is_same<allocator_type, spp_allocator<value_type> >::value) &&
1304
+ spp_::is_relocatable<value_type>::value), realloc_ok_type, realloc_not_ok_type>::type
1305
+ check_alloc_type;
1306
+ #else
1307
+ typedef typename if_<spp_::is_same<allocator_type, spp_allocator<value_type> >::value,
1308
+ typename if_<spp_::is_relocatable<value_type>::value, spp_reloc_type, spp_not_reloc_type>::type,
1309
+ typename if_<(spp_::is_same<allocator_type, libc_allocator<value_type> >::value &&
1310
+ spp_::is_relocatable<value_type>::value), libc_reloc_type, generic_alloc_type>::type >::type
1311
+ check_alloc_type;
1312
+ #endif
1313
+
1314
+
1315
+ //typedef if_<spp_::is_same<allocator_type, libc_allocator<value_type> >::value,
1316
+ // libc_alloc_type,
1317
+ // if_<spp_::is_same<allocator_type, spp_allocator<value_type> >::value,
1318
+ // spp_alloc_type, user_alloc_type> > check_alloc_type;
1319
+
1320
+ //typedef spp_::integral_constant<bool,
1321
+ // (spp_::is_relocatable<value_type>::value &&
1322
+ // (spp_::is_same<allocator_type, spp_allocator<value_type> >::value ||
1323
+ // spp_::is_same<allocator_type, libc_allocator<value_type> >::value)) >
1324
+ // realloc_and_memmove_ok;
1325
+
1326
+ // ------------------------- memory at *p is uninitialized => need to construct
1327
+ void _init_val(mutable_value_type *p, reference val)
1328
+ {
1329
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
1330
+ ::new (p) value_type(std::move(val));
1331
+ #else
1332
+ ::new (p) value_type(val);
1333
+ #endif
1334
+ }
1335
+
1336
+ // ------------------------- memory at *p is uninitialized => need to construct
1337
+ void _init_val(mutable_value_type *p, const_reference val)
1338
+ {
1339
+ ::new (p) value_type(val);
1340
+ }
1341
+
1342
+ // ------------------------------------------------ memory at *p is initialized
1343
+ void _set_val(value_type *p, reference val)
1344
+ {
1345
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
1346
+ *(mutable_pointer)p = std::move(val);
1347
+ #else
1348
+ using std::swap;
1349
+ swap(*(mutable_pointer)p, *(mutable_pointer)&val);
1350
+ #endif
1351
+ }
1352
+
1353
+ // ------------------------------------------------ memory at *p is initialized
1354
+ void _set_val(value_type *p, const_reference val)
1355
+ {
1356
+ *(mutable_pointer)p = *(const_mutable_pointer)&val;
1357
+ }
1358
+
1359
+ // Create space at _group[offset], assuming value_type is relocatable, and the
1360
+ // allocator_type is the spp allocator.
1361
+ // return true if the slot was constructed (i.e. contains a valid value_type)
1362
+ // ---------------------------------------------------------------------------------
1363
+ template <class Val>
1364
+ void _set_aux(allocator_type &alloc, size_type offset, Val &val, realloc_ok_type)
1365
+ {
1366
+ //static int x=0; if (++x < 10) printf("x\n"); // check we are getting here
1367
+
1368
+ uint32_t num_items = _num_items();
1369
+ uint32_t num_alloc = _sizing(num_items);
1370
+
1371
+ if (num_items == num_alloc)
1372
+ {
1373
+ num_alloc = _sizing(num_items + 1);
1374
+ _group = alloc.reallocate(_group, num_alloc);
1375
+ _set_num_alloc(num_alloc);
1376
+ }
1377
+
1378
+ for (uint32_t i = num_items; i > offset; --i)
1379
+ memcpy(_group + i, _group + i-1, sizeof(*_group));
1380
+
1381
+ _init_val((mutable_pointer)(_group + offset), val);
1382
+ }
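+ // (Descriptive note: the memcpy shift above is what "relocatable" buys us;
+ // existing elements may be moved as raw bytes without invoking copy
+ // constructors or destructors. This overload is only selected when
+ // check_alloc_type resolves to realloc_ok_type.)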
1383
+
1384
+ // Create space at _group[offset], assuming value_type is *not* relocatable, and the
1385
+ // allocator_type is the spp allocator.
1386
+ // After this call the slot at _group[offset] contains a valid, constructed value_type.
1387
+ // ---------------------------------------------------------------------------------
1388
+ template <class Val>
1389
+ void _set_aux(allocator_type &alloc, size_type offset, Val &val, realloc_not_ok_type)
1390
+ {
1391
+ uint32_t num_items = _num_items();
1392
+ uint32_t num_alloc = _sizing(num_items);
1393
+
1394
+ //assert(num_alloc == (uint32_t)_num_allocated);
1395
+ if (num_items < num_alloc)
1396
+ {
1397
+ // create new object at end and rotate it to position
1398
+ _init_val((mutable_pointer)&_group[num_items], val);
1399
+ std::rotate((mutable_pointer)(_group + offset),
1400
+ (mutable_pointer)(_group + num_items),
1401
+ (mutable_pointer)(_group + num_items + 1));
1402
+ return;
1403
+ }
1404
+
1405
+ // This is valid because 0 <= offset <= num_items
1406
+ pointer p = _allocate_group(alloc, _sizing(num_items + 1));
1407
+ if (offset)
1408
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)_group),
1409
+ MK_MOVE_IT((mutable_pointer)(_group + offset)),
1410
+ (mutable_pointer)p);
1411
+ if (num_items > offset)
1412
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset)),
1413
+ MK_MOVE_IT((mutable_pointer)(_group + num_items)),
1414
+ (mutable_pointer)(p + offset + 1));
1415
+ _init_val((mutable_pointer)(p + offset), val);
1416
+ _free_group(alloc, num_alloc);
1417
+ _group = p;
1418
+ }
1419
+
1420
+ // ----------------------------------------------------------------------------------
1421
+ template <class Val>
1422
+ void _set(allocator_type &alloc, size_type i, size_type offset, Val &val)
1423
+ {
1424
+ if (!_bmtest(i))
1425
+ {
1426
+ _set_aux(alloc, offset, val, check_alloc_type());
1427
+ _incr_num_items();
1428
+ _bmset(i);
1429
+ }
1430
+ else
1431
+ _set_val(&_group[offset], val);
1432
+ }
1433
+
1434
+ public:
1435
+
1436
+ // This returns the pointer to the inserted item
1437
+ // ---------------------------------------------
1438
+ template <class Val>
1439
+ pointer set(allocator_type &alloc, size_type i, Val &val)
1440
+ {
1441
+ _bme_clear(i); // in case this was an "erased" location
1442
+
1443
+ size_type offset = pos_to_offset(i);
1444
+ _set(alloc, i, offset, val); // may change _group pointer
1445
+ return (pointer)(_group + offset);
1446
+ }
1447
+
1448
+ // We let you see if a bucket is non-empty without retrieving it
1449
+ // -------------------------------------------------------------
1450
+ bool test(size_type i) const
1451
+ {
1452
+ return _bmtest(i);
1453
+ }
1454
+
1455
+ // also tests for erased values
1456
+ // ----------------------------
1457
+ bool test_strict(size_type i) const
1458
+ {
1459
+ return _bmtest_strict(i);
1460
+ }
1461
+
1462
+ private:
1463
+ // Shrink the array, assuming value_type is relocatable, and the
1464
+ // allocator_type is the libc allocator (supporting reallocate).
1465
+ // -------------------------------------------------------------
1466
+ void _group_erase_aux(allocator_type &alloc, size_type offset, realloc_ok_type)
1467
+ {
1468
+ // static int x=0; if (++x < 10) printf("Y\n"); // check we are getting here
1469
+ uint32_t num_items = _num_items();
1470
+ uint32_t num_alloc = _sizing(num_items);
1471
+
1472
+ if (num_items == 1)
1473
+ {
1474
+ assert(offset == 0);
1475
+ _free_group(alloc, num_alloc);
1476
+ _set_num_alloc(0);
1477
+ return;
1478
+ }
1479
+
1480
+ _group[offset].~value_type();
1481
+
1482
+ for (size_type i = offset; i < num_items - 1; ++i)
1483
+ memcpy(_group + i, _group + i + 1, sizeof(*_group));
1484
+
1485
+ if (_sizing(num_items - 1) != num_alloc)
1486
+ {
1487
+ num_alloc = _sizing(num_items - 1);
1488
+ assert(num_alloc); // because we have at least 1 item left
1489
+ _set_num_alloc(num_alloc);
1490
+ _group = alloc.reallocate(_group, num_alloc);
1491
+ }
1492
+ }
1493
+
1494
+ // Shrink the array, without any special assumptions about value_type and
1495
+ // allocator_type.
1496
+ // --------------------------------------------------------------------------
1497
+ void _group_erase_aux(allocator_type &alloc, size_type offset, realloc_not_ok_type)
1498
+ {
1499
+ uint32_t num_items = _num_items();
1500
+ uint32_t num_alloc = _sizing(num_items);
1501
+
1502
+ if (_sizing(num_items - 1) != num_alloc)
1503
+ {
1504
+ pointer p = 0;
1505
+ if (num_items > 1)
1506
+ {
1507
+ p = _allocate_group(alloc, num_items - 1);
1508
+ if (offset)
1509
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group)),
1510
+ MK_MOVE_IT((mutable_pointer)(_group + offset)),
1511
+ (mutable_pointer)(p));
1512
+ if (static_cast<uint32_t>(offset + 1) < num_items)
1513
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset + 1)),
1514
+ MK_MOVE_IT((mutable_pointer)(_group + num_items)),
1515
+ (mutable_pointer)(p + offset));
1516
+ }
1517
+ else
1518
+ {
1519
+ assert(offset == 0);
1520
+ _set_num_alloc(0);
1521
+ }
1522
+ _free_group(alloc, num_alloc);
1523
+ _group = p;
1524
+ }
1525
+ else
1526
+ {
1527
+ std::rotate((mutable_pointer)(_group + offset),
1528
+ (mutable_pointer)(_group + offset + 1),
1529
+ (mutable_pointer)(_group + num_items));
1530
+ ((mutable_pointer)(_group + num_items - 1))->~mutable_value_type();
1531
+ }
1532
+ }
1533
+
1534
+ void _group_erase(allocator_type &alloc, size_type offset)
1535
+ {
1536
+ _group_erase_aux(alloc, offset, check_alloc_type());
1537
+ }
1538
+
1539
+ public:
1540
+ template <class twod_iter>
1541
+ bool erase_ne(allocator_type &alloc, twod_iter &it)
1542
+ {
1543
+ assert(_group && it.col_current != ne_end());
1544
+ size_type offset = (size_type)(it.col_current - ne_begin());
1545
+ size_type pos = offset_to_pos(offset);
1546
+
1547
+ if (_num_items() <= 1)
1548
+ {
1549
+ clear(alloc, false);
1550
+ it.col_current = 0;
1551
+ }
1552
+ else
1553
+ {
1554
+ _group_erase(alloc, offset);
1555
+ _decr_num_items();
1556
+ _bmclear(pos);
1557
+
1558
+ // in case _group_erase reallocated the buffer
1559
+ it.col_current = reinterpret_cast<pointer>(_group) + offset;
1560
+ }
1561
+ _bme_set(pos); // remember that this position has been erased
1562
+ it.advance_past_end();
1563
+ return true;
1564
+ }
1565
+
1566
+
1567
+ // This takes the specified elements out of the group. This is
1568
+ // "undefining", rather than "clearing".
1569
+ // TODO(austern): Make this exception safe: handle exceptions from
1570
+ // value_type's copy constructor.
1571
+ // ---------------------------------------------------------------
1572
+ void erase(allocator_type &alloc, size_type i)
1573
+ {
1574
+ if (_bmtest(i))
1575
+ {
1576
+ // trivial to erase empty bucket
1577
+ if (_num_items() == 1)
1578
+ clear(alloc, false);
1579
+ else
1580
+ {
1581
+ _group_erase(alloc, pos_to_offset(i));
1582
+ _decr_num_items();
1583
+ _bmclear(i);
1584
+ }
1585
+ _bme_set(i); // remember that this position has been erased
1586
+ }
1587
+ }
1588
+
1589
+ // I/O
1590
+ // We support reading and writing groups to disk. We don't store
1591
+ // the actual array contents (which we don't know how to store),
1592
+ // just the bitmap and size. Meant to be used with table I/O.
1593
+ // --------------------------------------------------------------
1594
+ template <typename OUTPUT> bool write_metadata(OUTPUT *fp) const
1595
+ {
1596
+ // warning: we write 4 or 8 bytes for the bitmap, instead of 6 in the
1597
+ // original google sparsehash
1598
+ // ------------------------------------------------------------------
1599
+ if (!sparsehash_internal::write_data(fp, &_bitmap, sizeof(_bitmap)))
1600
+ return false;
1601
+
1602
+ return true;
1603
+ }
1604
+
1605
+ // Reading destroys the old group contents! Returns true if all was ok.
1606
+ template <typename INPUT> bool read_metadata(allocator_type &alloc, INPUT *fp)
1607
+ {
1608
+ clear(alloc, true);
1609
+
1610
+ if (!sparsehash_internal::read_data(fp, &_bitmap, sizeof(_bitmap)))
1611
+ return false;
1612
+
1613
+ // We'll allocate the space, but we won't fill it: it will be
1614
+ // left as uninitialized raw memory.
1615
+ uint32_t num_items = spp_popcount(_bitmap); // yes, _num_buckets not set
1616
+ _set_num_items(num_items);
1617
+ _group = num_items ? _allocate_group(alloc, num_items/* , true */) : 0;
1618
+ return true;
1619
+ }
1620
+
1621
+ // Again, only meaningful if value_type is a POD.
1622
+ template <typename INPUT> bool read_nopointer_data(INPUT *fp)
1623
+ {
1624
+ for (ne_iterator it = ne_begin(); it != ne_end(); ++it)
1625
+ if (!sparsehash_internal::read_data(fp, &(*it), sizeof(*it)))
1626
+ return false;
1627
+ return true;
1628
+ }
1629
+
1630
+ // If your keys and values are simple enough, we can write them
1631
+ // to disk for you. "simple enough" means POD and no pointers.
1632
+ // However, we don't try to normalize endianness.
1633
+ // ------------------------------------------------------------
1634
+ template <typename OUTPUT> bool write_nopointer_data(OUTPUT *fp) const
1635
+ {
1636
+ for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it)
1637
+ if (!sparsehash_internal::write_data(fp, &(*it), sizeof(*it)))
1638
+ return false;
1639
+ return true;
1640
+ }
1641
+
1642
+
1643
+ // Comparisons. We only need to define == and < -- we get
1644
+ // the remaining operators (!=, <=, >, >=) from those two, defined just below.
1645
+ // Note the comparisons are pretty arbitrary: we compare
1646
+ // values of the first index that isn't equal (using default
1647
+ // value for empty buckets).
1648
+ // ---------------------------------------------------------
1649
+ bool operator==(const sparsegroup& x) const
1650
+ {
1651
+ return (_bitmap == x._bitmap &&
1652
+ _bm_erased == x._bm_erased &&
1653
+ std::equal(_group, _group + _num_items(), x._group));
1654
+ }
1655
+
1656
+ bool operator<(const sparsegroup& x) const
1657
+ {
1658
+ // also from <algorithm>
1659
+ return std::lexicographical_compare(_group, _group + _num_items(),
1660
+ x._group, x._group + x._num_items());
1661
+ }
1662
+
1663
+ bool operator!=(const sparsegroup& x) const { return !(*this == x); }
1664
+ bool operator<=(const sparsegroup& x) const { return !(x < *this); }
1665
+ bool operator> (const sparsegroup& x) const { return x < *this; }
1666
+ bool operator>=(const sparsegroup& x) const { return !(*this < x); }
1667
+
1668
+ void mark() { _group = (value_type *)static_cast<uintptr_t>(-1); }
1669
+ bool is_marked() const { return _group == (value_type *)static_cast<uintptr_t>(-1); }
1670
+
1671
+ private:
1672
+ // ---------------------------------------------------------------------------
1673
+ template <class A>
1674
+ class alloc_impl : public A
1675
+ {
1676
+ public:
1677
+ typedef typename A::pointer pointer;
1678
+ typedef typename A::size_type size_type;
1679
+
1680
+ // Convert a normal allocator to one that has realloc_or_die()
1681
+ explicit alloc_impl(const A& a) : A(a) { }
1682
+
1683
+ // realloc_or_die should only be used when using the default
1684
+ // allocator (spp::spp_allocator).
1685
+ pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/)
1686
+ {
1687
+ fprintf(stderr, "realloc_or_die is only supported for "
1688
+ "spp::spp_allocator\n");
1689
+ exit(1);
1690
+ return NULL;
1691
+ }
1692
+ };
1693
+
1694
+ // A template specialization of alloc_impl for
1695
+ // spp::libc_allocator that can handle realloc_or_die.
1696
+ // -----------------------------------------------------------
1697
+ template <class A>
1698
+ class alloc_impl<spp_::libc_allocator<A> > : public spp_::libc_allocator<A>
1699
+ {
1700
+ public:
1701
+ typedef typename spp_::libc_allocator<A>::pointer pointer;
1702
+ typedef typename spp_::libc_allocator<A>::size_type size_type;
1703
+
1704
+ explicit alloc_impl(const spp_::libc_allocator<A>& a)
1705
+ : spp_::libc_allocator<A>(a)
1706
+ { }
1707
+
1708
+ pointer realloc_or_die(pointer ptr, size_type n)
1709
+ {
1710
+ pointer retval = this->reallocate(ptr, n);
1711
+ if (retval == NULL)
1712
+ {
1713
+ fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
1714
+ "%lu elements for ptr %p", static_cast<unsigned long>(n), ptr);
1715
+ exit(1);
1716
+ }
1717
+ return retval;
1718
+ }
1719
+ };
1720
+
1721
+ // A template specialization of alloc_impl for
1722
+ // spp::spp_allocator that can handle realloc_or_die.
1723
+ // -----------------------------------------------------------
1724
+ template <class A>
1725
+ class alloc_impl<spp_::spp_allocator<A> > : public spp_::spp_allocator<A>
1726
+ {
1727
+ public:
1728
+ typedef typename spp_::spp_allocator<A>::pointer pointer;
1729
+ typedef typename spp_::spp_allocator<A>::size_type size_type;
1730
+
1731
+ explicit alloc_impl(const spp_::spp_allocator<A>& a)
1732
+ : spp_::spp_allocator<A>(a)
1733
+ { }
1734
+
1735
+ pointer realloc_or_die(pointer ptr, size_type n)
1736
+ {
1737
+ pointer retval = this->reallocate(ptr, n);
1738
+ if (retval == NULL)
1739
+ {
1740
+ fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
1741
+ "%lu elements for ptr %p", static_cast<unsigned long>(n), ptr);
1742
+ exit(1);
1743
+ }
1744
+ return retval;
1745
+ }
1746
+ };
1747
+
1748
+
1749
+ #ifdef SPP_STORE_NUM_ITEMS
1750
+ uint32_t _num_items() const { return (uint32_t)_num_buckets; }
1751
+ void _set_num_items(uint32_t val) { _num_buckets = static_cast<size_type>(val); }
1752
+ void _incr_num_items() { ++_num_buckets; }
1753
+ void _decr_num_items() { --_num_buckets; }
1754
+ uint32_t _num_alloc() const { return (uint32_t)_num_allocated; }
1755
+ void _set_num_alloc(uint32_t val) { _num_allocated = static_cast<size_type>(val); }
1756
+ #else
1757
+ uint32_t _num_items() const { return spp_popcount(_bitmap); }
1758
+ void _set_num_items(uint32_t ) { }
1759
+ void _incr_num_items() { }
1760
+ void _decr_num_items() { }
1761
+ uint32_t _num_alloc() const { return _sizing(_num_items()); }
1762
+ void _set_num_alloc(uint32_t val) { }
1763
+ #endif
1764
+
1765
+ // The actual data
1766
+ // ---------------
1767
+ value_type * _group; // (small) array of T's
1768
+ group_bm_type _bitmap;
1769
+ group_bm_type _bm_erased; // ones where items have been erased
1770
+
1771
+ #ifdef SPP_STORE_NUM_ITEMS
1772
+ size_type _num_buckets;
1773
+ size_type _num_allocated;
1774
+ #endif
1775
+ };
1776
+
1777
+ // ---------------------------------------------------------------------------
1778
+ // ---------------------------------------------------------------------------
1779
+ template <class T, class Alloc>
1780
+ class sparsetable
1781
+ {
1782
+ public:
1783
+ typedef T value_type;
1784
+ typedef Alloc allocator_type;
1785
+ typedef sparsegroup<value_type, allocator_type> group_type;
1786
+
1787
+ private:
1788
+ typedef typename Alloc::template rebind<group_type>::other group_alloc_type;
1789
+ typedef typename group_alloc_type::size_type group_size_type;
1790
+
1791
+ public:
1792
+ // Basic types
1793
+ // -----------
1794
+ typedef typename allocator_type::size_type size_type;
1795
+ typedef typename allocator_type::difference_type difference_type;
1796
+ typedef value_type& reference;
1797
+ typedef const value_type& const_reference;
1798
+ typedef value_type* pointer;
1799
+ typedef const value_type* const_pointer;
1800
+
1801
+ typedef group_type& GroupsReference;
1802
+ typedef const group_type& GroupsConstReference;
1803
+
1804
+ typedef typename group_type::ne_iterator ColIterator;
1805
+ typedef typename group_type::const_ne_iterator ColConstIterator;
1806
+
1807
+ typedef table_iterator<sparsetable<T, allocator_type> > iterator; // defined with index
1808
+ typedef const_table_iterator<sparsetable<T, allocator_type> > const_iterator; // defined with index
1809
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
1810
+ typedef std::reverse_iterator<iterator> reverse_iterator;
1811
+
1812
+ // These are our special iterators, that go over non-empty buckets in a
1813
+ // table. These aren't const only because you can change non-empty buckets.
1814
+ // ----------------------------------------------------------------------
1815
+ typedef Two_d_iterator<T,
1816
+ group_type *,
1817
+ ColIterator,
1818
+ std::bidirectional_iterator_tag> ne_iterator;
1819
+
1820
+ typedef Two_d_iterator<const T,
1821
+ const group_type *,
1822
+ ColConstIterator,
1823
+ std::bidirectional_iterator_tag> const_ne_iterator;
1824
+
1825
+ // Another special iterator: it frees memory as it iterates (used to resize).
1826
+ // Obviously, you can only iterate over it once, which is why it's an input iterator
1827
+ // ---------------------------------------------------------------------------------
1828
+ typedef Two_d_destructive_iterator<T,
1829
+ group_type *,
1830
+ ColIterator,
1831
+ std::input_iterator_tag,
1832
+ allocator_type> destructive_iterator;
1833
+
1834
+ typedef std::reverse_iterator<ne_iterator> reverse_ne_iterator;
1835
+ typedef std::reverse_iterator<const_ne_iterator> const_reverse_ne_iterator;
1836
+
1837
+
1838
+ // Iterator functions
1839
+ // ------------------
1840
+ iterator begin() { return iterator(this, 0); }
1841
+ const_iterator begin() const { return const_iterator(this, 0); }
1842
+ const_iterator cbegin() const { return const_iterator(this, 0); }
1843
+ iterator end() { return iterator(this, size()); }
1844
+ const_iterator end() const { return const_iterator(this, size()); }
1845
+ const_iterator cend() const { return const_iterator(this, size()); }
1846
+ reverse_iterator rbegin() { return reverse_iterator(end()); }
1847
+ const_reverse_iterator rbegin() const { return const_reverse_iterator(cend()); }
1848
+ const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); }
1849
+ reverse_iterator rend() { return reverse_iterator(begin()); }
1850
+ const_reverse_iterator rend() const { return const_reverse_iterator(cbegin()); }
1851
+ const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
1852
+
1853
+ // Versions for our special non-empty iterator
1854
+ // ------------------------------------------
1855
+ ne_iterator ne_begin() { return ne_iterator (_first_group); }
1856
+ const_ne_iterator ne_begin() const { return const_ne_iterator(_first_group); }
1857
+ const_ne_iterator ne_cbegin() const { return const_ne_iterator(_first_group); }
1858
+ ne_iterator ne_end() { return ne_iterator (_last_group); }
1859
+ const_ne_iterator ne_end() const { return const_ne_iterator(_last_group); }
1860
+ const_ne_iterator ne_cend() const { return const_ne_iterator(_last_group); }
1861
+
1862
+ reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); }
1863
+ const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_end()); }
1864
+ const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_end()); }
1865
+ reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); }
1866
+ const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_begin()); }
1867
+ const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_begin()); }
1868
+
1869
+ destructive_iterator destructive_begin()
1870
+ {
1871
+ return destructive_iterator(_alloc, _first_group);
1872
+ }
1873
+
1874
+ destructive_iterator destructive_end()
1875
+ {
1876
+ return destructive_iterator(_alloc, _last_group);
1877
+ }
1878
+
1879
+ // How to deal with the proper group
1880
+ static group_size_type num_groups(size_type num)
1881
+ {
1882
+ // how many groups are needed to hold num buckets
1883
+ return num == 0 ? (group_size_type)0 :
1884
+ (group_size_type)(((num-1) / SPP_GROUP_SIZE) + 1);
1885
+ }
1886
+
1887
+ typename group_type::size_type pos_in_group(size_type i) const
1888
+ {
1889
+ return static_cast<typename group_type::size_type>(i & SPP_MASK_);
1890
+ }
1891
+
1892
+ size_type group_num(size_type i) const
1893
+ {
1894
+ return (size_type)(i >> SPP_SHIFT_);
1895
+ }
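+ // Worked example (a sketch, assuming a 32-bit group bitmap, so that
+ // SPP_GROUP_SIZE == 32, SPP_SHIFT_ == 5 and SPP_MASK_ == 31): bucket i = 70
+ // lives in group_num(70) == 70 >> 5 == 2 at pos_in_group(70) == 70 & 31 == 6,
+ // and num_groups(70) == ((70 - 1) / 32) + 1 == 3 groups hold 70 buckets.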
1896
+
1897
+ GroupsReference which_group(size_type i)
1898
+ {
1899
+ return _first_group[group_num(i)];
1900
+ }
1901
+
1902
+ GroupsConstReference which_group(size_type i) const
1903
+ {
1904
+ return _first_group[group_num(i)];
1905
+ }
1906
+
1907
+ void _alloc_group_array(group_size_type sz, group_type *&first, group_type *&last)
1908
+ {
1909
+ if (sz)
1910
+ {
1911
+ first = _group_alloc.allocate((size_type)(sz + 1)); // + 1 for end marker
1912
+ first[sz].mark(); // for the ne_iterator
1913
+ last = first + sz;
1914
+ }
1915
+ }
1916
+
1917
+ void _free_group_array(group_type *&first, group_type *&last)
1918
+ {
1919
+ if (first)
1920
+ {
1921
+ _group_alloc.deallocate(first, (group_size_type)(last - first + 1)); // + 1 for end marker
1922
+ first = last = 0;
1923
+ }
1924
+ }
1925
+
1926
+ void _allocate_groups(size_type sz)
1927
+ {
1928
+ if (sz)
1929
+ {
1930
+ _alloc_group_array(sz, _first_group, _last_group);
1931
+ std::uninitialized_fill(_first_group, _last_group, group_type());
1932
+ }
1933
+ }
1934
+
1935
+ void _free_groups()
1936
+ {
1937
+ if (_first_group)
1938
+ {
1939
+ for (group_type *g = _first_group; g != _last_group; ++g)
1940
+ g->destruct(_alloc);
1941
+ _free_group_array(_first_group, _last_group);
1942
+ }
1943
+ }
1944
+
1945
+ void _cleanup()
1946
+ {
1947
+ _free_groups(); // sets _first_group = _last_group = 0
1948
+ _table_size = 0;
1949
+ _num_buckets = 0;
1950
+ }
1951
+
1952
+ void _init()
1953
+ {
1954
+ _first_group = 0;
1955
+ _last_group = 0;
1956
+ _table_size = 0;
1957
+ _num_buckets = 0;
1958
+ }
1959
+
1960
+ void _copy(const sparsetable &o)
1961
+ {
1962
+ _table_size = o._table_size;
1963
+ _num_buckets = o._num_buckets;
1964
+ _alloc = o._alloc; // todo - copy or move allocator according to...
1965
+ _group_alloc = o._group_alloc; // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map
1966
+
1967
+ group_size_type sz = (group_size_type)(o._last_group - o._first_group);
1968
+ if (sz)
1969
+ {
1970
+ _alloc_group_array(sz, _first_group, _last_group);
1971
+ for (group_size_type i=0; i<sz; ++i)
1972
+ new (_first_group + i) group_type(o._first_group[i], _alloc);
1973
+ }
1974
+ }
1975
+
1976
+ public:
1977
+ // Constructors -- default, normal (when you specify size), and copy
1978
+ explicit sparsetable(size_type sz = 0, const allocator_type &alloc = allocator_type()) :
1979
+ _first_group(0),
1980
+ _last_group(0),
1981
+ _table_size(sz),
1982
+ _num_buckets(0),
1983
+ _alloc(alloc)
1984
+ // todo - copy or move allocator according to
1985
+ // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map
1986
+ {
1987
+ _allocate_groups(num_groups(sz));
1988
+ }
1989
+
1990
+ ~sparsetable()
1991
+ {
1992
+ _free_groups();
1993
+ }
1994
+
1995
+ sparsetable(const sparsetable &o)
1996
+ {
1997
+ _init();
1998
+ _copy(o);
1999
+ }
2000
+
2001
+ sparsetable& operator=(const sparsetable &o)
2002
+ {
2003
+ _cleanup();
2004
+ _copy(o);
2005
+ return *this;
2006
+ }
2007
+
2008
+
2009
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
2010
+ sparsetable(sparsetable&& o)
2011
+ {
2012
+ _init();
2013
+ this->swap(o);
2014
+ }
2015
+
2016
+ sparsetable(sparsetable&& o, const allocator_type &alloc)
2017
+ {
2018
+ _init();
2019
+ this->swap(o);
2020
+ _alloc = alloc; // [gp todo] is this correct?
2021
+ }
2022
+
2023
+ sparsetable& operator=(sparsetable&& o)
2024
+ {
2025
+ _cleanup();
2026
+ this->swap(o);
2027
+ return *this;
2028
+ }
2029
+ #endif
2030
+
2031
+ // Many STL algorithms use swap instead of copy constructors
2032
+ void swap(sparsetable& o)
2033
+ {
2034
+ using std::swap;
2035
+
2036
+ swap(_first_group, o._first_group);
2037
+ swap(_last_group, o._last_group);
2038
+ swap(_table_size, o._table_size);
2039
+ swap(_num_buckets, o._num_buckets);
2040
+ if (_alloc != o._alloc)
2041
+ swap(_alloc, o._alloc);
2042
+ if (_group_alloc != o._group_alloc)
2043
+ swap(_group_alloc, o._group_alloc);
2044
+ }
2045
+
2046
+ // It's always nice to be able to clear a table without deallocating it
2047
+ void clear()
2048
+ {
2049
+ _free_groups();
2050
+ _num_buckets = 0;
2051
+ _table_size = 0;
2052
+ }
2053
+
2054
+ inline allocator_type get_allocator() const
2055
+ {
2056
+ return _alloc;
2057
+ }
2058
+
2059
+
2060
+ // Functions that tell you about size.
2061
+ // NOTE: empty() is non-intuitive! It does not tell you the number
2062
+ // of not-empty buckets (use num_nonempty() for that). Instead
2063
+ // it says whether you've allocated any buckets or not.
2064
+ // ----------------------------------------------------------------
2065
+ size_type size() const { return _table_size; }
2066
+ size_type max_size() const { return _alloc.max_size(); }
2067
+ bool empty() const { return _table_size == 0; }
2068
+ size_type num_nonempty() const { return _num_buckets; }
2069
+
2070
+ // OK, we'll let you resize one of these puppies
2071
+ void resize(size_type new_size)
2072
+ {
2073
+ group_size_type sz = num_groups(new_size);
2074
+ group_size_type old_sz = (group_size_type)(_last_group - _first_group);
2075
+
2076
+ if (sz != old_sz)
2077
+ {
2078
+ // resize group array
2079
+ // ------------------
2080
+ group_type *first = 0, *last = 0;
2081
+ if (sz)
2082
+ {
2083
+ _alloc_group_array(sz, first, last);
2084
+ memcpy(first, _first_group, sizeof(*first) * (std::min)(sz, old_sz));
2085
+ }
2086
+
2087
+ if (sz < old_sz)
2088
+ {
2089
+ for (group_type *g = _first_group + sz; g != _last_group; ++g)
2090
+ g->destruct(_alloc);
2091
+ }
2092
+ else
2093
+ std::uninitialized_fill(first + old_sz, last, group_type());
2094
+
2095
+ _free_group_array(_first_group, _last_group);
2096
+ _first_group = first;
2097
+ _last_group = last;
2098
+ }
2099
+ #if 0
2100
+ // used only in test program
2101
+ // todo: fix if sparsetable to be used directly
2102
+ // --------------------------------------------
2103
+ if (new_size < _table_size)
2104
+ {
2105
+ // lower num_buckets, clear last group
2106
+ if (pos_in_group(new_size) > 0) // need to clear inside last group
2107
+ groups.back().erase(_alloc, groups.back().begin() + pos_in_group(new_size),
2108
+ groups.back().end());
2109
+ _num_buckets = 0; // refigure # of used buckets
2110
+ for (const group_type *group = _first_group; group != _last_group; ++group)
2111
+ _num_buckets += group->num_nonempty();
2112
+ }
2113
+ #endif
2114
+ _table_size = new_size;
2115
+ }
2116
+
2117
+ // We let you see if a bucket is non-empty without retrieving it
2118
+ // -------------------------------------------------------------
2119
+ bool test(size_type i) const
2120
+ {
2121
+ // assert(i < _table_size);
2122
+ return which_group(i).test(pos_in_group(i));
2123
+ }
2124
+
2125
+ // also tests for erased values
2126
+ // ----------------------------
2127
+ bool test_strict(size_type i) const
2128
+ {
2129
+ // assert(i < _table_size);
2130
+ return which_group(i).test_strict(pos_in_group(i));
2131
+ }
2132
+
2133
+ friend struct GrpPos;
2134
+
2135
+ struct GrpPos
2136
+ {
2137
+ typedef typename sparsetable::ne_iterator ne_iter;
2138
+ GrpPos(const sparsetable &table, size_type i) :
2139
+ grp(table.which_group(i)), pos(table.pos_in_group(i)) {}
2140
+
2141
+ bool test_strict() const { return grp.test_strict(pos); }
2142
+ bool test() const { return grp.test(pos); }
2143
+ typename sparsetable::reference unsafe_get() const { return grp.unsafe_get(pos); }
2144
+ ne_iter get_iter(typename sparsetable::reference ref)
2145
+ {
2146
+ return ne_iter((group_type *)&grp, &ref);
2147
+ }
2148
+
2149
+ void erase(sparsetable &table) // item *must* be present
2150
+ {
2151
+ assert(table._num_buckets);
2152
+ ((group_type &)grp).erase(table._alloc, pos);
2153
+ --table._num_buckets;
2154
+ }
2155
+
2156
+ private:
2157
+ GrpPos* operator=(const GrpPos&);
2158
+
2159
+ const group_type &grp;
2160
+ typename group_type::size_type pos;
2161
+ };
2162
+
2163
+ bool test(iterator pos) const
2164
+ {
2165
+ return which_group(pos.pos).test(pos_in_group(pos.pos));
2166
+ }
2167
+
2168
+ bool test(const_iterator pos) const
2169
+ {
2170
+ return which_group(pos.pos).test(pos_in_group(pos.pos));
2171
+ }
2172
+
2173
+ // TODO(csilvers): make protected + friend
2174
+ // This is used by sparse_hashtable to get an element from the table
2175
+ // when we know it exists (because the caller has called test(i)).
2176
+ // -----------------------------------------------------------------
2177
+ reference unsafe_get(size_type i) const
2178
+ {
2179
+ assert(i < _table_size);
2180
+ // assert(test(i));
2181
+ return which_group(i).unsafe_get(pos_in_group(i));
2182
+ }
2183
+
2184
+ // Needed for hashtables; returns the element as a ne_iterator. Crashes for empty buckets.
2185
+ const_ne_iterator get_iter(size_type i) const
2186
+ {
2187
+ //assert(test(i)); // how can a ne_iterator point to an empty bucket?
2188
+
2189
+ size_type grp_idx = group_num(i);
2190
+
2191
+ return const_ne_iterator(_first_group + grp_idx,
2192
+ (_first_group[grp_idx].ne_begin() +
2193
+ _first_group[grp_idx].pos_to_offset(pos_in_group(i))));
2194
+ }
2195
+
2196
+ const_ne_iterator get_iter(size_type i, ColIterator col_it) const
2197
+ {
2198
+ return const_ne_iterator(_first_group + group_num(i), col_it);
2199
+ }
2200
+
2201
+ // For nonempty we can return a non-const version
2202
+ ne_iterator get_iter(size_type i)
2203
+ {
2204
+ //assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
2205
+
2206
+ size_type grp_idx = group_num(i);
2207
+
2208
+ return ne_iterator(_first_group + grp_idx,
2209
+ (_first_group[grp_idx].ne_begin() +
2210
+ _first_group[grp_idx].pos_to_offset(pos_in_group(i))));
2211
+ }
2212
+
2213
+ ne_iterator get_iter(size_type i, ColIterator col_it)
2214
+ {
2215
+ return ne_iterator(_first_group + group_num(i), col_it);
2216
+ }
2217
+
2218
+ // And the reverse transformation.
2219
+ size_type get_pos(const const_ne_iterator& it) const
2220
+ {
2221
+ difference_type current_row = it.row_current - _first_group;
2222
+ difference_type current_col = (it.col_current - _first_group[current_row].ne_begin());
2223
+ return ((current_row * SPP_GROUP_SIZE) +
2224
+ _first_group[current_row].offset_to_pos(current_col));
2225
+ }
2226
+
2227
+ // Val can be reference or const_reference
2228
+ // ---------------------------------------
2229
+ template <class Val>
2230
+ reference set(size_type i, Val &val)
2231
+ {
2232
+ assert(i < _table_size);
2233
+ group_type &group = which_group(i);
2234
+ typename group_type::size_type old_numbuckets = group.num_nonempty();
2235
+ pointer p(group.set(_alloc, pos_in_group(i), val));
2236
+ _num_buckets += group.num_nonempty() - old_numbuckets;
2237
+ return *p;
2238
+ }
2239
+
2240
+ // used in _move_from (where we can move the old value instead of copying it
2241
+ void move(size_type i, reference val)
2242
+ {
2243
+ assert(i < _table_size);
2244
+ which_group(i).set(_alloc, pos_in_group(i), val);
2245
+ ++_num_buckets;
2246
+ }
2247
+
2248
+ // This takes the specified elements out of the table.
2249
+ // --------------------------------------------------
2250
+ void erase(size_type i)
2251
+ {
2252
+ assert(i < _table_size);
2253
+
2254
+ GroupsReference grp(which_group(i));
2255
+ typename group_type::size_type old_numbuckets = grp.num_nonempty();
2256
+ grp.erase(_alloc, pos_in_group(i));
2257
+ _num_buckets += grp.num_nonempty() - old_numbuckets;
2258
+ }
2259
+
2260
+ void erase(iterator pos)
2261
+ {
2262
+ erase(pos.pos);
2263
+ }
2264
+
2265
+ void erase(iterator start_it, iterator end_it)
2266
+ {
2267
+ // This could be more efficient, but then we'd need to figure
2268
+ // out if we spanned groups or not. Doesn't seem worth it.
2269
+ for (; start_it != end_it; ++start_it)
2270
+ erase(start_it);
2271
+ }
2272
+
2273
+ const_ne_iterator erase(const_ne_iterator it)
2274
+ {
2275
+ ne_iterator res(it);
2276
+ if (res.row_current->erase_ne(_alloc, res))
2277
+ _num_buckets--;
2278
+ return res;
2279
+ }
2280
+
2281
+ const_ne_iterator erase(const_ne_iterator f, const_ne_iterator l)
2282
+ {
2283
+ size_t diff = l - f;
2284
+ while (diff--)
2285
+ f = erase(f);
2286
+ return f;
2287
+ }
2288
+
2289
+ // We support reading and writing tables to disk. We don't store
2290
+ // the actual array contents (which we don't know how to store),
2291
+ // just the groups and sizes. Returns true if all went ok.
2292
+
2293
+ private:
2294
+ // Every time the disk format changes, this should probably change too
2295
+ typedef unsigned long MagicNumberType;
2296
+ static const MagicNumberType MAGIC_NUMBER = 0x24687531;
2297
+
2298
+ // Old versions of this code write all data in 32 bits. We need to
2299
+ // support these files as well as having support for 64-bit systems.
2300
+ // So we use the following encoding scheme: for values < 2^32-1, we
2301
+ // store in 4 bytes in big-endian order. For larger values, we
2302
+ // store 0xFFFFFFFF followed by 8 bytes in big-endian order. This
2303
+ // causes us to mis-read old-version code that stores exactly
2304
+ // 0xFFFFFFFF, but I don't think that is likely to have happened for
2305
+ // these particular values.
2306
+ template <typename OUTPUT, typename IntType>
2307
+ static bool write_32_or_64(OUTPUT* fp, IntType value)
2308
+ {
2309
+ if (value < 0xFFFFFFFFULL) // fits in 4 bytes
2310
+ {
2311
+ if (!sparsehash_internal::write_bigendian_number(fp, value, 4))
2312
+ return false;
2313
+ }
2314
+ else
2315
+ {
2316
+ if (!sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4))
2317
+ return false;
2318
+ if (!sparsehash_internal::write_bigendian_number(fp, value, 8))
2319
+ return false;
2320
+ }
2321
+ return true;
2322
+ }
2323
+
2324
+ template <typename INPUT, typename IntType>
2325
+ static bool read_32_or_64(INPUT* fp, IntType *value)
2326
+ {
2327
+ // reads into value
2328
+ MagicNumberType first4 = 0; // a convenient 32-bit unsigned type
2329
+ if (!sparsehash_internal::read_bigendian_number(fp, &first4, 4))
2330
+ return false;
2331
+
2332
+ if (first4 < 0xFFFFFFFFULL)
2333
+ {
2334
+ *value = first4;
2335
+ }
2336
+ else
2337
+ {
2338
+ if (!sparsehash_internal::read_bigendian_number(fp, value, 8))
2339
+ return false;
2340
+ }
2341
+ return true;
2342
+ }
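+ // Illustrative example of the encoding above: the value 1000 is written as
+ // the four big-endian bytes 00 00 03 E8, while a value such as 2^33 is
+ // written as the marker FF FF FF FF followed by the eight big-endian bytes
+ // 00 00 00 02 00 00 00 00.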
2343
+
2344
+ public:
2345
+ // read/write_metadata() and read/write_nopointer_data() are DEPRECATED.
2346
+ // Use serialize() and unserialize(), below, for new code.
2347
+
2348
+ template <typename OUTPUT>
2349
+ bool write_metadata(OUTPUT *fp) const
2350
+ {
2351
+ if (!write_32_or_64(fp, MAGIC_NUMBER)) return false;
2352
+ if (!write_32_or_64(fp, _table_size)) return false;
2353
+ if (!write_32_or_64(fp, _num_buckets)) return false;
2354
+
2355
+ for (const group_type *group = _first_group; group != _last_group; ++group)
2356
+ if (group->write_metadata(fp) == false)
2357
+ return false;
2358
+ return true;
2359
+ }
2360
+
2361
+ // Reading destroys the old table contents! Returns true if read ok.
2362
+ template <typename INPUT>
2363
+ bool read_metadata(INPUT *fp)
2364
+ {
2365
+ size_type magic_read = 0;
2366
+ if (!read_32_or_64(fp, &magic_read)) return false;
2367
+ if (magic_read != MAGIC_NUMBER)
2368
+ {
2369
+ clear(); // just to be consistent
2370
+ return false;
2371
+ }
2372
+
2373
+ if (!read_32_or_64(fp, &_table_size)) return false;
2374
+ if (!read_32_or_64(fp, &_num_buckets)) return false;
2375
+
2376
+ resize(_table_size); // so the vector's sized ok
2377
+ for (group_type *group = _first_group; group != _last_group; ++group)
2378
+ if (group->read_metadata(_alloc, fp) == false)
2379
+ return false;
2380
+ return true;
2381
+ }
2382
+
2383
+ // This code is identical to that for SparseGroup
2384
+ // If your keys and values are simple enough, we can write them
2385
+ // to disk for you. "simple enough" means no pointers.
2386
+ // However, we don't try to normalize endianness
2387
+ bool write_nopointer_data(FILE *fp) const
2388
+ {
2389
+ for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it)
2390
+ if (!fwrite(&*it, sizeof(*it), 1, fp))
2391
+ return false;
2392
+ return true;
2393
+ }
2394
+
2395
+ // When reading, we have to override the potential const-ness of *it
2396
+ bool read_nopointer_data(FILE *fp)
2397
+ {
2398
+ for (ne_iterator it = ne_begin(); it != ne_end(); ++it)
2399
+ if (!fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp))
2400
+ return false;
2401
+ return true;
2402
+ }
2403
+
2404
+ // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
2405
+ // (istream, ostream, etc) *or* a class providing
2406
+ // Read(void*, size_t) and Write(const void*, size_t)
2407
+ // (respectively), which writes a buffer into a stream
2408
+ // (which the INPUT/OUTPUT instance presumably owns).
2409
+
2410
+ typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
2411
+
2412
+ // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
2413
+ template <typename ValueSerializer, typename OUTPUT>
2414
+ bool serialize(ValueSerializer serializer, OUTPUT *fp)
2415
+ {
2416
+ if (!write_metadata(fp))
2417
+ return false;
2418
+ for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it)
2419
+ if (!serializer(fp, *it))
2420
+ return false;
2421
+ return true;
2422
+ }
2423
+
2424
+ // ValueSerializer: a functor. operator()(INPUT*, value_type*)
2425
+ template <typename ValueSerializer, typename INPUT>
2426
+ bool unserialize(ValueSerializer serializer, INPUT *fp)
2427
+ {
2428
+ clear();
2429
+ if (!read_metadata(fp))
2430
+ return false;
2431
+ for (ne_iterator it = ne_begin(); it != ne_end(); ++it)
2432
+ if (!serializer(fp, &*it))
2433
+ return false;
2434
+ return true;
2435
+ }
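+ // A minimal ValueSerializer sketch for illustration (hypothetical name,
+ // assuming value_type is int and FILE-based I/O; POD value_types can simply
+ // use the NopointerSerializer typedef above):
+ //
+ //     struct IntSerializer
+ //     {
+ //         bool operator()(FILE *fp, const int &v) const
+ //         { return fwrite(&v, sizeof(v), 1, fp) == 1; }
+ //         bool operator()(FILE *fp, int *v) const
+ //         { return fread(v, sizeof(*v), 1, fp) == 1; }
+ //     };
+ //
+ // used as: table.serialize(IntSerializer(), fp) and table.unserialize(IntSerializer(), fp)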
2436
+
2437
+ // Comparisons. Note the comparisons are pretty arbitrary: we
2438
+ // compare values of the first index that isn't equal (using default
2439
+ // value for empty buckets).
2440
+ bool operator==(const sparsetable& x) const
2441
+ {
2442
+ return (_table_size == x._table_size &&
2443
+ _num_buckets == x._num_buckets &&
2444
+ _first_group == x._first_group);
2445
+ }
2446
+
2447
+ bool operator<(const sparsetable& x) const
2448
+ {
2449
+ return std::lexicographical_compare(begin(), end(), x.begin(), x.end());
2450
+ }
2451
+ bool operator!=(const sparsetable& x) const { return !(*this == x); }
2452
+ bool operator<=(const sparsetable& x) const { return !(x < *this); }
2453
+ bool operator>(const sparsetable& x) const { return x < *this; }
2454
+ bool operator>=(const sparsetable& x) const { return !(*this < x); }
2455
+
2456
+
2457
+ private:
2458
+ // The actual data
2459
+ // ---------------
2460
+ group_type * _first_group;
2461
+ group_type * _last_group;
2462
+ size_type _table_size; // how many buckets they want
2463
+ size_type _num_buckets; // number of non-empty buckets
2464
+ group_alloc_type _group_alloc;
2465
+ allocator_type _alloc;
2466
+ };
2467
+
2468
+ // ----------------------------------------------------------------------
2469
+ // S P A R S E _ H A S H T A B L E
2470
+ // ----------------------------------------------------------------------
2471
+ // Hashtable class, used to implement the hashed associative containers
2472
+ // hash_set and hash_map.
2473
+ //
2474
+ // Value: what is stored in the table (each bucket is a Value).
2475
+ // Key: something in a 1-to-1 correspondence to a Value, that can be used
2476
+ // to search for a Value in the table (find() takes a Key).
2477
+ // HashFcn: Takes a Key and returns an integer, the more unique the better.
2478
+ // ExtractKey: given a Value, returns the unique Key associated with it.
2479
+ // Must inherit from unary_function, or at least have a
2480
+ // result_type enum indicating the return type of operator().
2481
+ // EqualKey: Given two Keys, says whether they are the same (that is,
2482
+ // if they are both associated with the same Value).
2483
+ // Alloc: STL allocator to use to allocate memory.
2484
+ //
2485
+ // ----------------------------------------------------------------------
2486
+
2487
+ // The probing method
2488
+ // ------------------
2489
+ // Linear probing
2490
+ // #define JUMP_(key, num_probes) ( 1 )
2491
+ // Quadratic probing
2492
+ #define JUMP_(key, num_probes) ( num_probes )
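+ // Illustrative note on the quadratic JUMP_ above: starting from an initial
+ // bucket h, the probe loops below visit h, h+1, h+3, h+6, h+10, ... (modulo
+ // bucket_count()), i.e. the k-th collision adds k, so the displacement from
+ // h after k collisions is the triangular number k*(k+1)/2. For example,
+ // h = 5 in a 32-bucket table probes buckets 5, 6, 8, 11, 15, ...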
2493
+
2494
+
2495
+ // -------------------------------------------------------------------
2496
+ // -------------------------------------------------------------------
2497
+ template <class Value, class Key, class HashFcn,
2498
+ class ExtractKey, class SetKey, class EqualKey, class Alloc>
2499
+ class sparse_hashtable
2500
+ {
2501
+ public:
2502
+ typedef Key key_type;
2503
+ typedef Value value_type;
2504
+ typedef HashFcn hasher; // user provided or spp_hash<Key>
2505
+ typedef EqualKey key_equal;
2506
+ typedef Alloc allocator_type;
2507
+
2508
+ typedef typename allocator_type::size_type size_type;
2509
+ typedef typename allocator_type::difference_type difference_type;
2510
+ typedef value_type& reference;
2511
+ typedef const value_type& const_reference;
2512
+ typedef value_type* pointer;
2513
+ typedef const value_type* const_pointer;
2514
+
2515
+ // Table is the main storage class.
2516
+ typedef sparsetable<value_type, allocator_type> Table;
2517
+ typedef typename Table::ne_iterator ne_it;
2518
+ typedef typename Table::const_ne_iterator cne_it;
2519
+ typedef typename Table::destructive_iterator dest_it;
2520
+ typedef typename Table::ColIterator ColIterator;
2521
+
2522
+ typedef ne_it iterator;
2523
+ typedef cne_it const_iterator;
2524
+ typedef dest_it destructive_iterator;
2525
+
2526
+ // These come from tr1. For us they're the same as regular iterators.
2527
+ // -------------------------------------------------------------------
2528
+ typedef iterator local_iterator;
2529
+ typedef const_iterator const_local_iterator;
2530
+
2531
+ // How full we let the table get before we resize
2532
+ // ----------------------------------------------
2533
+ static const int HT_OCCUPANCY_PCT; // = 80 (out of 100);
2534
+
2535
+ // How empty we let the table get before we resize lower, by default.
2536
+ // (0.0 means never resize lower.)
2537
+ // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
2538
+ // ------------------------------------------------------------------
2539
+ static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;
2540
+
2541
+ // Minimum size we're willing to let hashtables be.
2542
+ // Must be a power of two, and at least 4.
2543
+ // Note, however, that for a given hashtable, the initial size is a
2544
+ // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
2545
+ // ------------------------------------------------------------------
2546
+ static const size_type HT_MIN_BUCKETS = 4;
2547
+
2548
+ // By default, if you don't specify a hashtable size at
2549
+ // construction-time, we use this size. Must be a power of two, and
2550
+ // at least HT_MIN_BUCKETS.
2551
+ // -----------------------------------------------------------------
2552
+ static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
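+ // (Illustrative: with the defaults above, a default-constructed table starts
+ // with 32 buckets and is grown once the occupied-plus-deleted count would
+ // exceed roughly 80% of that, i.e. around 25 entries.)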
2553
+
2554
+ // iterators
2555
+ // ---------
2556
+ iterator begin() { return _mk_iterator(table.ne_begin()); }
2557
+ iterator end() { return _mk_iterator(table.ne_end()); }
2558
+ const_iterator begin() const { return _mk_const_iterator(table.ne_cbegin()); }
2559
+ const_iterator end() const { return _mk_const_iterator(table.ne_cend()); }
2560
+ const_iterator cbegin() const { return _mk_const_iterator(table.ne_cbegin()); }
2561
+ const_iterator cend() const { return _mk_const_iterator(table.ne_cend()); }
2562
+
2563
+ // These come from tr1 unordered_map. They iterate over 'bucket' n.
2564
+ // For sparsehashtable, we could consider each 'group' to be a bucket,
2565
+ // I guess, but I don't really see the point. We'll just consider
2566
+ // bucket n to be the n-th element of the sparsetable, if it's occupied,
2567
+ // or some empty element, otherwise.
2568
+ // ---------------------------------------------------------------------
2569
+ local_iterator begin(size_type i)
2570
+ {
2571
+ return _mk_iterator(table.test(i) ? table.get_iter(i) : table.ne_end());
2572
+ }
2573
+
2574
+ local_iterator end(size_type i)
2575
+ {
2576
+ local_iterator it = begin(i);
2577
+ if (table.test(i))
2578
+ ++it;
2579
+ return _mk_iterator(it);
2580
+ }
2581
+
2582
+ const_local_iterator begin(size_type i) const
2583
+ {
2584
+ return _mk_const_iterator(table.test(i) ? table.get_iter(i) : table.ne_cend());
2585
+ }
2586
+
2587
+ const_local_iterator end(size_type i) const
2588
+ {
2589
+ const_local_iterator it = begin(i);
2590
+ if (table.test(i))
2591
+ ++it;
2592
+ return _mk_const_iterator(it);
2593
+ }
2594
+
2595
+ const_local_iterator cbegin(size_type i) const { return begin(i); }
2596
+ const_local_iterator cend(size_type i) const { return end(i); }
2597
+
2598
+ // This is used when resizing
2599
+ // --------------------------
2600
+ destructive_iterator destructive_begin() { return _mk_destructive_iterator(table.destructive_begin()); }
2601
+ destructive_iterator destructive_end() { return _mk_destructive_iterator(table.destructive_end()); }
2602
+
2603
+
2604
+ // accessor functions for the things we templatize on, basically
2605
+ // -------------------------------------------------------------
2606
+ hasher hash_funct() const { return settings; }
2607
+ key_equal key_eq() const { return key_info; }
2608
+ allocator_type get_allocator() const { return table.get_allocator(); }
2609
+
2610
+ // Accessor function for statistics gathering.
2611
+ unsigned int num_table_copies() const { return settings.num_ht_copies(); }
2612
+
2613
+ private:
2614
+ // This is used as a tag for the copy constructor, saying to destroy its
2615
+ // arg. We have two ways of destructively copying: with potentially growing
2616
+ // the hashtable as we copy, and without. To make sure the outside world
2617
+ // can't do a destructive copy, we make the typename private.
2618
+ // -----------------------------------------------------------------------
2619
+ enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};
2620
+
2621
+ void _squash_deleted()
2622
+ {
2623
+ // gets rid of any deleted entries we have
2624
+ // ---------------------------------------
2625
+ if (num_deleted)
2626
+ {
2627
+ // get rid of deleted before writing
2628
+ sparse_hashtable tmp(MoveDontGrow, *this);
2629
+ swap(tmp); // now we are tmp
2630
+ }
2631
+ assert(num_deleted == 0);
2632
+ }
2633
+
2634
+ // creating iterators from sparsetable::ne_iterators
2635
+ // -------------------------------------------------
2636
+ iterator _mk_iterator(ne_it it) const { return it; }
2637
+ const_iterator _mk_const_iterator(cne_it it) const { return it; }
2638
+ destructive_iterator _mk_destructive_iterator(dest_it it) const { return it; }
2639
+
2640
+ public:
2641
+ size_type size() const { return table.num_nonempty(); }
2642
+ size_type max_size() const { return table.max_size(); }
2643
+ bool empty() const { return size() == 0; }
2644
+ size_type bucket_count() const { return table.size(); }
2645
+ size_type max_bucket_count() const { return max_size(); }
2646
+ // These are tr1 methods. Their idea of 'bucket' doesn't map well to
2647
+ // what we do. We just say every bucket has 0 or 1 items in it.
2648
+ size_type bucket_size(size_type i) const
2649
+ {
2650
+ return (size_type)(begin(i) == end(i) ? 0 : 1);
2651
+ }
2652
+
2653
+ private:
2654
+ // Because of the above, size_type(-1) is never legal; use it for errors
2655
+ // ---------------------------------------------------------------------
2656
+ static const size_type ILLEGAL_BUCKET = size_type(-1);
2657
+
2658
+ // Used after a string of deletes. Returns true if we actually shrank.
2659
+ // TODO(csilvers): take a delta so we can take into account inserts
2660
+ // done after shrinking. Maybe make part of the Settings class?
2661
+ // --------------------------------------------------------------------
2662
+ bool _maybe_shrink()
2663
+ {
2664
+ assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
2665
+ assert(bucket_count() >= HT_MIN_BUCKETS);
2666
+ bool retval = false;
2667
+
2668
+ // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
2669
+ // we'll never shrink until you get relatively big, and we'll never
2670
+ // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
2671
+ // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
2672
+ // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
2673
+ // ---------------------------------------------------------------
2674
+ const size_type num_remain = table.num_nonempty();
2675
+ const size_type shrink_threshold = settings.shrink_threshold();
2676
+ if (shrink_threshold > 0 && num_remain < shrink_threshold &&
2677
+ bucket_count() > HT_DEFAULT_STARTING_BUCKETS)
2678
+ {
2679
+ const float shrink_factor = settings.shrink_factor();
2680
+ size_type sz = (size_type)(bucket_count() / 2); // find how much we should shrink
2681
+ while (sz > HT_DEFAULT_STARTING_BUCKETS &&
2682
+ num_remain < static_cast<size_type>(sz * shrink_factor))
2683
+ {
2684
+ sz /= 2; // stay a power of 2
2685
+ }
2686
+ sparse_hashtable tmp(MoveDontCopy, *this, sz);
2687
+ swap(tmp); // now we are tmp
2688
+ retval = true;
2689
+ }
2690
+ settings.set_consider_shrink(false); // because we just considered it
2691
+ return retval;
2692
+ }
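+ // Worked example of the shrink loop above (a sketch, assuming the default
+ // factors: 80% occupancy and a shrink factor of 0.4 * 0.80 = 0.32): with
+ // bucket_count() == 1024 and only 50 live elements, the candidate size starts
+ // at 1024 / 2 == 512 and is halved to 256 and then 128; it stops there because
+ // 50 >= 128 * 0.32 == 40.96, and the table is rebuilt with roughly 128 buckets.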
2693
+
2694
+ // We'll let you resize a hashtable -- though this makes us copy all!
2695
+ // When you resize, you say, "make it big enough for this many more elements"
2696
+ // Returns true if we actually resized, false if size was already ok.
2697
+ // --------------------------------------------------------------------------
2698
+ bool _resize_delta(size_type delta)
2699
+ {
2700
+ bool did_resize = false;
2701
+ if (settings.consider_shrink())
2702
+ {
2703
+ // see if lots of deletes happened
2704
+ if (_maybe_shrink())
2705
+ did_resize = true;
2706
+ }
2707
+ if (table.num_nonempty() >=
2708
+ (std::numeric_limits<size_type>::max)() - delta)
2709
+ {
2710
+ throw_exception(std::length_error("resize overflow"));
2711
+ }
2712
+
2713
+ size_type num_occupied = (size_type)(table.num_nonempty() + num_deleted);
2714
+
2715
+ if (bucket_count() >= HT_MIN_BUCKETS &&
2716
+ (num_occupied + delta) <= settings.enlarge_threshold())
2717
+ return did_resize; // we're ok as we are
2718
+
2719
+ // Sometimes, we need to resize just to get rid of all the
2720
+ // "deleted" buckets that are clogging up the hashtable. So when
2721
+ // deciding whether to resize, count the deleted buckets (which
2722
+ // are currently taking up room).
2723
+ // -------------------------------------------------------------
2724
+ const size_type needed_size =
2725
+ settings.min_buckets((size_type)(num_occupied + delta), (size_type)0);
2726
+
2727
+ if (needed_size <= bucket_count()) // we have enough buckets
2728
+ return did_resize;
2729
+
2730
+ size_type resize_to = settings.min_buckets((size_type)(num_occupied + delta), bucket_count());
2731
+
2732
+ if (resize_to < needed_size && // may double resize_to
2733
+ resize_to < (std::numeric_limits<size_type>::max)() / 2)
2734
+ {
2735
+ // This situation means that we have enough deleted elements,
2736
+ // that once we purge them, we won't actually have needed to
2737
+ // grow. But we may want to grow anyway: if we just purge one
2738
+ // element, say, we'll have to grow anyway next time we
2739
+ // insert. Might as well grow now, since we're already going
2740
+ // through the trouble of copying (in order to purge the
2741
+ // deleted elements).
2742
+ const size_type target =
2743
+ static_cast<size_type>(settings.shrink_size((size_type)(resize_to*2)));
2744
+ if (table.num_nonempty() + delta >= target)
2745
+ {
2746
+ // Good, we won't be below the shrink threshold even if we double.
2747
+ resize_to *= 2;
2748
+ }
2749
+ }
2750
+
2751
+ sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
2752
+ swap(tmp); // now we are tmp
2753
+ return true;
2754
+ }
2755
+
2756
+ // Used to actually do the rehashing when we grow/shrink a hashtable
2757
+ // -----------------------------------------------------------------
2758
+ void _copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted)
2759
+ {
2760
+ clear(); // clear table, set num_deleted to 0
2761
+
2762
+ // If we need to change the size of our table, do it now
2763
+ const size_type resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
2764
+
2765
+ if (resize_to > bucket_count())
2766
+ {
2767
+ // we don't have enough buckets
2768
+ table.resize(resize_to); // sets the number of buckets
2769
+ settings.reset_thresholds(bucket_count());
2770
+ }
2771
+
2772
+ // We use a normal iterator to get buckets from ht
2773
+ // We could use insert() here, but since we know there are
2774
+ // no duplicates, we can be more efficient
2775
+ assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two
2776
+ for (const_iterator it = ht.begin(); it != ht.end(); ++it)
2777
+ {
2778
+ size_type num_probes = 0; // how many times we've probed
2779
+ size_type bucknum;
2780
+ const size_type bucket_count_minus_one = bucket_count() - 1;
2781
+ for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
2782
+ table.test(bucknum); // table.test() OK since no erase()
2783
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one)
2784
+ {
2785
+ ++num_probes;
2786
+ assert(num_probes < bucket_count()
2787
+ && "Hashtable is full: an error in key_equal<> or hash<>");
2788
+ }
2789
+ table.set(bucknum, *it); // copies the value to here
2790
+ }
2791
+ settings.inc_num_ht_copies();
2792
+ }
2793
+
2794
+ // Implementation is like _copy_from, but it destroys the table of the
2795
+ // "from" guy by freeing sparsetable memory as we iterate. This is
2796
+ // useful in resizing, since we're throwing away the "from" guy anyway.
2797
+ // --------------------------------------------------------------------
2798
+ void _move_from(MoveDontCopyT mover, sparse_hashtable &ht,
2799
+ size_type min_buckets_wanted)
2800
+ {
2801
+ clear();
2802
+
2803
+ // If we need to change the size of our table, do it now
2804
+ size_type resize_to;
2805
+ if (mover == MoveDontGrow)
2806
+ resize_to = ht.bucket_count(); // keep same size as old ht
2807
+ else // MoveDontCopy
2808
+ resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
2809
+ if (resize_to > bucket_count())
2810
+ {
2811
+ // we don't have enough buckets
2812
+ table.resize(resize_to); // sets the number of buckets
2813
+ settings.reset_thresholds(bucket_count());
2814
+ }
2815
+
2816
+ // We use a normal iterator to get buckets from ht
2817
+ // We could use insert() here, but since we know there are
2818
+ // no duplicates, we can be more efficient
2819
+ assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two
2820
+ const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1);
2821
+
2822
+ // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
2823
+ for (destructive_iterator it = ht.destructive_begin();
2824
+ it != ht.destructive_end(); ++it)
2825
+ {
2826
+ size_type num_probes = 0;
2827
+ size_type bucknum;
2828
+ for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
2829
+ table.test(bucknum); // table.test() OK since no erase()
2830
+ bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & (bucket_count()-1)))
2831
+ {
2832
+ ++num_probes;
2833
+ assert(num_probes < bucket_count()
2834
+ && "Hashtable is full: an error in key_equal<> or hash<>");
2835
+ }
2836
+ table.move(bucknum, *it); // moves the value to here
2837
+ }
2838
+ settings.inc_num_ht_copies();
2839
+ }
2840
+
2841
+
2842
+ // Required by the spec for hashed associative container
2843
+ public:
2844
+ // Though the docs say this should be num_buckets, I think it's much
2845
+ // more useful as num_elements. As a special feature, calling with
2846
+ // req_elements==0 will cause us to shrink if we can, saving space.
2847
+ // -----------------------------------------------------------------
2848
+ void resize(size_type req_elements)
2849
+ {
2850
+ // resize to this or larger
2851
+ if (settings.consider_shrink() || req_elements == 0)
2852
+ _maybe_shrink();
2853
+ if (req_elements > table.num_nonempty()) // we only grow
2854
+ _resize_delta((size_type)(req_elements - table.num_nonempty()));
2855
+ }
2856
+
2857
+ // Get and change the value of shrink_factor and enlarge_factor. The
2858
+ // description at the beginning of this file explains how to choose
2859
+ // the values. Setting the shrink parameter to 0.0 ensures that the
2860
+ // table never shrinks.
2861
+ // ------------------------------------------------------------------
2862
+ void get_resizing_parameters(float* shrink, float* grow) const
2863
+ {
2864
+ *shrink = settings.shrink_factor();
2865
+ *grow = settings.enlarge_factor();
2866
+ }
2867
+
2868
+ float get_shrink_factor() const { return settings.shrink_factor(); }
2869
+ float get_enlarge_factor() const { return settings.enlarge_factor(); }
2870
+
2871
+ void set_resizing_parameters(float shrink, float grow)
2872
+ {
2873
+ settings.set_resizing_parameters(shrink, grow);
2874
+ settings.reset_thresholds(bucket_count());
2875
+ }
2876
+
2877
+ void set_shrink_factor(float shrink)
2878
+ {
2879
+ set_resizing_parameters(shrink, get_enlarge_factor());
2880
+ }
2881
+
2882
+ void set_enlarge_factor(float grow)
2883
+ {
2884
+ set_resizing_parameters(get_shrink_factor(), grow);
2885
+ }
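+
+ // A minimal sketch of the shrink/enlarge knobs above, assuming a table `t`:
+ //
+ //     t.set_resizing_parameters(0.0f, 0.8f); // shrink = 0.0 => never shrink
+ //     t.set_enlarge_factor(0.5f);            // grow once half the buckets are used
+ //     float shrink = t.get_shrink_factor();  // 0.0f
+ //     float grow   = t.get_enlarge_factor(); // 0.5f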
2886
+
2887
+ // CONSTRUCTORS -- as required by the specs, we take a size,
2888
+ // but also let you specify a hashfunction, key comparator,
2889
+ // and key extractor. We also define a copy constructor and =.
2890
+ // DESTRUCTOR -- the default is fine, surprisingly.
2891
+ // ------------------------------------------------------------
2892
+ explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
2893
+ const HashFcn& hf = HashFcn(),
2894
+ const EqualKey& eql = EqualKey(),
2895
+ const ExtractKey& ext = ExtractKey(),
2896
+ const SetKey& set = SetKey(),
2897
+ const allocator_type& alloc = allocator_type())
2898
+ : settings(hf),
2899
+ key_info(ext, set, eql),
2900
+ num_deleted(0),
2901
+ table((expected_max_items_in_table == 0
2902
+ ? HT_DEFAULT_STARTING_BUCKETS
2903
+ : settings.min_buckets(expected_max_items_in_table, 0)),
2904
+ alloc)
2905
+ {
2906
+ settings.reset_thresholds(bucket_count());
2907
+ }
2908
+
2909
+ // As a convenience for resize(), we allow an optional second argument
2910
+ // which lets you make this new hashtable a different size than ht.
2911
+ // We also provide a mechanism for saying you want to "move" the ht argument
2912
+ // into us instead of copying.
2913
+ // ------------------------------------------------------------------------
2914
+ sparse_hashtable(const sparse_hashtable& ht,
2915
+ size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
2916
+ : settings(ht.settings),
2917
+ key_info(ht.key_info),
2918
+ num_deleted(0),
2919
+ table(0)
2920
+ {
2921
+ settings.reset_thresholds(bucket_count());
2922
+ _copy_from(ht, min_buckets_wanted);
2923
+ }
2924
+
2925
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
2926
+
2927
+ sparse_hashtable(sparse_hashtable&& o) :
2928
+ settings(std::move(o.settings)),
2929
+ key_info(std::move(o.key_info)),
2930
+ num_deleted(o.num_deleted),
2931
+ table(std::move(o.table))
2932
+ {
2933
+ }
2934
+
2935
+ sparse_hashtable(sparse_hashtable&& o, const allocator_type& alloc) :
2936
+ settings(std::move(o.settings)),
2937
+ key_info(std::move(o.key_info)),
2938
+ num_deleted(o.num_deleted),
2939
+ table(std::move(o.table), alloc)
2940
+ {
2941
+ }
2942
+
2943
+ sparse_hashtable& operator=(sparse_hashtable&& o)
2944
+ {
2945
+ using std::swap;
2946
+
2947
+ sparse_hashtable tmp(std::move(o));
2948
+ swap(tmp, *this);
2949
+ return *this;
2950
+ }
2951
+ #endif
2952
+
2953
+ sparse_hashtable(MoveDontCopyT mover,
2954
+ sparse_hashtable& ht,
2955
+ size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
2956
+ : settings(ht.settings),
2957
+ key_info(ht.key_info),
2958
+ num_deleted(0),
2959
+ table(min_buckets_wanted, ht.table.get_allocator())
2960
+ //table(min_buckets_wanted)
2961
+ {
2962
+ settings.reset_thresholds(bucket_count());
2963
+ _move_from(mover, ht, min_buckets_wanted);
2964
+ }
2965
+
2966
+ sparse_hashtable& operator=(const sparse_hashtable& ht)
2967
+ {
2968
+ if (&ht == this)
2969
+ return *this; // don't copy onto ourselves
2970
+ settings = ht.settings;
2971
+ key_info = ht.key_info;
2972
+ num_deleted = ht.num_deleted;
2973
+
2974
+ // _copy_from() calls clear and sets num_deleted to 0 too
2975
+ _copy_from(ht, HT_MIN_BUCKETS);
2976
+
2977
+ // we purposefully don't copy the allocator, which may not be copyable
2978
+ return *this;
2979
+ }
2980
+
2981
+ // Many STL algorithms use swap instead of copy constructors
2982
+ void swap(sparse_hashtable& ht)
2983
+ {
2984
+ using std::swap;
2985
+
2986
+ swap(settings, ht.settings);
2987
+ swap(key_info, ht.key_info);
2988
+ swap(num_deleted, ht.num_deleted);
2989
+ table.swap(ht.table);
2990
+ settings.reset_thresholds(bucket_count()); // also resets consider_shrink
2991
+ ht.settings.reset_thresholds(ht.bucket_count());
2992
+ // we purposefully don't swap the allocator, which may not be swappable
2993
+ }
2994
+
2995
+ // It's always nice to be able to clear a table without deallocating it
2996
+ void clear()
2997
+ {
2998
+ if (!empty() || num_deleted != 0)
2999
+ {
3000
+ table.clear();
3001
+ table = Table(HT_DEFAULT_STARTING_BUCKETS);
3002
+ }
3003
+ settings.reset_thresholds(bucket_count());
3004
+ num_deleted = 0;
3005
+ }
3006
+
3007
+ // LOOKUP ROUTINES
3008
+ private:
3009
+
3010
+ enum pos_type { pt_empty = 0, pt_erased, pt_full };
3011
+ // -------------------------------------------------------------------
3012
+ class Position
3013
+ {
3014
+ public:
3015
+
3016
+ Position() : _t(pt_empty) {}
3017
+ Position(pos_type t, size_type idx) : _t(t), _idx(idx) {}
3018
+
3019
+ pos_type _t;
3020
+ size_type _idx;
3021
+ };
3022
+
3023
+ // Returns a Position:
3024
+ // - '_t' is pt_full if the key is already present, else pt_empty/pt_erased.
3025
+ // - '_idx' is the bucket where the key is, or where it should go.
3026
+ // Note: because of deletions, where-to-insert is not trivial: it's the
3027
+ // first deleted bucket we see, as long as we don't find the key later
3028
+ // -------------------------------------------------------------------
3029
+ Position _find_position(const key_type &key) const
3030
+ {
3031
+ size_type num_probes = 0; // how many times we've probed
3032
+ const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1);
3033
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3034
+ Position pos;
3035
+
3036
+ while (1)
3037
+ {
3038
+ // probe until something happens
3039
+ // -----------------------------
3040
+ typename Table::GrpPos grp_pos(table, bucknum);
3041
+
3042
+ if (!grp_pos.test_strict())
3043
+ {
3044
+ // bucket is empty => key not present
3045
+ return pos._t ? pos : Position(pt_empty, bucknum);
3046
+ }
3047
+ else if (grp_pos.test())
3048
+ {
3049
+ reference ref(grp_pos.unsafe_get());
3050
+
3051
+ if (equals(key, get_key(ref)))
3052
+ return Position(pt_full, bucknum);
3053
+ }
3054
+ else if (pos._t == pt_empty)
3055
+ {
3056
+ // first erased position
3057
+ pos._t = pt_erased;
3058
+ pos._idx = bucknum;
3059
+ }
3060
+
3061
+ ++num_probes; // we're doing another probe
3062
+ bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one);
3063
+ assert(num_probes < bucket_count()
3064
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3065
+ }
3066
+ }
3067
+
3068
+ public:
3069
+ // I hate to duplicate find() like that, but it is
3070
+ // significantly faster to not have the intermediate pair
3071
+ // ------------------------------------------------------------------
3072
+ iterator find(const key_type& key)
3073
+ {
3074
+ size_type num_probes = 0; // how many times we've probed
3075
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3076
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3077
+
3078
+ while (1) // probe until something happens
3079
+ {
3080
+ typename Table::GrpPos grp_pos(table, bucknum);
3081
+
3082
+ if (!grp_pos.test_strict())
3083
+ return end(); // bucket is empty
3084
+ if (grp_pos.test())
3085
+ {
3086
+ reference ref(grp_pos.unsafe_get());
3087
+
3088
+ if (equals(key, get_key(ref)))
3089
+ return grp_pos.get_iter(ref);
3090
+ }
3091
+ ++num_probes; // we're doing another probe
3092
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3093
+ assert(num_probes < bucket_count()
3094
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3095
+ }
3096
+ }
3097
+
3098
+ // Wish I could avoid the duplicate find() const and non-const.
3099
+ // ------------------------------------------------------------
3100
+ const_iterator find(const key_type& key) const
3101
+ {
3102
+ size_type num_probes = 0; // how many times we've probed
3103
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3104
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3105
+
3106
+ while (1) // probe until something happens
3107
+ {
3108
+ typename Table::GrpPos grp_pos(table, bucknum);
3109
+
3110
+ if (!grp_pos.test_strict())
3111
+ return end(); // bucket is empty
3112
+ else if (grp_pos.test())
3113
+ {
3114
+ reference ref(grp_pos.unsafe_get());
3115
+
3116
+ if (equals(key, get_key(ref)))
3117
+ return _mk_const_iterator(table.get_iter(bucknum, &ref));
3118
+ }
3119
+ ++num_probes; // we're doing another probe
3120
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3121
+ assert(num_probes < bucket_count()
3122
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3123
+ }
3124
+ }
3125
+
3126
+ // This is a tr1 method: the bucket a given key is in, or what bucket
3127
+ // it would be put in, if it were to be inserted. Shrug.
3128
+ // ------------------------------------------------------------------
3129
+ size_type bucket(const key_type& key) const
3130
+ {
3131
+ Position pos = _find_position(key);
3132
+ return pos._idx;
3133
+ }
3134
+
3135
+ // Counts how many elements have the given key. For maps, it's either 0 or 1.
3136
+ // ---------------------------------------------------------------------
3137
+ size_type count(const key_type &key) const
3138
+ {
3139
+ Position pos = _find_position(key);
3140
+ return (size_type)(pos._t == pt_full ? 1 : 0);
3141
+ }
3142
+
3143
+ // Likewise, equal_range doesn't really make sense for us. Oh well.
3144
+ // -----------------------------------------------------------------
3145
+ std::pair<iterator,iterator> equal_range(const key_type& key)
3146
+ {
3147
+ iterator pos = find(key); // either an iterator or end
3148
+ if (pos == end())
3149
+ return std::pair<iterator,iterator>(pos, pos);
3150
+ else
3151
+ {
3152
+ const iterator startpos = pos++;
3153
+ return std::pair<iterator,iterator>(startpos, pos);
3154
+ }
3155
+ }
3156
+
3157
+ std::pair<const_iterator,const_iterator> equal_range(const key_type& key) const
3158
+ {
3159
+ const_iterator pos = find(key); // either an iterator or end
3160
+ if (pos == end())
3161
+ return std::pair<const_iterator,const_iterator>(pos, pos);
3162
+ else
3163
+ {
3164
+ const const_iterator startpos = pos++;
3165
+ return std::pair<const_iterator,const_iterator>(startpos, pos);
3166
+ }
3167
+ }
3168
+
3169
+
3170
+ // INSERTION ROUTINES
3171
+ private:
3172
+ // Private method used by insert_noresize and find_or_insert.
3173
+ template <class T>
3174
+ reference _insert_at(T& obj, size_type pos, bool erased)
3175
+ {
3176
+ if (size() >= max_size())
3177
+ {
3178
+ throw_exception(std::length_error("insert overflow"));
3179
+ }
3180
+ if (erased)
3181
+ {
3182
+ assert(num_deleted);
3183
+ --num_deleted;
3184
+ }
3185
+ return table.set(pos, obj);
3186
+ }
3187
+
3188
+ // If you know *this is big enough to hold obj, use this routine
3189
+ template <class T>
3190
+ std::pair<iterator, bool> _insert_noresize(T& obj)
3191
+ {
3192
+ Position pos = _find_position(get_key(obj));
3193
+ bool already_there = (pos._t == pt_full);
3194
+
3195
+ if (!already_there)
3196
+ {
3197
+ reference ref(_insert_at(obj, pos._idx, pos._t == pt_erased));
3198
+ return std::pair<iterator, bool>(_mk_iterator(table.get_iter(pos._idx, &ref)), true);
3199
+ }
3200
+ return std::pair<iterator,bool>(_mk_iterator(table.get_iter(pos._idx)), false);
3201
+ }
3202
+
3203
+ // Specializations of insert(it, it) depending on the power of the iterator:
3204
+ // (1) Iterator supports operator-, resize before inserting
3205
+ template <class ForwardIterator>
3206
+ void _insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag /*unused*/)
3207
+ {
3208
+ int64_t dist = std::distance(f, l);
3209
+ if (dist < 0 || static_cast<size_t>(dist) >= (std::numeric_limits<size_type>::max)())
3210
+ throw_exception(std::length_error("insert-range overflow"));
3211
+
3212
+ _resize_delta(static_cast<size_type>(dist));
3213
+
3214
+ for (; dist > 0; --dist, ++f)
3215
+ _insert_noresize(*f);
3216
+ }
3217
+
3218
+ // (2) Arbitrary iterator, can't tell how much to resize
3219
+ template <class InputIterator>
3220
+ void _insert(InputIterator f, InputIterator l, std::input_iterator_tag /*unused*/)
3221
+ {
3222
+ for (; f != l; ++f)
3223
+ _insert(*f);
3224
+ }
3225
+
3226
+ public:
3227
+
3228
+ #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES)
3229
+ template <class... Args>
3230
+ std::pair<iterator, bool> emplace(Args&&... args)
3231
+ {
3232
+ _resize_delta(1);
3233
+ value_type obj(std::forward<Args>(args)...);
3234
+ return _insert_noresize(obj);
3235
+ }
3236
+ #endif
3237
+
3238
+ // This is the normal insert routine, used by the outside world
3239
+ std::pair<iterator, bool> insert(const_reference obj)
3240
+ {
3241
+ _resize_delta(1); // adding an object, grow if need be
3242
+ return _insert_noresize(obj);
3243
+ }
3244
+
3245
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
3246
+ template< class P >
3247
+ std::pair<iterator, bool> insert(P &&obj)
3248
+ {
3249
+ _resize_delta(1); // adding an object, grow if need be
3250
+ value_type val(std::forward<value_type>(obj));
3251
+ return _insert_noresize(val);
3252
+ }
3253
+ #endif
3254
+
3255
+ // When inserting a lot at a time, we specialize on the type of iterator
3256
+ template <class InputIterator>
3257
+ void insert(InputIterator f, InputIterator l)
3258
+ {
3259
+ // specializes on iterator type
3260
+ _insert(f, l,
3261
+ typename std::iterator_traits<InputIterator>::iterator_category());
3262
+ }
3263
+
3264
+ // DefaultValue is a functor that takes a key and returns a value_type
3265
+ // representing the default value to be inserted if none is found.
3266
+ template <class DefaultValue>
3267
+ value_type& find_or_insert(const key_type& key)
3268
+ {
3269
+ size_type num_probes = 0; // how many times we've probed
3270
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3271
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3272
+ DefaultValue default_value;
3273
+ size_type erased_pos = 0;
3274
+ bool erased = false;
3275
+
3276
+ while (1) // probe until something happens
3277
+ {
3278
+ typename Table::GrpPos grp_pos(table, bucknum);
3279
+
3280
+ if (!grp_pos.test_strict())
3281
+ {
3282
+ // not found
3283
+ if (_resize_delta(1))
3284
+ {
3285
+ // needed to rehash to make room
3286
+ // Since we resized, we can't use pos, so recalculate where to insert.
3287
+ value_type def(default_value(key));
3288
+ return *(_insert_noresize(def).first);
3289
+ }
3290
+ else
3291
+ {
3292
+ // no need to rehash, insert right here
3293
+ value_type def(default_value(key));
3294
+ return _insert_at(def, erased ? erased_pos : bucknum, erased);
3295
+ }
3296
+ }
3297
+ if (grp_pos.test())
3298
+ {
3299
+ reference ref(grp_pos.unsafe_get());
3300
+
3301
+ if (equals(key, get_key(ref)))
3302
+ return ref;
3303
+ }
3304
+ else if (!erased)
3305
+ {
3306
+ // first erased position
3307
+ erased_pos = bucknum;
3308
+ erased = true;
3309
+ }
3310
+
3311
+ ++num_probes; // we're doing another probe
3312
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3313
+ assert(num_probes < bucket_count()
3314
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3315
+ }
3316
+ }
3317
+
3318
+ size_type erase(const key_type& key)
3319
+ {
3320
+ size_type num_probes = 0; // how many times we've probed
3321
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3322
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3323
+
3324
+ while (1) // probe until something happens
3325
+ {
3326
+ typename Table::GrpPos grp_pos(table, bucknum);
3327
+
3328
+ if (!grp_pos.test_strict())
3329
+ return 0; // bucket is empty, we deleted nothing
3330
+ if (grp_pos.test())
3331
+ {
3332
+ reference ref(grp_pos.unsafe_get());
3333
+
3334
+ if (equals(key, get_key(ref)))
3335
+ {
3336
+ grp_pos.erase(table);
3337
+ ++num_deleted;
3338
+ settings.set_consider_shrink(true); // will think about shrink after next insert
3339
+ return 1; // because we deleted one thing
3340
+ }
3341
+ }
3342
+ ++num_probes; // we're doing another probe
3343
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3344
+ assert(num_probes < bucket_count()
3345
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3346
+ }
3347
+ }
3348
+
3349
+ const_iterator erase(const_iterator pos)
3350
+ {
3351
+ if (pos == cend())
3352
+ return cend(); // sanity check
3353
+
3354
+ const_iterator nextpos = table.erase(pos);
3355
+ ++num_deleted;
3356
+ settings.set_consider_shrink(true);
3357
+ return nextpos;
3358
+ }
3359
+
3360
+ const_iterator erase(const_iterator f, const_iterator l)
3361
+ {
3362
+ if (f == cend())
3363
+ return cend(); // sanity check
3364
+
3365
+ size_type num_before = table.num_nonempty();
3366
+ const_iterator nextpos = table.erase(f, l);
3367
+ num_deleted += num_before - table.num_nonempty();
3368
+ settings.set_consider_shrink(true);
3369
+ return nextpos;
3370
+ }
3371
+
3372
+ // Deleted key routines - just to keep google test framework happy
3373
+ // we don't actually use the deleted key
3374
+ // ---------------------------------------------------------------
3375
+ void set_deleted_key(const key_type&)
3376
+ {
3377
+ }
3378
+
3379
+ void clear_deleted_key()
3380
+ {
3381
+ }
3382
+
3383
+ bool operator==(const sparse_hashtable& ht) const
3384
+ {
3385
+ if (this == &ht)
3386
+ return true;
3387
+
3388
+ if (size() != ht.size())
3389
+ return false;
3390
+
3391
+ for (const_iterator it = begin(); it != end(); ++it)
3392
+ {
3393
+ const_iterator it2 = ht.find(get_key(*it));
3394
+ if ((it2 == ht.end()) || (*it != *it2))
3395
+ return false;
3396
+ }
3397
+
3398
+ return true;
3399
+ }
3400
+
3401
+ bool operator!=(const sparse_hashtable& ht) const
3402
+ {
3403
+ return !(*this == ht);
3404
+ }
3405
+
3406
+
3407
+ // I/O
3408
+ // We support reading and writing hashtables to disk. NOTE that
3409
+ // this only stores the hashtable metadata, not the stuff you've
3410
+ // actually put in the hashtable! Alas, since I don't know how to
3411
+ // write a hasher or key_equal, you have to make sure everything
3412
+ // but the table is the same. We compact before writing.
3413
+ //
3414
+ // The OUTPUT type needs to support a Write() operation. File and
3415
+ // OutputBuffer are appropriate types to pass in.
3416
+ //
3417
+ // The INPUT type needs to support a Read() operation. File and
3418
+ // InputBuffer are appropriate types to pass in.
3419
+ // -------------------------------------------------------------
3420
+ template <typename OUTPUT>
3421
+ bool write_metadata(OUTPUT *fp)
3422
+ {
3423
+ return table.write_metadata(fp);
3424
+ }
3425
+
3426
+ template <typename INPUT>
3427
+ bool read_metadata(INPUT *fp)
3428
+ {
3429
+ num_deleted = 0; // the table was compacted before writing
3430
+ const bool result = table.read_metadata(fp);
3431
+ settings.reset_thresholds(bucket_count());
3432
+ return result;
3433
+ }
3434
+
3435
+ // Only meaningful if value_type is a POD.
3436
+ template <typename OUTPUT>
3437
+ bool write_nopointer_data(OUTPUT *fp)
3438
+ {
3439
+ return table.write_nopointer_data(fp);
3440
+ }
3441
+
3442
+ // Only meaningful if value_type is a POD.
3443
+ template <typename INPUT>
3444
+ bool read_nopointer_data(INPUT *fp)
3445
+ {
3446
+ return table.read_nopointer_data(fp);
3447
+ }
3448
+
3449
+ // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
3450
+ // (istream, ostream, etc) *or* a class providing
3451
+ // Read(void*, size_t) and Write(const void*, size_t)
3452
+ // (respectively), which writes a buffer into a stream
3453
+ // (which the INPUT/OUTPUT instance presumably owns).
3454
+
3455
+ typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
3456
+
3457
+ // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
3458
+ template <typename ValueSerializer, typename OUTPUT>
3459
+ bool serialize(ValueSerializer serializer, OUTPUT *fp)
3460
+ {
3461
+ return table.serialize(serializer, fp);
3462
+ }
3463
+
3464
+ // ValueSerializer: a functor. operator()(INPUT*, value_type*)
3465
+ template <typename ValueSerializer, typename INPUT>
3466
+ bool unserialize(ValueSerializer serializer, INPUT *fp)
3467
+ {
3468
+ num_deleted = 0; // the table was compacted before writing
3469
+ const bool result = table.unserialize(serializer, fp);
3470
+ settings.reset_thresholds(bucket_count());
3471
+ return result;
3472
+ }
3473
+
3474
+ private:
3475
+
3476
+ // Package templated functors with the other types to eliminate memory
3477
+ // needed for storing these zero-size operators. Since ExtractKey and
3478
+ // hasher's operator() might have the same function signature, they
3479
+ // must be packaged in different classes.
3480
+ // -------------------------------------------------------------------------
3481
+ struct Settings :
3482
+ sparsehash_internal::sh_hashtable_settings<key_type, hasher,
3483
+ size_type, HT_MIN_BUCKETS>
3484
+ {
3485
+ explicit Settings(const hasher& hf)
3486
+ : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
3487
+ HT_MIN_BUCKETS>
3488
+ (hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
3489
+ };
3490
+
3491
+ // KeyInfo packages the zero-size functors:
3492
+ // ExtractKey, SetKey and EqualKey.
3493
+ // ---------------------------------------------------------
3494
+ class KeyInfo : public ExtractKey, public SetKey, public EqualKey
3495
+ {
3496
+ public:
3497
+ KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
3498
+ : ExtractKey(ek), SetKey(sk), EqualKey(eq)
3499
+ {
3500
+ }
3501
+
3502
+ // We want to return the exact same type as ExtractKey: Key or const Key&
3503
+ typename ExtractKey::result_type get_key(const_reference v) const
3504
+ {
3505
+ return ExtractKey::operator()(v);
3506
+ }
3507
+
3508
+ bool equals(const key_type& a, const key_type& b) const
3509
+ {
3510
+ return EqualKey::operator()(a, b);
3511
+ }
3512
+ };
3513
+
3514
+ // Utility functions to access the templated operators
3515
+ size_t hash(const key_type& v) const
3516
+ {
3517
+ return settings.hash(v);
3518
+ }
3519
+
3520
+ bool equals(const key_type& a, const key_type& b) const
3521
+ {
3522
+ return key_info.equals(a, b);
3523
+ }
3524
+
3525
+ typename ExtractKey::result_type get_key(const_reference v) const
3526
+ {
3527
+ return key_info.get_key(v);
3528
+ }
3529
+
3530
+ private:
3531
+ // Actual data
3532
+ // -----------
3533
+ Settings settings;
3534
+ KeyInfo key_info;
3535
+ size_type num_deleted;
3536
+ Table table; // holds num_buckets and num_elements too
3537
+ };
3538
+
3539
+ #undef JUMP_
3540
+
3541
+ // -----------------------------------------------------------------------------
3542
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
3543
+ const typename sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
3544
+ sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;
3545
+
3546
+ // How full we let the table get before we resize. Knuth says .8 is
3547
+ // good -- higher causes us to probe too much, though it saves memory
3548
+ // -----------------------------------------------------------------------------
3549
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
3550
+ const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50;
3551
+
3552
+ // How empty we let the table get before we resize lower.
3553
+ // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
3554
+ // -----------------------------------------------------------------------------
3555
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
3556
+ const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
3557
+ = static_cast<int>(0.4 *
3558
+ sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);
3559
+
3560
+
3561
+ // ----------------------------------------------------------------------
3562
+ // S P A R S E _ H A S H _ M A P
3563
+ // ----------------------------------------------------------------------
3564
+ template <class Key, class T,
3565
+ class HashFcn = spp_hash<Key>,
3566
+ class EqualKey = std::equal_to<Key>,
3567
+ class Alloc = SPP_DEFAULT_ALLOCATOR<std::pair<const Key, T> > >
3568
+ class sparse_hash_map
3569
+ {
3570
+ public:
3571
+ typedef typename std::pair<const Key, T> value_type;
3572
+
3573
+ private:
3574
+ // Apparently select1st is not stl-standard, so we define our own
3575
+ struct SelectKey
3576
+ {
3577
+ typedef const Key& result_type;
3578
+
3579
+ inline const Key& operator()(const value_type& p) const
3580
+ {
3581
+ return p.first;
3582
+ }
3583
+ };
3584
+
3585
+ struct SetKey
3586
+ {
3587
+ inline void operator()(value_type* value, const Key& new_key) const
3588
+ {
3589
+ *const_cast<Key*>(&value->first) = new_key;
3590
+ }
3591
+ };
3592
+
3593
+ // For operator[].
3594
+ struct DefaultValue
3595
+ {
3596
+ inline value_type operator()(const Key& key) const
3597
+ {
3598
+ return std::make_pair(key, T());
3599
+ }
3600
+ };
3601
+
3602
+ // The actual data
3603
+ typedef sparse_hashtable<value_type, Key, HashFcn, SelectKey,
3604
+ SetKey, EqualKey, Alloc> ht;
3605
+
3606
+ public:
3607
+ typedef typename ht::key_type key_type;
3608
+ typedef T data_type;
3609
+ typedef T mapped_type;
3610
+ typedef typename ht::hasher hasher;
3611
+ typedef typename ht::key_equal key_equal;
3612
+ typedef Alloc allocator_type;
3613
+
3614
+ typedef typename ht::size_type size_type;
3615
+ typedef typename ht::difference_type difference_type;
3616
+ typedef typename ht::pointer pointer;
3617
+ typedef typename ht::const_pointer const_pointer;
3618
+ typedef typename ht::reference reference;
3619
+ typedef typename ht::const_reference const_reference;
3620
+
3621
+ typedef typename ht::iterator iterator;
3622
+ typedef typename ht::const_iterator const_iterator;
3623
+ typedef typename ht::local_iterator local_iterator;
3624
+ typedef typename ht::const_local_iterator const_local_iterator;
3625
+
3626
+ // Iterator functions
3627
+ iterator begin() { return rep.begin(); }
3628
+ iterator end() { return rep.end(); }
3629
+ const_iterator begin() const { return rep.cbegin(); }
3630
+ const_iterator end() const { return rep.cend(); }
3631
+ const_iterator cbegin() const { return rep.cbegin(); }
3632
+ const_iterator cend() const { return rep.cend(); }
3633
+
3634
+ // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
3635
+ local_iterator begin(size_type i) { return rep.begin(i); }
3636
+ local_iterator end(size_type i) { return rep.end(i); }
3637
+ const_local_iterator begin(size_type i) const { return rep.begin(i); }
3638
+ const_local_iterator end(size_type i) const { return rep.end(i); }
3639
+ const_local_iterator cbegin(size_type i) const { return rep.cbegin(i); }
3640
+ const_local_iterator cend(size_type i) const { return rep.cend(i); }
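+
+ // A minimal sketch of the bucket interface above (here every bucket holds
+ // 0 or 1 elements), assuming a map `m`:
+ //
+ //     spp_::sparse_hash_map<int, int> m;
+ //     m[7] = 1;
+ //     size_t b = m.bucket(7);        // bucket the key hashes to
+ //     size_t n = m.bucket_size(b);   // 1 for an occupied bucket, 0 otherwise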
3641
+
3642
+ // Accessor functions
3643
+ // ------------------
3644
+ allocator_type get_allocator() const { return rep.get_allocator(); }
3645
+ hasher hash_funct() const { return rep.hash_funct(); }
3646
+ hasher hash_function() const { return hash_funct(); }
3647
+ key_equal key_eq() const { return rep.key_eq(); }
3648
+
3649
+
3650
+ // Constructors
3651
+ // ------------
3652
+ explicit sparse_hash_map(size_type n = 0,
3653
+ const hasher& hf = hasher(),
3654
+ const key_equal& eql = key_equal(),
3655
+ const allocator_type& alloc = allocator_type())
3656
+ : rep(n, hf, eql, SelectKey(), SetKey(), alloc)
3657
+ {
3658
+ }
3659
+
3660
+ explicit sparse_hash_map(const allocator_type& alloc) :
3661
+ rep(0, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3662
+ {
3663
+ }
3664
+
3665
+ sparse_hash_map(size_type n, const allocator_type& alloc) :
3666
+ rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3667
+ {
3668
+ }
3669
+
3670
+ sparse_hash_map(size_type n, const hasher& hf, const allocator_type& alloc) :
3671
+ rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc)
3672
+ {
3673
+ }
3674
+
3675
+ template <class InputIterator>
3676
+ sparse_hash_map(InputIterator f, InputIterator l,
3677
+ size_type n = 0,
3678
+ const hasher& hf = hasher(),
3679
+ const key_equal& eql = key_equal(),
3680
+ const allocator_type& alloc = allocator_type())
3681
+ : rep(n, hf, eql, SelectKey(), SetKey(), alloc)
3682
+ {
3683
+ rep.insert(f, l);
3684
+ }
3685
+
3686
+ template <class InputIterator>
3687
+ sparse_hash_map(InputIterator f, InputIterator l,
3688
+ size_type n, const allocator_type& alloc)
3689
+ : rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3690
+ {
3691
+ rep.insert(f, l);
3692
+ }
3693
+
3694
+ template <class InputIterator>
3695
+ sparse_hash_map(InputIterator f, InputIterator l,
3696
+ size_type n, const hasher& hf, const allocator_type& alloc)
3697
+ : rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc)
3698
+ {
3699
+ rep.insert(f, l);
3700
+ }
3701
+
3702
+ sparse_hash_map(const sparse_hash_map &o) :
3703
+ rep(o.rep)
3704
+ {}
3705
+
3706
+ sparse_hash_map(const sparse_hash_map &o,
3707
+ const allocator_type& alloc) :
3708
+ rep(o.rep, alloc)
3709
+ {}
3710
+
3711
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
3712
+ sparse_hash_map(sparse_hash_map &&o) :
3713
+ rep(std::move(o.rep))
3714
+ {}
3715
+
3716
+ sparse_hash_map(sparse_hash_map &&o,
3717
+ const allocator_type& alloc) :
3718
+ rep(std::move(o.rep), alloc)
3719
+ {}
3720
+ #endif
3721
+
3722
+ #if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST)
3723
+ sparse_hash_map(std::initializer_list<value_type> init,
3724
+ size_type n = 0,
3725
+ const hasher& hf = hasher(),
3726
+ const key_equal& eql = key_equal(),
3727
+ const allocator_type& alloc = allocator_type())
3728
+ : rep(n, hf, eql, SelectKey(), SetKey(), alloc)
3729
+ {
3730
+ rep.insert(init.begin(), init.end());
3731
+ }
3732
+
3733
+ sparse_hash_map(std::initializer_list<value_type> init,
3734
+ size_type n, const allocator_type& alloc) :
3735
+ rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3736
+ {
3737
+ rep.insert(init.begin(), init.end());
3738
+ }
3739
+
3740
+ sparse_hash_map(std::initializer_list<value_type> init,
3741
+ size_type n, const hasher& hf, const allocator_type& alloc) :
3742
+ rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc)
3743
+ {
3744
+ rep.insert(init.begin(), init.end());
3745
+ }
3746
+
3747
+ sparse_hash_map& operator=(std::initializer_list<value_type> init)
3748
+ {
3749
+ rep.clear();
3750
+ rep.insert(init.begin(), init.end());
3751
+ return *this;
3752
+ }
3753
+
3754
+ void insert(std::initializer_list<value_type> init)
3755
+ {
3756
+ rep.insert(init.begin(), init.end());
3757
+ }
3758
+ #endif
3759
+
3760
+ sparse_hash_map& operator=(const sparse_hash_map &o)
3761
+ {
3762
+ rep = o.rep;
3763
+ return *this;
3764
+ }
3765
+
3766
+ void clear() { rep.clear(); }
3767
+ void swap(sparse_hash_map& hs) { rep.swap(hs.rep); }
3768
+
3769
+ // Functions concerning size
3770
+ // -------------------------
3771
+ size_type size() const { return rep.size(); }
3772
+ size_type max_size() const { return rep.max_size(); }
3773
+ bool empty() const { return rep.empty(); }
3774
+ size_type bucket_count() const { return rep.bucket_count(); }
3775
+ size_type max_bucket_count() const { return rep.max_bucket_count(); }
3776
+
3777
+ size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
3778
+ size_type bucket(const key_type& key) const { return rep.bucket(key); }
3779
+ float load_factor() const { return size() * 1.0f / bucket_count(); }
3780
+
3781
+ float max_load_factor() const { return rep.get_enlarge_factor(); }
3782
+ void max_load_factor(float grow) { rep.set_enlarge_factor(grow); }
3783
+
3784
+ float min_load_factor() const { return rep.get_shrink_factor(); }
3785
+ void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); }
3786
+
3787
+ void set_resizing_parameters(float shrink, float grow)
3788
+ {
3789
+ rep.set_resizing_parameters(shrink, grow);
3790
+ }
3791
+
3792
+ void resize(size_type cnt) { rep.resize(cnt); }
3793
+ void rehash(size_type cnt) { resize(cnt); } // c++11 name
3794
+ void reserve(size_type cnt) { resize(cnt); } // c++11
3795
+
3796
+ // Lookup
3797
+ // ------
3798
+ iterator find(const key_type& key) { return rep.find(key); }
3799
+ const_iterator find(const key_type& key) const { return rep.find(key); }
3800
+ bool contains(const key_type& key) const { return rep.find(key) != rep.end(); }
3801
+
3802
+ mapped_type& operator[](const key_type& key)
3803
+ {
3804
+ return rep.template find_or_insert<DefaultValue>(key).second;
3805
+ }
3806
+
3807
+ size_type count(const key_type& key) const { return rep.count(key); }
3808
+
3809
+ std::pair<iterator, iterator>
3810
+ equal_range(const key_type& key) { return rep.equal_range(key); }
3811
+
3812
+ std::pair<const_iterator, const_iterator>
3813
+ equal_range(const key_type& key) const { return rep.equal_range(key); }
3814
+
3815
+ mapped_type& at(const key_type& key)
3816
+ {
3817
+ iterator it = rep.find(key);
3818
+ if (it == rep.end())
3819
+ throw_exception(std::out_of_range("at: key not present"));
3820
+ return it->second;
3821
+ }
3822
+
3823
+ const mapped_type& at(const key_type& key) const
3824
+ {
3825
+ const_iterator it = rep.find(key);
3826
+ if (it == rep.cend())
3827
+ throw_exception(std::out_of_range("at: key not present"));
3828
+ return it->second;
3829
+ }
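+
+ // A minimal sketch contrasting the lookup routines above, assuming a map `m`:
+ //
+ //     spp_::sparse_hash_map<int, int> m;
+ //     ++m[7];                     // operator[]: DefaultValue inserts {7, 0}, then increments
+ //     int a = m.at(7);            // a == 1
+ //     // m.at(8) would throw std::out_of_range("at: key not present")
+ //     bool hit = m.contains(8);   // false, and nothing was inserted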
3830
+
3831
+ #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES)
3832
+ template <class... Args>
3833
+ std::pair<iterator, bool> emplace(Args&&... args)
3834
+ {
3835
+ return rep.emplace(std::forward<Args>(args)...);
3836
+ }
3837
+
3838
+ template <class... Args>
3839
+ iterator emplace_hint(const_iterator , Args&&... args)
3840
+ {
3841
+ return rep.emplace(std::forward<Args>(args)...).first;
3842
+ }
3843
+ #endif
3844
+
3845
+ // Insert
3846
+ // ------
3847
+ std::pair<iterator, bool>
3848
+ insert(const value_type& obj) { return rep.insert(obj); }
3849
+
3850
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
3851
+ template< class P >
3852
+ std::pair<iterator, bool> insert(P&& obj) { return rep.insert(std::forward<P>(obj)); }
3853
+ #endif
3854
+
3855
+ template <class InputIterator>
3856
+ void insert(InputIterator f, InputIterator l) { rep.insert(f, l); }
3857
+
3858
+ void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
3859
+
3860
+ iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; }
3861
+ iterator insert(const_iterator /*unused*/, const value_type& obj) { return insert(obj).first; }
3862
+
3863
+ // Deleted key routines - just to keep google test framework happy
3864
+ // we don't actually use the deleted key
3865
+ // ---------------------------------------------------------------
3866
+ void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
3867
+ void clear_deleted_key() { rep.clear_deleted_key(); }
3868
+ key_type deleted_key() const { return rep.deleted_key(); }
3869
+
3870
+ // Erase
3871
+ // -----
3872
+ size_type erase(const key_type& key) { return rep.erase(key); }
3873
+ iterator erase(iterator it) { return rep.erase(it); }
3874
+ iterator erase(iterator f, iterator l) { return rep.erase(f, l); }
3875
+ iterator erase(const_iterator it) { return rep.erase(it); }
3876
+ iterator erase(const_iterator f, const_iterator l){ return rep.erase(f, l); }
3877
+
3878
+ // Comparison
3879
+ // ----------
3880
+ bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; }
3881
+ bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; }
3882
+
3883
+
3884
+ // I/O -- this is an add-on for writing metainformation to disk
3885
+ //
3886
+ // For maximum flexibility, this does not assume a particular
3887
+ // file type (though it will probably be a FILE *). We just pass
3888
+ // the fp through to rep.
3889
+
3890
+ // If your keys and values are simple enough, you can pass this
3891
+ // serializer to serialize()/unserialize(). "Simple enough" means
3892
+ // value_type is a POD type that contains no pointers. Note,
3893
+ // however, we don't try to normalize endianness.
3894
+ // ---------------------------------------------------------------
3895
+ typedef typename ht::NopointerSerializer NopointerSerializer;
3896
+
3897
+ // serializer: a class providing operator()(OUTPUT*, const value_type&)
3898
+ // (writing value_type to OUTPUT). You can specify a
3899
+ // NopointerSerializer object if appropriate (see above).
3900
+ // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
3901
+ // pointer to a class providing size_t Write(const void*, size_t),
3902
+ // which writes a buffer into a stream (which fp presumably
3903
+ // owns) and returns the number of bytes successfully written.
3904
+ // Note basic_ostream<not_char> is not currently supported.
3905
+ // ---------------------------------------------------------------
3906
+ template <typename ValueSerializer, typename OUTPUT>
3907
+ bool serialize(ValueSerializer serializer, OUTPUT* fp)
3908
+ {
3909
+ return rep.serialize(serializer, fp);
3910
+ }
3911
+
3912
+ // serializer: a functor providing operator()(INPUT*, value_type*)
3913
+ // (reading from INPUT and into value_type). You can specify a
3914
+ // NopointerSerializer object if appropriate (see above).
3915
+ // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
3916
+ // pointer to a class providing size_t Read(void*, size_t),
3917
+ // which reads into a buffer from a stream (which fp presumably
3918
+ // owns) and returns the number of bytes successfully read.
3919
+ // Note basic_istream<not_char> is not currently supported.
3920
+ // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
3921
+ // may need to do a const cast in order to fill in the key.
3922
+ // NOTE: if Key or T are not POD types, the serializer MUST use
3923
+ // placement-new to initialize their values, rather than a normal
3924
+ // equals-assignment or similar. (The value_type* passed into the
3925
+ // serializer points to garbage memory.)
3926
+ // ---------------------------------------------------------------
3927
+ template <typename ValueSerializer, typename INPUT>
3928
+ bool unserialize(ValueSerializer serializer, INPUT* fp)
3929
+ {
3930
+ return rep.unserialize(serializer, fp);
3931
+ }
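+
+ // A minimal sketch of the serialize()/unserialize() pair above, assuming POD
+ // keys and values (so NopointerSerializer applies) and FILE* I/O:
+ //
+ //     typedef spp_::sparse_hash_map<int, int> Map;
+ //     Map m;
+ //     m[1] = 2;
+ //     FILE *out = fopen("m.bin", "wb");
+ //     m.serialize(Map::NopointerSerializer(), out);
+ //     fclose(out);
+ //
+ //     Map m2;
+ //     FILE *in = fopen("m.bin", "rb");
+ //     m2.unserialize(Map::NopointerSerializer(), in);
+ //     fclose(in);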
3932
+
3933
+ // The four methods below are DEPRECATED.
3934
+ // Use serialize() and unserialize() for new code.
3935
+ // -----------------------------------------------
3936
+ template <typename OUTPUT>
3937
+ bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
3938
+
3939
+ template <typename INPUT>
3940
+ bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
3941
+
3942
+ template <typename OUTPUT>
3943
+ bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
3944
+
3945
+ template <typename INPUT>
3946
+ bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
3947
+
3948
+
3949
+ private:
3950
+ // The actual data
3951
+ // ---------------
3952
+ ht rep;
3953
+ };
3954
+
3955
+ // ----------------------------------------------------------------------
3956
+ // S P A R S E _ H A S H _ S E T
3957
+ // ----------------------------------------------------------------------
3958
+
3959
+ template <class Value,
3960
+ class HashFcn = spp_hash<Value>,
3961
+ class EqualKey = std::equal_to<Value>,
3962
+ class Alloc = SPP_DEFAULT_ALLOCATOR<Value> >
3963
+ class sparse_hash_set
3964
+ {
3965
+ private:
3966
+ // Apparently identity is not stl-standard, so we define our own
3967
+ struct Identity
3968
+ {
3969
+ typedef const Value& result_type;
3970
+ inline const Value& operator()(const Value& v) const { return v; }
3971
+ };
3972
+
3973
+ struct SetKey
3974
+ {
3975
+ inline void operator()(Value* value, const Value& new_key) const
3976
+ {
3977
+ *value = new_key;
3978
+ }
3979
+ };
3980
+
3981
+ typedef sparse_hashtable<Value, Value, HashFcn, Identity, SetKey,
3982
+ EqualKey, Alloc> ht;
3983
+
3984
+ public:
3985
+ typedef typename ht::key_type key_type;
3986
+ typedef typename ht::value_type value_type;
3987
+ typedef typename ht::hasher hasher;
3988
+ typedef typename ht::key_equal key_equal;
3989
+ typedef Alloc allocator_type;
3990
+
3991
+ typedef typename ht::size_type size_type;
3992
+ typedef typename ht::difference_type difference_type;
3993
+ typedef typename ht::const_pointer pointer;
3994
+ typedef typename ht::const_pointer const_pointer;
3995
+ typedef typename ht::const_reference reference;
3996
+ typedef typename ht::const_reference const_reference;
3997
+
3998
+ typedef typename ht::const_iterator iterator;
3999
+ typedef typename ht::const_iterator const_iterator;
4000
+ typedef typename ht::const_local_iterator local_iterator;
4001
+ typedef typename ht::const_local_iterator const_local_iterator;
4002
+
4003
+
4004
+ // Iterator functions -- recall all iterators are const
4005
+ iterator begin() const { return rep.begin(); }
4006
+ iterator end() const { return rep.end(); }
4007
+ const_iterator cbegin() const { return rep.cbegin(); }
4008
+ const_iterator cend() const { return rep.cend(); }
4009
+
4010
+ // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements.
4011
+ local_iterator begin(size_type i) const { return rep.begin(i); }
4012
+ local_iterator end(size_type i) const { return rep.end(i); }
4013
+ local_iterator cbegin(size_type i) const { return rep.cbegin(i); }
4014
+ local_iterator cend(size_type i) const { return rep.cend(i); }
4015
+
4016
+
4017
+ // Accessor functions
4018
+ // ------------------
4019
+ allocator_type get_allocator() const { return rep.get_allocator(); }
4020
+ hasher hash_funct() const { return rep.hash_funct(); }
4021
+ hasher hash_function() const { return hash_funct(); } // tr1 name
4022
+ key_equal key_eq() const { return rep.key_eq(); }
4023
+
4024
+
4025
+ // Constructors
4026
+ // ------------
4027
+ explicit sparse_hash_set(size_type n = 0,
4028
+ const hasher& hf = hasher(),
4029
+ const key_equal& eql = key_equal(),
4030
+ const allocator_type& alloc = allocator_type()) :
4031
+ rep(n, hf, eql, Identity(), SetKey(), alloc)
4032
+ {
4033
+ }
4034
+
4035
+ explicit sparse_hash_set(const allocator_type& alloc) :
4036
+ rep(0, hasher(), key_equal(), Identity(), SetKey(), alloc)
4037
+ {
4038
+ }
4039
+
4040
+ sparse_hash_set(size_type n, const allocator_type& alloc) :
4041
+ rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc)
4042
+ {
4043
+ }
4044
+
4045
+ sparse_hash_set(size_type n, const hasher& hf,
4046
+ const allocator_type& alloc) :
4047
+ rep(n, hf, key_equal(), Identity(), SetKey(), alloc)
4048
+ {
4049
+ }
4050
+
4051
+ template <class InputIterator>
4052
+ sparse_hash_set(InputIterator f, InputIterator l,
4053
+ size_type n = 0,
4054
+ const hasher& hf = hasher(),
4055
+ const key_equal& eql = key_equal(),
4056
+ const allocator_type& alloc = allocator_type())
4057
+ : rep(n, hf, eql, Identity(), SetKey(), alloc)
4058
+ {
4059
+ rep.insert(f, l);
4060
+ }
4061
+
4062
+ template <class InputIterator>
4063
+ sparse_hash_set(InputIterator f, InputIterator l,
4064
+ size_type n, const allocator_type& alloc)
4065
+ : rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc)
4066
+ {
4067
+ rep.insert(f, l);
4068
+ }
4069
+
4070
+ template <class InputIterator>
4071
+ sparse_hash_set(InputIterator f, InputIterator l,
4072
+ size_type n, const hasher& hf, const allocator_type& alloc)
4073
+ : rep(n, hf, key_equal(), Identity(), SetKey(), alloc)
4074
+ {
4075
+ rep.insert(f, l);
4076
+ }
4077
+
4078
+ sparse_hash_set(const sparse_hash_set &o) :
4079
+ rep(o.rep)
4080
+ {}
4081
+
4082
+ sparse_hash_set(const sparse_hash_set &o,
4083
+ const allocator_type& alloc) :
4084
+ rep(o.rep, alloc)
4085
+ {}
4086
+
4087
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
4088
+ sparse_hash_set(sparse_hash_set &&o) :
4089
+ rep(std::move(o.rep))
4090
+ {}
4091
+
4092
+ sparse_hash_set(sparse_hash_set &&o,
4093
+ const allocator_type& alloc) :
4094
+ rep(std::move(o.rep), alloc)
4095
+ {}
4096
+ #endif
4097
+
4098
+ #if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST)
4099
+ sparse_hash_set(std::initializer_list<value_type> init,
4100
+ size_type n = 0,
4101
+ const hasher& hf = hasher(),
4102
+ const key_equal& eql = key_equal(),
4103
+ const allocator_type& alloc = allocator_type()) :
4104
+ rep(n, hf, eql, Identity(), SetKey(), alloc)
4105
+ {
4106
+ rep.insert(init.begin(), init.end());
4107
+ }
4108
+
4109
+ sparse_hash_set(std::initializer_list<value_type> init,
4110
+ size_type n, const allocator_type& alloc) :
4111
+ rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc)
4112
+ {
4113
+ rep.insert(init.begin(), init.end());
4114
+ }
4115
+
4116
+ sparse_hash_set(std::initializer_list<value_type> init,
4117
+ size_type n, const hasher& hf,
4118
+ const allocator_type& alloc) :
4119
+ rep(n, hf, key_equal(), Identity(), SetKey(), alloc)
4120
+ {
4121
+ rep.insert(init.begin(), init.end());
4122
+ }
4123
+
4124
+ sparse_hash_set& operator=(std::initializer_list<value_type> init)
4125
+ {
4126
+ rep.clear();
4127
+ rep.insert(init.begin(), init.end());
4128
+ return *this;
4129
+ }
4130
+
4131
+ void insert(std::initializer_list<value_type> init)
4132
+ {
4133
+ rep.insert(init.begin(), init.end());
4134
+ }
4135
+
4136
+ #endif
4137
+
4138
+ sparse_hash_set& operator=(const sparse_hash_set &o)
4139
+ {
4140
+ rep = o.rep;
4141
+ return *this;
4142
+ }
4143
+
4144
+ void clear() { rep.clear(); }
4145
+ void swap(sparse_hash_set& hs) { rep.swap(hs.rep); }
4146
+
4147
+
4148
+ // Functions concerning size
4149
+ // -------------------------
4150
+ size_type size() const { return rep.size(); }
4151
+ size_type max_size() const { return rep.max_size(); }
4152
+ bool empty() const { return rep.empty(); }
4153
+ size_type bucket_count() const { return rep.bucket_count(); }
4154
+ size_type max_bucket_count() const { return rep.max_bucket_count(); }
4155
+
4156
+ size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
4157
+ size_type bucket(const key_type& key) const { return rep.bucket(key); }
4158
+
4159
+ float load_factor() const { return size() * 1.0f / bucket_count(); }
4160
+
4161
+ float max_load_factor() const { return rep.get_enlarge_factor(); }
4162
+ void max_load_factor(float grow) { rep.set_enlarge_factor(grow); }
4163
+
4164
+ float min_load_factor() const { return rep.get_shrink_factor(); }
4165
+ void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); }
4166
+
4167
+ void set_resizing_parameters(float shrink, float grow)
4168
+ {
4169
+ rep.set_resizing_parameters(shrink, grow);
4170
+ }
4171
+
4172
+ void resize(size_type cnt) { rep.resize(cnt); }
4173
+ void rehash(size_type cnt) { resize(cnt); } // c++11 name
4174
+ void reserve(size_type cnt) { resize(cnt); } // c++11
4175
+
4176
+ // Lookup
4177
+ // ------
4178
+ iterator find(const key_type& key) const { return rep.find(key); }
4179
+ bool contains(const key_type& key) const { return rep.find(key) != rep.end(); }
4180
+
4181
+ size_type count(const key_type& key) const { return rep.count(key); }
4182
+
4183
+ std::pair<iterator, iterator>
4184
+ equal_range(const key_type& key) const { return rep.equal_range(key); }
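+
+ // A minimal sketch of the set interface above, assuming int elements:
+ //
+ //     spp_::sparse_hash_set<int> s;
+ //     s.insert(3);
+ //     bool hit = s.contains(3);   // true
+ //     size_t c = s.count(4);      // 0 -- at most 1 for a set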
4185
+
4186
+ #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES)
4187
+ template <class... Args>
4188
+ std::pair<iterator, bool> emplace(Args&&... args)
4189
+ {
4190
+ return rep.emplace(std::forward<Args>(args)...);
4191
+ }
4192
+
4193
+ template <class... Args>
4194
+ iterator emplace_hint(const_iterator , Args&&... args)
4195
+ {
4196
+ return rep.emplace(std::forward<Args>(args)...).first;
4197
+ }
4198
+ #endif
4199
+
4200
+ // Insert
4201
+ // ------
4202
+ std::pair<iterator, bool> insert(const value_type& obj)
4203
+ {
4204
+ std::pair<typename ht::iterator, bool> p = rep.insert(obj);
4205
+ return std::pair<iterator, bool>(p.first, p.second); // const to non-const
4206
+ }
4207
+
4208
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
4209
+ template<class P>
4210
+ std::pair<iterator, bool> insert(P&& obj) { return rep.insert(std::forward<P>(obj)); }
4211
+ #endif
4212
+
4213
+ template <class InputIterator>
4214
+ void insert(InputIterator f, InputIterator l) { rep.insert(f, l); }
4215
+
4216
+ void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
4217
+
4218
+ iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; }
4219
+
4220
+ // Deleted key - do nothing - just to keep google test framework happy
4221
+ // -------------------------------------------------------------------
4222
+ void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
4223
+ void clear_deleted_key() { rep.clear_deleted_key(); }
4224
+ key_type deleted_key() const { return rep.deleted_key(); }
4225
+
4226
+ // Erase
4227
+ // -----
4228
+ size_type erase(const key_type& key) { return rep.erase(key); }
4229
+ iterator erase(iterator it) { return rep.erase(it); }
4230
+ iterator erase(iterator f, iterator l) { return rep.erase(f, l); }
4231
+
4232
+ // Comparison
4233
+ // ----------
4234
+ bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; }
4235
+ bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; }
4236
+
4237
+
4238
+ // I/O -- this is an add-on for writing metainformation to disk
4239
+ //
4240
+ // For maximum flexibility, this does not assume a particular
4241
+ // file type (though it will probably be a FILE *). We just pass
4242
+ // the fp through to rep.
4243
+
4244
+ // If your keys and values are simple enough, you can pass this
4245
+ // serializer to serialize()/unserialize(). "Simple enough" means
4246
+ // value_type is a POD type that contains no pointers. Note,
4247
+ // however, we don't try to normalize endianness.
4248
+ // ---------------------------------------------------------------
4249
+ typedef typename ht::NopointerSerializer NopointerSerializer;
4250
+
4251
+ // serializer: a class providing operator()(OUTPUT*, const value_type&)
4252
+ // (writing value_type to OUTPUT). You can specify a
4253
+ // NopointerSerializer object if appropriate (see above).
4254
+ // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
4255
+ // pointer to a class providing size_t Write(const void*, size_t),
4256
+ // which writes a buffer into a stream (which fp presumably
4257
+ // owns) and returns the number of bytes successfully written.
4258
+ // Note basic_ostream<not_char> is not currently supported.
4259
+ // ---------------------------------------------------------------
4260
+ template <typename ValueSerializer, typename OUTPUT>
4261
+ bool serialize(ValueSerializer serializer, OUTPUT* fp)
4262
+ {
4263
+ return rep.serialize(serializer, fp);
4264
+ }
4265
+
4266
+ // serializer: a functor providing operator()(INPUT*, value_type*)
4267
+ // (reading from INPUT and into value_type). You can specify a
4268
+ // NopointerSerializer object if appropriate (see above).
4269
+ // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
4270
+ // pointer to a class providing size_t Read(void*, size_t),
4271
+ // which reads into a buffer from a stream (which fp presumably
4272
+ // owns) and returns the number of bytes successfully read.
4273
+ // Note basic_istream<not_char> is not currently supported.
4274
+ // NOTE: Since value_type is const Key, ValueSerializer
4275
+ // may need to do a const cast in order to fill in the key.
4276
+ // NOTE: if Key is not a POD type, the serializer MUST use
4277
+ // placement-new to initialize its value, rather than a normal
4278
+ // equals-assignment or similar. (The value_type* passed into
4279
+ // the serializer points to garbage memory.)
4280
+ // ---------------------------------------------------------------
4281
+ template <typename ValueSerializer, typename INPUT>
4282
+ bool unserialize(ValueSerializer serializer, INPUT* fp)
4283
+ {
4284
+ return rep.unserialize(serializer, fp);
4285
+ }
4286
+
4287
+ // The four methods below are DEPRECATED.
4288
+ // Use serialize() and unserialize() for new code.
4289
+ // -----------------------------------------------
4290
+ template <typename OUTPUT>
4291
+ bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
4292
+
4293
+ template <typename INPUT>
4294
+ bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
4295
+
4296
+ template <typename OUTPUT>
4297
+ bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
4298
+
4299
+ template <typename INPUT>
4300
+ bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
4301
+
4302
+ private:
4303
+ // The actual data
4304
+ // ---------------
4305
+ ht rep;
4306
+ };
4307
+
4308
+ } // spp_ namespace
4309
+
4310
+
4311
+ // We need a global swap for all our classes as well
4312
+ // -------------------------------------------------
4313
+
4314
+ template <class T, class Alloc>
4315
+ inline void swap(spp_::sparsegroup<T,Alloc> &x, spp_::sparsegroup<T,Alloc> &y)
4316
+ {
4317
+ x.swap(y);
4318
+ }
4319
+
4320
+ template <class T, class Alloc>
4321
+ inline void swap(spp_::sparsetable<T,Alloc> &x, spp_::sparsetable<T,Alloc> &y)
4322
+ {
4323
+ x.swap(y);
4324
+ }
4325
+
4326
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
4327
+ inline void swap(spp_::sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
4328
+ spp_::sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &y)
4329
+ {
4330
+ x.swap(y);
4331
+ }
4332
+
4333
+ template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
4334
+ inline void swap(spp_::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
4335
+ spp_::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2)
4336
+ {
4337
+ hm1.swap(hm2);
4338
+ }
4339
+
4340
+ template <class Val, class HashFcn, class EqualKey, class Alloc>
4341
+ inline void swap(spp_::sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
4342
+ spp_::sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2)
4343
+ {
4344
+ hs1.swap(hs2);
4345
+ }
4346
+
4347
+ #endif // sparsepp_h_guard_