sparsam 0.1.4

@@ -0,0 +1,101 @@
+ #ifndef __SERIALIZER_H__
+ #include <ruby.h>
+ #include <ruby/intern.h>
+ #ifndef NUM2SHORT
+ #define NUM2SHORT NUM2INT
+ #endif
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ enum Proto {
+   compact = 0,
+   binary = 1,
+ };
+
+ enum TOType {
+   t_union = 0,
+   t_struct = 1,
+ };
+
+ enum ValidateStrictness { normal = 0, strict = 1, recursive = 2 };
+
+ void serializer_free(void *data);
+ void *serializer_create();
+ void serializer_init(void *serializer, int protocol, void *str_arg1,
+                      uint32_t len);
+
+ VALUE serializer_readStruct(VALUE self, VALUE klass);
+ VALUE serializer_writeStruct(VALUE self, VALUE klass, VALUE data);
+
+ VALUE cache_fields(VALUE self, VALUE klass);
+
+ VALUE serializer_validate(VALUE self, VALUE klass, VALUE data,
+                           VALUE strictness);
+
+ void initialize_constants();
+ void initialize_runtime_constants();
+
+ #ifdef __cplusplus
+ }  // end extern "C"
+
+ #include <boost/shared_ptr.hpp>
+ #include <map>
+ #include <string>
+ #include <thrift/protocol/TProtocol.h>
+ #include <thrift/transport/TBufferTransports.h>
+ #include <unordered_set>
+ #include "third-party/sparsepp/sparsepp/spp.h"
+
+ using ::apache::thrift::protocol::TType;
+
+ typedef uint16_t FieldIdIndex;
+ typedef uint16_t KlassIndex;
+
+ typedef int16_t FieldID;
+
+ typedef struct FieldBegin {
+   TType ftype;
+   FieldID fid;
+ } FieldBegin;
+
+ typedef struct FieldInfo {
+   TType ftype;
+   VALUE klass;    // set if TTYPE is struct or union
+   ID ivarName;    // set if field is on struct
+   VALUE symName;  // set if field is on struct/union
+   bool isOptional;
+   bool isBinaryString;
+   FieldInfo *elementType;  // element of list or set, or map
+   FieldInfo *keyType;      // type of key in maps
+ } FieldInfo;
+
+ typedef std::map<FieldID, FieldInfo *> FieldInfoMap;
+ typedef spp::sparse_hash_map<VALUE, FieldInfoMap *> KlassFieldsCache;
+
+ class ThriftSerializer {
+  public:
+   ThriftSerializer(){};
+   boost::shared_ptr< ::apache::thrift::protocol::TProtocol > tprot;
+   boost::shared_ptr< ::apache::thrift::transport::TMemoryBuffer > tmb;
+
+   VALUE readStruct(VALUE klass);
+   void writeStruct(VALUE klass, VALUE data);
+
+  private:
+   VALUE readUnion(VALUE klass);
+   VALUE readAny(TType ttype, FieldInfo *field_info);
+   void writeAny(TType ttype, FieldInfo *field_info, VALUE data);
+   void skip_n_type(uint32_t n, TType ttype);
+   void skip_n_pair(uint32_t n, TType type_a, TType type_b);
+ };
+
+ bool validateStruct(VALUE klass, VALUE data, bool validateContainerTypes,
+                     bool recursive);
+ bool validateAny(FieldInfo *type, VALUE val, bool recursive);
+ FieldInfoMap *FindOrCreateFieldInfoMap(VALUE klass);
+ FieldInfo *CreateFieldInfo(VALUE field_map_entry);
+ FieldInfoMap *CreateFieldInfoMap(VALUE klass);
+
+ #endif
+ #endif
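
The declarations above are plain C entry points, so they would typically be exposed to Ruby through the standard C extension API. A minimal sketch of such a registration follows; the Init_ function, module, class, and method names are illustrative assumptions, not taken from the gem:

    #include <ruby.h>
    #include "serializer.h"

    // Hypothetical extension entry point -- the real gem may wire this up differently.
    extern "C" void Init_sparsam_native(void) {
        initialize_constants();

        VALUE mSparsam    = rb_define_module("Sparsam");  // assumed module name
        VALUE cSerializer = rb_define_class_under(mSparsam, "Serializer", rb_cObject);

        // Arities follow the declarations above (self is implicit).
        rb_define_method(cSerializer, "read",  RUBY_METHOD_FUNC(serializer_readStruct),  1);
        rb_define_method(cSerializer, "write", RUBY_METHOD_FUNC(serializer_writeStruct), 2);
        rb_define_singleton_method(cSerializer, "cache_fields",
                                   RUBY_METHOD_FUNC(cache_fields), 1);
        rb_define_singleton_method(cSerializer, "validate",
                                   RUBY_METHOD_FUNC(serializer_validate), 3);
    }
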
@@ -0,0 +1,4347 @@
1
+ #if !defined(sparsepp_h_guard_)
2
+ #define sparsepp_h_guard_
3
+
4
+
5
+ // ----------------------------------------------------------------------
6
+ // Copyright (c) 2016, Gregory Popovitch - greg7mdp@gmail.com
7
+ // All rights reserved.
8
+ //
9
+ // This work is derived from Google's sparsehash library
10
+ //
11
+ // Copyright (c) 2005, Google Inc.
12
+ // All rights reserved.
13
+ //
14
+ // Redistribution and use in source and binary forms, with or without
15
+ // modification, are permitted provided that the following conditions are
16
+ // met:
17
+ //
18
+ // * Redistributions of source code must retain the above copyright
19
+ // notice, this list of conditions and the following disclaimer.
20
+ // * Redistributions in binary form must reproduce the above
21
+ // copyright notice, this list of conditions and the following disclaimer
22
+ // in the documentation and/or other materials provided with the
23
+ // distribution.
24
+ // * Neither the name of Google Inc. nor the names of its
25
+ // contributors may be used to endorse or promote products derived from
26
+ // this software without specific prior written permission.
27
+ //
28
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39
+ // ----------------------------------------------------------------------
40
+
41
+
42
+ // some macros for portability
43
+ // ---------------------------
44
+ // includes
45
+ // --------
46
+ #include <cassert>
47
+ #include <cstring>
48
+ #include <string>
49
+ #include <limits> // for numeric_limits
50
+ #include <algorithm> // For swap(), eg
51
+ #include <iterator> // for iterator tags
52
+ #include <functional> // for equal_to<>, select1st<>, std::unary_function, etc
53
+ #include <memory> // for alloc, uninitialized_copy, uninitialized_fill
54
+ #include <cstdlib> // for malloc/realloc/free
55
+ #include <cstddef> // for ptrdiff_t
56
+ #include <new> // for placement new
57
+ #include <stdexcept> // For length_error
58
+ #include <utility> // for pair<>
59
+ #include <cstdio>
60
+ #include <iosfwd>
61
+ #include <ios>
62
+
63
+ #include <sparsepp/spp_stdint.h> // includes spp_config.h
64
+ #include <sparsepp/spp_traits.h>
65
+ #include <sparsepp/spp_utils.h>
66
+
67
+ #ifdef SPP_INCLUDE_SPP_ALLOC
68
+ #include <sparsepp/spp_dlalloc.h>
69
+ #endif
70
+
71
+ #if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST)
72
+ #include <initializer_list>
73
+ #endif
74
+
75
+ #if (SPP_GROUP_SIZE == 32)
76
+ #define SPP_SHIFT_ 5
77
+ #define SPP_MASK_ 0x1F
78
+ typedef uint32_t group_bm_type;
79
+ #elif (SPP_GROUP_SIZE == 64)
80
+ #define SPP_SHIFT_ 6
81
+ #define SPP_MASK_ 0x3F
82
+ typedef uint64_t group_bm_type;
83
+ #else
84
+ #error "SPP_GROUP_SIZE must be either 32 or 64"
85
+ #endif
86
+
87
+ namespace spp_ {
88
+
89
+ // ----------------------------------------------------------------------
90
+ // U T I L F U N C T I O N S
91
+ // ----------------------------------------------------------------------
92
+ template <class E>
93
+ inline void throw_exception(const E& exception)
94
+ {
95
+ #if !defined(SPP_NO_EXCEPTIONS)
96
+ throw exception;
97
+ #else
98
+ assert(0);
99
+ abort();
100
+ #endif
101
+ }
102
+
103
+ // ----------------------------------------------------------------------
104
+ // M U T A B L E P A I R H A C K
105
+ // turn std::pair<const K, V> into mutable std::pair<K, V>
106
+ // ----------------------------------------------------------------------
107
+ template <class T>
108
+ struct cvt
109
+ {
110
+ typedef T type;
111
+ };
112
+
113
+ template <class K, class V>
114
+ struct cvt<std::pair<const K, V> >
115
+ {
116
+ typedef std::pair<K, V> type;
117
+ };
118
+
119
+ template <class K, class V>
120
+ struct cvt<const std::pair<const K, V> >
121
+ {
122
+ typedef const std::pair<K, V> type;
123
+ };
124
+
125
+ // ----------------------------------------------------------------------
126
+ // M O V E I T E R A T O R
127
+ // ----------------------------------------------------------------------
128
+ #ifdef SPP_NO_CXX11_RVALUE_REFERENCES
129
+ #define MK_MOVE_IT(p) (p)
130
+ #else
131
+ #define MK_MOVE_IT(p) std::make_move_iterator(p)
132
+ #endif
133
+
134
+
135
+ // ----------------------------------------------------------------------
136
+ // I N T E R N A L S T U F F
137
+ // ----------------------------------------------------------------------
138
+ #ifdef SPP_NO_CXX11_STATIC_ASSERT
139
+ template <bool> struct SppCompileAssert { };
140
+ #define SPP_COMPILE_ASSERT(expr, msg) \
141
+ SPP_ATTRIBUTE_UNUSED typedef SppCompileAssert<(bool(expr))> spp_bogus_[bool(expr) ? 1 : -1]
142
+ #else
143
+ #define SPP_COMPILE_ASSERT static_assert
144
+ #endif
145
+
146
+ namespace sparsehash_internal
147
+ {
148
+
149
+ // Adaptor methods for reading/writing data from an INPUT or OUTPUT
150
+ // variable passed to serialize() or unserialize(). For now we
151
+ // have implemented INPUT/OUTPUT for FILE*, istream*/ostream* (note
152
+ // they are pointers, unlike typical use), or else a pointer to
153
+ // something that supports a Read()/Write() method.
154
+ //
155
+ // For technical reasons, we implement read_data/write_data in two
156
+ // stages. The actual work is done in *_data_internal, which takes
157
+ // the stream argument twice: once as a template type, and once with
158
+ // normal type information. (We only use the second version.) We do
159
+ // this because of how C++ picks what function overload to use. If we
160
+ // implemented this the naive way:
161
+ // bool read_data(istream* is, const void* data, size_t length);
162
+ // template<typename T> read_data(T* fp, const void* data, size_t length);
163
+ // C++ would prefer the second version for every stream type except
164
+ // istream. However, we want C++ to prefer the first version for
165
+ // streams that are *subclasses* of istream, such as istringstream.
166
+ // This is not possible given the way template types are resolved. So
167
+ // we split the stream argument in two, one of which is templated and
168
+ // one of which is not. The specialized functions (like the istream
169
+ // version above) ignore the template arg and use the second, 'type'
170
+ // arg, getting subclass matching as normal. The 'catch-all'
171
+ // functions (the second version above) use the template arg to deduce
172
+ // the type, and use a second, void* arg to achieve the desired
173
+ // 'catch-all' semantics.
174
+
175
+ // ----- low-level I/O for FILE* ----
176
+
177
+ template<typename Ignored>
178
+ inline bool read_data_internal(Ignored* /*unused*/, FILE* fp,
179
+ void* data, size_t length)
180
+ {
181
+ return fread(data, length, 1, fp) == 1;
182
+ }
183
+
184
+ template<typename Ignored>
185
+ inline bool write_data_internal(Ignored* /*unused*/, FILE* fp,
186
+ const void* data, size_t length)
187
+ {
188
+ return fwrite(data, length, 1, fp) == 1;
189
+ }
190
+
191
+ // ----- low-level I/O for iostream ----
192
+
193
+ // We want the caller to be responsible for #including <iostream>, not
194
+ // us, because iostream is a big header! According to the standard,
195
+ // it's only legal to delay the instantiation the way we want to if
196
+ // the istream/ostream is a template type. So we jump through hoops.
197
+ template<typename ISTREAM>
198
+ inline bool read_data_internal_for_istream(ISTREAM* fp,
199
+ void* data, size_t length)
200
+ {
201
+ return fp->read(reinterpret_cast<char*>(data),
202
+ static_cast<std::streamsize>(length)).good();
203
+ }
204
+ template<typename Ignored>
205
+ inline bool read_data_internal(Ignored* /*unused*/, std::istream* fp,
206
+ void* data, size_t length)
207
+ {
208
+ return read_data_internal_for_istream(fp, data, length);
209
+ }
210
+
211
+ template<typename OSTREAM>
212
+ inline bool write_data_internal_for_ostream(OSTREAM* fp,
213
+ const void* data, size_t length)
214
+ {
215
+ return fp->write(reinterpret_cast<const char*>(data),
216
+ static_cast<std::streamsize>(length)).good();
217
+ }
218
+ template<typename Ignored>
219
+ inline bool write_data_internal(Ignored* /*unused*/, std::ostream* fp,
220
+ const void* data, size_t length)
221
+ {
222
+ return write_data_internal_for_ostream(fp, data, length);
223
+ }
224
+
225
+ // ----- low-level I/O for custom streams ----
226
+
227
+ // The INPUT type needs to support a Read() method that takes a
228
+ // buffer and a length and returns the number of bytes read.
229
+ template <typename INPUT>
230
+ inline bool read_data_internal(INPUT* fp, void* /*unused*/,
231
+ void* data, size_t length)
232
+ {
233
+ return static_cast<size_t>(fp->Read(data, length)) == length;
234
+ }
235
+
236
+ // The OUTPUT type needs to support a Write() operation that takes
237
+ // a buffer and a length and returns the number of bytes written.
238
+ template <typename OUTPUT>
239
+ inline bool write_data_internal(OUTPUT* fp, void* /*unused*/,
240
+ const void* data, size_t length)
241
+ {
242
+ return static_cast<size_t>(fp->Write(data, length)) == length;
243
+ }
244
+
245
+ // ----- low-level I/O: the public API ----
246
+
247
+ template <typename INPUT>
248
+ inline bool read_data(INPUT* fp, void* data, size_t length)
249
+ {
250
+ return read_data_internal(fp, fp, data, length);
251
+ }
252
+
253
+ template <typename OUTPUT>
254
+ inline bool write_data(OUTPUT* fp, const void* data, size_t length)
255
+ {
256
+ return write_data_internal(fp, fp, data, length);
257
+ }
258
+
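
To make the two-stage dispatch described in the comment above concrete, here is a small standalone sketch (PseudoInput and dispatch_demo are invented names; the sketch assumes this header is included) showing how read_data() resolves for the three supported stream kinds:

    #include <cstdio>
    #include <cstring>
    #include <sstream>

    struct PseudoInput {  // custom source exposing the Read() protocol
        const char *buf;
        size_t      pos;
        size_t Read(void *data, size_t len) {
            std::memcpy(data, buf + pos, len); pos += len; return len;
        }
    };

    inline void dispatch_demo() {
        char out[4];

        std::FILE *fp = std::tmpfile();    // FILE* -> exact-match FILE* overload
        std::fwrite("abcd", 1, 4, fp);
        std::rewind(fp);
        spp_::sparsehash_internal::read_data(fp, out, 4);
        std::fclose(fp);

        std::istringstream iss("abcd");    // istream subclass -> istream* overload beats void*
        spp_::sparsehash_internal::read_data(&iss, out, 4);

        PseudoInput pi = { "abcd", 0 };    // anything else -> Read()/Write() catch-all
        spp_::sparsehash_internal::read_data(&pi, out, 4);
    }
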
259
+ // Uses read_data() and write_data() to read/write an integer.
260
+ // length is the number of bytes to read/write (which may differ
261
+ // from sizeof(IntType), allowing us to save on a 32-bit system
262
+ // and load on a 64-bit system). Excess bytes are taken to be 0.
263
+ // INPUT and OUTPUT must match legal inputs to read/write_data (above).
264
+ // --------------------------------------------------------------------
265
+ template <typename INPUT, typename IntType>
266
+ bool read_bigendian_number(INPUT* fp, IntType* value, size_t length)
267
+ {
268
+ *value = 0;
269
+ unsigned char byte;
270
+ // We require IntType to be unsigned or else the shifting gets all screwy.
271
+ SPP_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0), "serializing_int_requires_an_unsigned_type");
272
+ for (size_t i = 0; i < length; ++i)
273
+ {
274
+ if (!read_data(fp, &byte, sizeof(byte)))
275
+ return false;
276
+ *value |= static_cast<IntType>(byte) << ((length - 1 - i) * 8);
277
+ }
278
+ return true;
279
+ }
280
+
281
+ template <typename OUTPUT, typename IntType>
282
+ bool write_bigendian_number(OUTPUT* fp, IntType value, size_t length)
283
+ {
284
+ unsigned char byte;
285
+ // We require IntType to be unsigned or else the shifting gets all screwy.
286
+ SPP_COMPILE_ASSERT(static_cast<IntType>(-1) > static_cast<IntType>(0), "serializing_int_requires_an_unsigned_type");
287
+ for (size_t i = 0; i < length; ++i)
288
+ {
289
+ byte = (sizeof(value) <= length-1 - i)
290
+ ? static_cast<unsigned char>(0) : static_cast<unsigned char>((value >> ((length-1 - i) * 8)) & 255);
291
+ if (!write_data(fp, &byte, sizeof(byte))) return false;
292
+ }
293
+ return true;
294
+ }
295
+
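
As a quick illustration of the helpers above, the following sketch (the demo function name is illustrative) round-trips a 64-bit value through a temporary FILE* using only 6 bytes, the "save on a 32-bit system and load on a 64-bit system" case the comment mentions:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    inline void bigendian_roundtrip_demo() {
        uint64_t original = 0x112233445566ULL;  // fits in 6 bytes
        uint64_t restored = 0;

        std::FILE *fp = std::tmpfile();
        spp_::sparsehash_internal::write_bigendian_number(fp, original, 6);  // MSB first
        std::rewind(fp);
        spp_::sparsehash_internal::read_bigendian_number(fp, &restored, 6);  // excess bytes read as 0
        assert(restored == original);
        std::fclose(fp);
    }
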
296
+ // If your keys and values are simple enough, you can pass this
297
+ // serializer to serialize()/unserialize(). "Simple enough" means
298
+ // value_type is a POD type that contains no pointers. Note,
299
+ // however, we don't try to normalize endianness.
300
+ // This is the type used for NopointerSerializer.
301
+ // ---------------------------------------------------------------
302
+ template <typename value_type> struct pod_serializer
303
+ {
304
+ template <typename INPUT>
305
+ bool operator()(INPUT* fp, value_type* value) const
306
+ {
307
+ return read_data(fp, value, sizeof(*value));
308
+ }
309
+
310
+ template <typename OUTPUT>
311
+ bool operator()(OUTPUT* fp, const value_type& value) const
312
+ {
313
+ return write_data(fp, &value, sizeof(value));
314
+ }
315
+ };
316
+
317
+
318
+ // Settings contains parameters for growing and shrinking the table.
319
+ // It also packages the zero-size functor (i.e. the hasher).
320
+ //
321
+ // It does some munging of the hash value for the cases where
322
+ // the original hash function is not very good.
323
+ // ---------------------------------------------------------------
324
+ template<typename Key, typename HashFunc, typename SizeType, int HT_MIN_BUCKETS>
325
+ class sh_hashtable_settings : public HashFunc
326
+ {
327
+ private:
328
+ #ifndef SPP_MIX_HASH
329
+ template <class T, int sz> struct Mixer
330
+ {
331
+ inline T operator()(T h) const { return h; }
332
+ };
333
+ #else
334
+ template <class T, int sz> struct Mixer
335
+ {
336
+ inline T operator()(T h) const;
337
+ };
338
+
339
+ template <class T> struct Mixer<T, 4>
340
+ {
341
+ inline T operator()(T h) const
342
+ {
343
+ // from Thomas Wang - https://gist.github.com/badboy/6267743
344
+ // ---------------------------------------------------------
345
+ h = (h ^ 61) ^ (h >> 16);
346
+ h = h + (h << 3);
347
+ h = h ^ (h >> 4);
348
+ h = h * 0x27d4eb2d;
349
+ h = h ^ (h >> 15);
350
+ return h;
351
+ }
352
+ };
353
+
354
+ template <class T> struct Mixer<T, 8>
355
+ {
356
+ inline T operator()(T h) const
357
+ {
358
+ // from Thomas Wang - https://gist.github.com/badboy/6267743
359
+ // ---------------------------------------------------------
360
+ h = (~h) + (h << 21); // h = (h << 21) - h - 1;
361
+ h = h ^ (h >> 24);
362
+ h = (h + (h << 3)) + (h << 8); // h * 265
363
+ h = h ^ (h >> 14);
364
+ h = (h + (h << 2)) + (h << 4); // h * 21
365
+ h = h ^ (h >> 28);
366
+ h = h + (h << 31);
367
+ return h;
368
+ }
369
+ };
370
+ #endif
371
+
372
+ public:
373
+ typedef Key key_type;
374
+ typedef HashFunc hasher;
375
+ typedef SizeType size_type;
376
+
377
+ public:
378
+ sh_hashtable_settings(const hasher& hf,
379
+ const float ht_occupancy_flt,
380
+ const float ht_empty_flt)
381
+ : hasher(hf),
382
+ enlarge_threshold_(0),
383
+ shrink_threshold_(0),
384
+ consider_shrink_(false),
385
+ num_ht_copies_(0)
386
+ {
387
+ set_enlarge_factor(ht_occupancy_flt);
388
+ set_shrink_factor(ht_empty_flt);
389
+ }
390
+
391
+ size_t hash(const key_type& v) const
392
+ {
393
+ size_t h = hasher::operator()(v);
394
+ Mixer<size_t, sizeof(size_t)> mixer;
395
+
396
+ return mixer(h);
397
+ }
398
+
399
+ float enlarge_factor() const { return enlarge_factor_; }
400
+ void set_enlarge_factor(float f) { enlarge_factor_ = f; }
401
+ float shrink_factor() const { return shrink_factor_; }
402
+ void set_shrink_factor(float f) { shrink_factor_ = f; }
403
+
404
+ size_type enlarge_threshold() const { return enlarge_threshold_; }
405
+ void set_enlarge_threshold(size_type t) { enlarge_threshold_ = t; }
406
+ size_type shrink_threshold() const { return shrink_threshold_; }
407
+ void set_shrink_threshold(size_type t) { shrink_threshold_ = t; }
408
+
409
+ size_type enlarge_size(size_type x) const { return static_cast<size_type>(x * enlarge_factor_); }
410
+ size_type shrink_size(size_type x) const { return static_cast<size_type>(x * shrink_factor_); }
411
+
412
+ bool consider_shrink() const { return consider_shrink_; }
413
+ void set_consider_shrink(bool t) { consider_shrink_ = t; }
414
+
415
+ unsigned int num_ht_copies() const { return num_ht_copies_; }
416
+ void inc_num_ht_copies() { ++num_ht_copies_; }
417
+
418
+ // Reset the enlarge and shrink thresholds
419
+ void reset_thresholds(size_type num_buckets)
420
+ {
421
+ set_enlarge_threshold(enlarge_size(num_buckets));
422
+ set_shrink_threshold(shrink_size(num_buckets));
423
+ // whatever caused us to reset already considered
424
+ set_consider_shrink(false);
425
+ }
426
+
427
+ // Caller is responsible for calling reset_thresholds right after
428
+ // set_resizing_parameters.
429
+ // ------------------------------------------------------------
430
+ void set_resizing_parameters(float shrink, float grow)
431
+ {
432
+ assert(shrink >= 0);
433
+ assert(grow <= 1);
434
+ if (shrink > grow/2.0f)
435
+ shrink = grow / 2.0f; // otherwise we thrash hashtable size
436
+ set_shrink_factor(shrink);
437
+ set_enlarge_factor(grow);
438
+ }
439
+
440
+ // This is the smallest size a hashtable can be without being too crowded
441
+ // If you like, you can give a min #buckets as well as a min #elts
442
+ // ----------------------------------------------------------------------
443
+ size_type min_buckets(size_type num_elts, size_type min_buckets_wanted)
444
+ {
445
+ float enlarge = enlarge_factor();
446
+ size_type sz = HT_MIN_BUCKETS; // min buckets allowed
447
+ while (sz < min_buckets_wanted ||
448
+ num_elts >= static_cast<size_type>(sz * enlarge))
449
+ {
450
+ // This just prevents overflowing size_type, since sz can exceed
451
+ // max_size() here.
452
+ // -------------------------------------------------------------
453
+ if (static_cast<size_type>(sz * 2) < sz)
454
+ throw_exception(std::length_error("resize overflow")); // protect against overflow
455
+ sz *= 2;
456
+ }
457
+ return sz;
458
+ }
459
+
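
To make the sizing loop above concrete with illustrative numbers (not the library's defaults): with HT_MIN_BUCKETS = 4 and an enlarge factor of 0.8, min_buckets(1000, 0) doubles sz through 4, 8, ..., 1024 (1000 is still >= 1024 * 0.8 ≈ 819) and stops at 2048, the first size for which 1000 < 2048 * 0.8 ≈ 1638.
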
460
+ private:
461
+ size_type enlarge_threshold_; // table.size() * enlarge_factor
462
+ size_type shrink_threshold_; // table.size() * shrink_factor
463
+ float enlarge_factor_; // how full before resize
464
+ float shrink_factor_; // how empty before resize
465
+ bool consider_shrink_; // if we should try to shrink before next insert
466
+
467
+ unsigned int num_ht_copies_; // num_ht_copies is a counter incremented every Copy/Move
468
+ };
469
+
470
+ } // namespace sparsehash_internal
471
+
472
+ #undef SPP_COMPILE_ASSERT
473
+
474
+ // ----------------------------------------------------------------------
475
+ // S P A R S E T A B L E
476
+ // ----------------------------------------------------------------------
477
+ //
478
+ // A sparsetable is a random access container that implements a sparse array,
479
+ // that is, an array that uses very little memory to store unassigned
480
+ // indices (in this case, between 1-2 bits per unassigned index). For
481
+ // instance, if you allocate an array of size 5 and assign a[2] = <big
482
+ // struct>, then a[2] will take up a lot of memory but a[0], a[1],
483
+ // a[3], and a[4] will not. Array elements that have a value are
484
+ // called "assigned". Array elements that have no value yet, or have
485
+ // had their value cleared using erase() or clear(), are called
486
+ // "unassigned".
487
+ //
488
+ // Unassigned values seem to have the default value of T (see below).
489
+ // Nevertheless, there is a difference between an unassigned index and
490
+ // one explicitly assigned the value of T(). The latter is considered
491
+ // assigned.
492
+ //
493
+ // Access to an array element is constant time, as is insertion and
494
+ // deletion. Insertion and deletion may be fairly slow, however:
495
+ // because of this container's memory economy, each insert and delete
496
+ // causes a memory reallocation.
497
+ //
498
+ // NOTE: You should not test(), get(), or set() any index that is
499
+ // greater than sparsetable.size(). If you need to do that, call
500
+ // resize() first.
501
+ //
502
+ // --- Template parameters
503
+ // PARAMETER DESCRIPTION DEFAULT
504
+ // T The value of the array: the type of --
505
+ // object that is stored in the array.
506
+ //
507
+ // Alloc: Allocator to use to allocate memory.
508
+ //
509
+ // --- Model of
510
+ // Random Access Container
511
+ //
512
+ // --- Type requirements
513
+ // T must be Copy Constructible. It need not be Assignable.
514
+ //
515
+ // --- Public base classes
516
+ // None.
517
+ //
518
+ // --- Members
519
+ //
520
+ // [*] All iterators are const in a sparsetable (though nonempty_iterators
521
+ // may not be). Use get() and set() to assign values, not iterators.
522
+ //
523
+ // [+] iterators are random-access iterators. nonempty_iterators are
524
+ // bidirectional iterators.
525
+
526
+ // [*] If you shrink a sparsetable using resize(), assigned elements
527
+ // past the end of the table are removed using erase(). If you grow
528
+ // a sparsetable, new unassigned indices are created.
529
+ //
530
+ // [+] Note that operator[] returns a const reference. You must use
531
+ // set() to change the value of a table element.
532
+ //
533
+ // [!] Unassignment also calls the destructor.
534
+ //
535
+ // Iterators are invalidated whenever an item is inserted or
536
+ // deleted (ie set() or erase() is used) or when the size of
537
+ // the table changes (ie resize() or clear() is used).
538
+
539
+
540
+
541
+ // ---------------------------------------------------------------------------
542
+ // Our iterator is as simple as iterators can be: basically it's just
543
+ // the index into our table. Dereference, the only complicated
544
+ // thing, we punt to the table class. This just goes to show how
545
+ // much machinery STL requires to do even the most trivial tasks.
546
+ //
547
+ // A NOTE ON ASSIGNING:
548
+ // A sparse table does not actually allocate memory for entries
549
+ // that are not filled. Because of this, it becomes complicated
550
+ // to have a non-const iterator: we don't know, if the iterator points
551
+ // to a not-filled bucket, whether you plan to fill it with something
552
+ // or whether you plan to read its value (in which case you'll get
553
+ // the default bucket value). Therefore, while we can define const
554
+ // operations in a pretty 'normal' way, for non-const operations, we
555
+ // define something that returns a helper object with operator= and
556
+ // operator& that allocate a bucket lazily. We use this for table[]
557
+ // and also for regular table iterators.
558
+
559
+ // ---------------------------------------------------------------------------
560
+ // ---------------------------------------------------------------------------
561
+ // Our iterator is as simple as iterators can be: basically it's just
562
+ // the index into our table. Dereference, the only complicated
563
+ // thing, we punt to the table class. This just goes to show how
564
+ // much machinery STL requires to do even the most trivial tasks.
565
+ //
566
+ // By templatizing over tabletype, we have one iterator type which
567
+ // we can use for both sparsetables and sparsebins. In fact it
568
+ // works on any class that allows size() and operator[] (eg vector),
569
+ // as long as it does the standard STL typedefs too (eg value_type).
570
+
571
+ // ---------------------------------------------------------------------------
572
+ // ---------------------------------------------------------------------------
573
+ template <class tabletype>
574
+ class table_iterator
575
+ {
576
+ public:
577
+ typedef table_iterator iterator;
578
+
579
+ typedef std::random_access_iterator_tag iterator_category;
580
+ typedef typename tabletype::value_type value_type;
581
+ typedef typename tabletype::difference_type difference_type;
582
+ typedef typename tabletype::size_type size_type;
583
+
584
+ explicit table_iterator(tabletype *tbl = 0, size_type p = 0) :
585
+ table(tbl), pos(p)
586
+ { }
587
+
588
+ // Helper function to assert things are ok; eg pos is still in range
589
+ void check() const
590
+ {
591
+ assert(table);
592
+ assert(pos <= table->size());
593
+ }
594
+
595
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
596
+ // do bounds checking, since STL doesn't consider that its job. :-)
597
+ iterator& operator+=(size_type t) { pos += t; check(); return *this; }
598
+ iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
599
+ iterator& operator++() { ++pos; check(); return *this; }
600
+ iterator& operator--() { --pos; check(); return *this; }
601
+ iterator operator++(int)
602
+ {
603
+ iterator tmp(*this); // for x++
604
+ ++pos; check(); return tmp;
605
+ }
606
+
607
+ iterator operator--(int)
608
+ {
609
+ iterator tmp(*this); // for x--
610
+ --pos; check(); return tmp;
611
+ }
612
+
613
+ iterator operator+(difference_type i) const
614
+ {
615
+ iterator tmp(*this);
616
+ tmp += i; return tmp;
617
+ }
618
+
619
+ iterator operator-(difference_type i) const
620
+ {
621
+ iterator tmp(*this);
622
+ tmp -= i; return tmp;
623
+ }
624
+
625
+ difference_type operator-(iterator it) const
626
+ {
627
+ // for "x = it2 - it"
628
+ assert(table == it.table);
629
+ return pos - it.pos;
630
+ }
631
+
632
+ // Comparisons.
633
+ bool operator==(const iterator& it) const
634
+ {
635
+ return table == it.table && pos == it.pos;
636
+ }
637
+
638
+ bool operator<(const iterator& it) const
639
+ {
640
+ assert(table == it.table); // life is bad bad bad otherwise
641
+ return pos < it.pos;
642
+ }
643
+
644
+ bool operator!=(const iterator& it) const { return !(*this == it); }
645
+ bool operator<=(const iterator& it) const { return !(it < *this); }
646
+ bool operator>(const iterator& it) const { return it < *this; }
647
+ bool operator>=(const iterator& it) const { return !(*this < it); }
648
+
649
+ // Here's the info we actually need to be an iterator
650
+ tabletype *table; // so we can dereference and bounds-check
651
+ size_type pos; // index into the table
652
+ };
653
+
654
+ // ---------------------------------------------------------------------------
655
+ // ---------------------------------------------------------------------------
656
+ template <class tabletype>
657
+ class const_table_iterator
658
+ {
659
+ public:
660
+ typedef table_iterator<tabletype> iterator;
661
+ typedef const_table_iterator const_iterator;
662
+
663
+ typedef std::random_access_iterator_tag iterator_category;
664
+ typedef typename tabletype::value_type value_type;
665
+ typedef typename tabletype::difference_type difference_type;
666
+ typedef typename tabletype::size_type size_type;
667
+ typedef typename tabletype::const_reference reference; // we're const-only
668
+ typedef typename tabletype::const_pointer pointer;
669
+
670
+ // The "real" constructor
671
+ const_table_iterator(const tabletype *tbl, size_type p)
672
+ : table(tbl), pos(p) { }
673
+
674
+ // The default constructor, used when I define vars of type table::iterator
675
+ const_table_iterator() : table(NULL), pos(0) { }
676
+
677
+ // The copy constructor, for when I say table::iterator foo = tbl.begin()
678
+ // Also converts normal iterators to const iterators // not explicit on purpose
679
+ const_table_iterator(const iterator &from)
680
+ : table(from.table), pos(from.pos) { }
681
+
682
+ // The default destructor is fine; we don't define one
683
+ // The default operator= is fine; we don't define one
684
+
685
+ // The main thing our iterator does is dereference. If the table entry
686
+ // we point to is empty, we return the default value type.
687
+ reference operator*() const { return (*table)[pos]; }
688
+ pointer operator->() const { return &(operator*()); }
689
+
690
+ // Helper function to assert things are ok; eg pos is still in range
691
+ void check() const
692
+ {
693
+ assert(table);
694
+ assert(pos <= table->size());
695
+ }
696
+
697
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
698
+ // do bounds checking, since STL doesn't consider that its job. :-)
699
+ const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
700
+ const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
701
+ const_iterator& operator++() { ++pos; check(); return *this; }
702
+ const_iterator& operator--() { --pos; check(); return *this; }
703
+ const_iterator operator++(int)
704
+ {
705
+ const_iterator tmp(*this); // for x++
706
+ ++pos; check();
707
+ return tmp;
708
+ }
709
+ const_iterator operator--(int)
710
+ {
711
+ const_iterator tmp(*this); // for x--
712
+ --pos; check();
713
+ return tmp;
714
+ }
715
+ const_iterator operator+(difference_type i) const
716
+ {
717
+ const_iterator tmp(*this);
718
+ tmp += i;
719
+ return tmp;
720
+ }
721
+ const_iterator operator-(difference_type i) const
722
+ {
723
+ const_iterator tmp(*this);
724
+ tmp -= i;
725
+ return tmp;
726
+ }
727
+ difference_type operator-(const_iterator it) const
728
+ {
729
+ // for "x = it2 - it"
730
+ assert(table == it.table);
731
+ return pos - it.pos;
732
+ }
733
+ reference operator[](difference_type n) const
734
+ {
735
+ return *(*this + n); // simple though not totally efficient
736
+ }
737
+
738
+ // Comparisons.
739
+ bool operator==(const const_iterator& it) const
740
+ {
741
+ return table == it.table && pos == it.pos;
742
+ }
743
+
744
+ bool operator<(const const_iterator& it) const
745
+ {
746
+ assert(table == it.table); // life is bad bad bad otherwise
747
+ return pos < it.pos;
748
+ }
749
+ bool operator!=(const const_iterator& it) const { return !(*this == it); }
750
+ bool operator<=(const const_iterator& it) const { return !(it < *this); }
751
+ bool operator>(const const_iterator& it) const { return it < *this; }
752
+ bool operator>=(const const_iterator& it) const { return !(*this < it); }
753
+
754
+ // Here's the info we actually need to be an iterator
755
+ const tabletype *table; // so we can dereference and bounds-check
756
+ size_type pos; // index into the table
757
+ };
758
+
759
+ // ---------------------------------------------------------------------------
760
+ // This is a 2-D iterator. You specify a begin and end over a list
761
+ // of *containers*. We iterate over each container by iterating over
762
+ // it. It's actually simple:
763
+ // VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---,
764
+ // | ________________________________________________/
765
+ // | \_> VECTOR[1].begin() --------> VECTOR[1].end() -,
766
+ // | ___________________________________________________/
767
+ // v \_> ......
768
+ // VECTOR.end()
769
+ //
770
+ // It's impossible to do random access on one of these things in constant
771
+ // time, so it's just a bidirectional iterator.
772
+ //
773
+ // Unfortunately, because we need to use this for a non-empty iterator,
774
+ // we use ne_begin() and ne_end() instead of begin() and end()
775
+ // (though only going across, not down).
776
+ // ---------------------------------------------------------------------------
777
+
778
+ // ---------------------------------------------------------------------------
779
+ // ---------------------------------------------------------------------------
780
+ template <class T, class row_it, class col_it, class iter_type>
781
+ class Two_d_iterator : public std::iterator<iter_type, T>
782
+ {
783
+ public:
784
+ typedef Two_d_iterator iterator;
785
+ typedef T value_type;
786
+
787
+ explicit Two_d_iterator(row_it curr) : row_current(curr), col_current(0)
788
+ {
789
+ if (row_current && !row_current->is_marked())
790
+ {
791
+ col_current = row_current->ne_begin();
792
+ advance_past_end(); // in case cur->begin() == cur->end()
793
+ }
794
+ }
795
+
796
+ explicit Two_d_iterator(row_it curr, col_it col) : row_current(curr), col_current(col)
797
+ {
798
+ assert(col);
799
+ }
800
+
801
+ // The default constructor
802
+ Two_d_iterator() : row_current(0), col_current(0) { }
803
+
804
+ // Need this explicitly so we can convert normal iterators <=> const iterators
805
+ // not explicit on purpose
806
+ // ---------------------------------------------------------------------------
807
+ template <class T2, class row_it2, class col_it2, class iter_type2>
808
+ Two_d_iterator(const Two_d_iterator<T2, row_it2, col_it2, iter_type2>& it) :
809
+ row_current (*(row_it *)&it.row_current),
810
+ col_current (*(col_it *)&it.col_current)
811
+ { }
812
+
813
+ // The default destructor is fine; we don't define one
814
+ // The default operator= is fine; we don't define one
815
+
816
+ value_type& operator*() const { return *(col_current); }
817
+ value_type* operator->() const { return &(operator*()); }
818
+
819
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
820
+ // do bounds checking, since STL doesn't consider that its job. :-)
821
+ // NOTE: this is not amortized constant time! What do we do about it?
822
+ // ------------------------------------------------------------------
823
+ void advance_past_end()
824
+ {
825
+ // used when col_current points to end()
826
+ while (col_current == row_current->ne_end())
827
+ {
828
+ // end of current row
829
+ // ------------------
830
+ ++row_current; // go to beginning of next
831
+ if (!row_current->is_marked()) // col is irrelevant at end
832
+ col_current = row_current->ne_begin();
833
+ else
834
+ break; // don't go past row_end
835
+ }
836
+ }
837
+
838
+ friend size_t operator-(iterator l, iterator f)
839
+ {
840
+ if (f.row_current->is_marked())
841
+ return 0;
842
+
843
+ size_t diff(0);
844
+ while (f != l)
845
+ {
846
+ ++diff;
847
+ ++f;
848
+ }
849
+ return diff;
850
+ }
851
+
852
+ iterator& operator++()
853
+ {
854
+ // assert(!row_current->is_marked()); // how to ++ from there?
855
+ ++col_current;
856
+ advance_past_end(); // in case col_current is at end()
857
+ return *this;
858
+ }
859
+
860
+ iterator& operator--()
861
+ {
862
+ while (row_current->is_marked() ||
863
+ col_current == row_current->ne_begin())
864
+ {
865
+ --row_current;
866
+ col_current = row_current->ne_end(); // this is 1 too far
867
+ }
868
+ --col_current;
869
+ return *this;
870
+ }
871
+ iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
872
+ iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }
873
+
874
+
875
+ // Comparisons.
876
+ bool operator==(const iterator& it) const
877
+ {
878
+ return (row_current == it.row_current &&
879
+ (!row_current || row_current->is_marked() || col_current == it.col_current));
880
+ }
881
+
882
+ bool operator!=(const iterator& it) const { return !(*this == it); }
883
+
884
+ // Here's the info we actually need to be an iterator
885
+ // These need to be public so we convert from iterator to const_iterator
886
+ // ---------------------------------------------------------------------
887
+ row_it row_current;
888
+ col_it col_current;
889
+ };
890
+
891
+
892
+ // ---------------------------------------------------------------------------
893
+ // ---------------------------------------------------------------------------
894
+ template <class T, class row_it, class col_it, class iter_type, class Alloc>
895
+ class Two_d_destructive_iterator : public Two_d_iterator<T, row_it, col_it, iter_type>
896
+ {
897
+ public:
898
+ typedef Two_d_destructive_iterator iterator;
899
+
900
+ Two_d_destructive_iterator(Alloc &alloc, row_it curr) :
901
+ _alloc(alloc)
902
+ {
903
+ this->row_current = curr;
904
+ this->col_current = 0;
905
+ if (this->row_current && !this->row_current->is_marked())
906
+ {
907
+ this->col_current = this->row_current->ne_begin();
908
+ advance_past_end(); // in case cur->begin() == cur->end()
909
+ }
910
+ }
911
+
912
+ // Arithmetic: we just do arithmetic on pos. We don't even need to
913
+ // do bounds checking, since STL doesn't consider that its job. :-)
914
+ // NOTE: this is not amortized constant time! What do we do about it?
915
+ // ------------------------------------------------------------------
916
+ void advance_past_end()
917
+ {
918
+ // used when col_current points to end()
919
+ while (this->col_current == this->row_current->ne_end())
920
+ {
921
+ this->row_current->clear(_alloc, true); // This is what differs from non-destructive iterators above
922
+
923
+ // end of current row
924
+ // ------------------
925
+ ++this->row_current; // go to beginning of next
926
+ if (!this->row_current->is_marked()) // col is irrelevant at end
927
+ this->col_current = this->row_current->ne_begin();
928
+ else
929
+ break; // don't go past row_end
930
+ }
931
+ }
932
+
933
+ iterator& operator++()
934
+ {
935
+ // assert(!this->row_current->is_marked()); // how to ++ from there?
936
+ ++this->col_current;
937
+ advance_past_end(); // in case col_current is at end()
938
+ return *this;
939
+ }
940
+
941
+ private:
942
+ Two_d_destructive_iterator& operator=(const Two_d_destructive_iterator &o);
943
+
944
+ Alloc &_alloc;
945
+ };
946
+
947
+
948
+ // ---------------------------------------------------------------------------
949
+ // ---------------------------------------------------------------------------
950
+ #if defined(SPP_POPCNT_CHECK)
951
+ static inline bool spp_popcount_check()
952
+ {
953
+ int cpuInfo[4] = { -1 };
954
+ spp_cpuid(cpuInfo, 1);
955
+ if (cpuInfo[2] & (1 << 23))
956
+ return true; // means SPP_POPCNT supported
957
+ return false;
958
+ }
959
+ #endif
960
+
961
+ #if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT)
962
+
963
+ static inline uint32_t spp_popcount(uint32_t i)
964
+ {
965
+ static const bool s_ok = spp_popcount_check();
966
+ return s_ok ? SPP_POPCNT(i) : s_spp_popcount_default(i);
967
+ }
968
+
969
+ #else
970
+
971
+ static inline uint32_t spp_popcount(uint32_t i)
972
+ {
973
+ #if defined(SPP_POPCNT)
974
+ return static_cast<uint32_t>(SPP_POPCNT(i));
975
+ #else
976
+ return s_spp_popcount_default(i);
977
+ #endif
978
+ }
979
+
980
+ #endif
981
+
982
+ #if defined(SPP_POPCNT_CHECK) && defined(SPP_POPCNT64)
983
+
984
+ static inline uint32_t spp_popcount(uint64_t i)
985
+ {
986
+ static const bool s_ok = spp_popcount_check();
987
+ return s_ok ? (uint32_t)SPP_POPCNT64(i) : s_spp_popcount_default(i);
988
+ }
989
+
990
+ #else
991
+
992
+ static inline uint32_t spp_popcount(uint64_t i)
993
+ {
994
+ #if defined(SPP_POPCNT64)
995
+ return static_cast<uint32_t>(SPP_POPCNT64(i));
996
+ #elif 1
997
+ return s_spp_popcount_default(i);
998
+ #endif
999
+ }
1000
+
1001
+ #endif
1002
+
1003
+ // ---------------------------------------------------------------------------
1004
+ // SPARSE-TABLE
1005
+ // ------------
1006
+ // The idea is that a table with (logically) t buckets is divided
1007
+ // into t/M *groups* of M buckets each. (M is a constant, typically
1008
+ // 32) Each group is stored sparsely.
1009
+ // Thus, inserting into the table causes some array to grow, which is
1010
+ // slow but still constant time. Lookup involves doing a
1011
+ // logical-position-to-sparse-position lookup, which is also slow but
1012
+ // constant time. The larger M is, the slower these operations are
1013
+ // but the less overhead (slightly).
1014
+ //
1015
+ // To store the sparse array, we store a bitmap B, where B[i] = 1 iff
1016
+ // bucket i is non-empty. Then to look up bucket i we really look up
1017
+ // array[# of 1s before i in B]. This is constant time for fixed M.
1018
+ //
1019
+ // Terminology: the position of an item in the overall table (from
1020
+ // 1 .. t) is called its "location." The logical position in a group
1021
+ // (from 1 .. M) is called its "position." The actual location in
1022
+ // the array (from 1 .. # of non-empty buckets in the group) is
1023
+ // called its "offset."
1024
+ // ---------------------------------------------------------------------------
1025
+
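
The "number of 1s before i in B" lookup described above is a single popcount over the masked bitmap. A standalone sketch (offset_of is a hypothetical helper; sparsegroup::_pos_to_offset below does the same thing, and pos is assumed < 32 for the 32-bit case):

    #include <cstdint>

    inline uint32_t offset_of(uint32_t bm, uint32_t pos) {
        // keep only the bits strictly below `pos`, then count them
        return spp_::spp_popcount(bm & ((uint32_t(1) << pos) - 1u));
    }

    // Example: bm = 0x16 (binary 10110, buckets 1, 2 and 4 occupied)
    //   offset_of(0x16, 2) == 1   -> bucket 2 lives at array[1]
    //   offset_of(0x16, 4) == 2   -> bucket 4 lives at array[2]
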
1026
+ template <class T, class Alloc>
1027
+ class sparsegroup
1028
+ {
1029
+ public:
1030
+ // Basic types
1031
+ typedef T value_type;
1032
+ typedef Alloc allocator_type;
1033
+ typedef value_type& reference;
1034
+ typedef const value_type& const_reference;
1035
+ typedef value_type* pointer;
1036
+ typedef const value_type* const_pointer;
1037
+
1038
+ typedef uint8_t size_type; // max # of buckets
1039
+
1040
+ // These are our special iterators, that go over non-empty buckets in a
1041
+ // group. These aren't const-only because you can change non-empty buckets.
1042
+ // ---------------------------------------------------------------------
1043
+ typedef pointer ne_iterator;
1044
+ typedef const_pointer const_ne_iterator;
1045
+ typedef std::reverse_iterator<ne_iterator> reverse_ne_iterator;
1046
+ typedef std::reverse_iterator<const_ne_iterator> const_reverse_ne_iterator;
1047
+
1048
+ // We'll have versions for our special non-empty iterator too
1049
+ // ----------------------------------------------------------
1050
+ ne_iterator ne_begin() { return reinterpret_cast<pointer>(_group); }
1051
+ const_ne_iterator ne_begin() const { return reinterpret_cast<pointer>(_group); }
1052
+ const_ne_iterator ne_cbegin() const { return reinterpret_cast<pointer>(_group); }
1053
+ ne_iterator ne_end() { return reinterpret_cast<pointer>(_group + _num_items()); }
1054
+ const_ne_iterator ne_end() const { return reinterpret_cast<pointer>(_group + _num_items()); }
1055
+ const_ne_iterator ne_cend() const { return reinterpret_cast<pointer>(_group + _num_items()); }
1056
+ reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); }
1057
+ const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_cend()); }
1058
+ const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_cend()); }
1059
+ reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); }
1060
+ const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_cbegin()); }
1061
+ const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_cbegin()); }
1062
+
1063
+ private:
1064
+ // T can be std::pair<const K, V>, but sometime we need to cast to a mutable type
1065
+ // ------------------------------------------------------------------------------
1066
+ typedef typename spp_::cvt<T>::type mutable_value_type;
1067
+ typedef mutable_value_type * mutable_pointer;
1068
+ typedef const mutable_value_type * const_mutable_pointer;
1069
+
1070
+ bool _bmtest(size_type i) const { return !!(_bitmap & (static_cast<group_bm_type>(1) << i)); }
1071
+ void _bmset(size_type i) { _bitmap |= static_cast<group_bm_type>(1) << i; }
1072
+ void _bmclear(size_type i) { _bitmap &= ~(static_cast<group_bm_type>(1) << i); }
1073
+
1074
+ bool _bme_test(size_type i) const { return !!(_bm_erased & (static_cast<group_bm_type>(1) << i)); }
1075
+ void _bme_set(size_type i) { _bm_erased |= static_cast<group_bm_type>(1) << i; }
1076
+ void _bme_clear(size_type i) { _bm_erased &= ~(static_cast<group_bm_type>(1) << i); }
1077
+
1078
+ bool _bmtest_strict(size_type i) const
1079
+ { return !!((_bitmap | _bm_erased) & (static_cast<group_bm_type>(1) << i)); }
1080
+
1081
+
1082
+ static uint32_t _sizing(uint32_t n)
1083
+ {
1084
+ #if !defined(SPP_ALLOC_SZ) || (SPP_ALLOC_SZ == 0)
1085
+ // aggressive allocation first, then decreasing as sparsegroups fill up
1086
+ // --------------------------------------------------------------------
1087
+ static uint8_t s_alloc_batch_sz[SPP_GROUP_SIZE] = { 0 };
1088
+ if (!s_alloc_batch_sz[0])
1089
+ {
1090
+ // 32 bit bitmap
1091
+ // ........ .... .... .. .. .. .. . . . . . . . .
1092
+ // 8 12 16 18 20 22 24 25 26 ... 32
1093
+ // ------------------------------------------------------
1094
+ uint8_t group_sz = SPP_GROUP_SIZE / 4;
1095
+ uint8_t group_start_alloc = SPP_GROUP_SIZE / 8; //4;
1096
+ uint8_t alloc_sz = group_start_alloc;
1097
+ for (int i=0; i<4; ++i)
1098
+ {
1099
+ for (int j=0; j<group_sz; ++j)
1100
+ {
1101
+ if (j && j % group_start_alloc == 0)
1102
+ alloc_sz += group_start_alloc;
1103
+ s_alloc_batch_sz[i * group_sz + j] = alloc_sz;
1104
+ }
1105
+ if (group_start_alloc > 2)
1106
+ group_start_alloc /= 2;
1107
+ alloc_sz += group_start_alloc;
1108
+ }
1109
+ }
1110
+
1111
+ return n ? static_cast<uint32_t>(s_alloc_batch_sz[n-1]) : 0; // more aggressive alloc at the beginning
1112
+
1113
+ #elif (SPP_ALLOC_SZ == 1)
1114
+ // use as little memory as possible - slowest insert/delete in table
1115
+ // -----------------------------------------------------------------
1116
+ return n;
1117
+ #else
1118
+ // decent compromise when SPP_ALLOC_SZ == 2
1119
+ // ----------------------------------------
1120
+ static size_type sz_minus_1 = SPP_ALLOC_SZ - 1;
1121
+ return (n + sz_minus_1) & ~sz_minus_1;
1122
+ #endif
1123
+ }
1124
+
1125
+ pointer _allocate_group(allocator_type &alloc, uint32_t n /* , bool tight = false */)
1126
+ {
1127
+ // ignore tight since we don't store num_alloc
1128
+ // num_alloc = (uint8_t)(tight ? n : _sizing(n));
1129
+
1130
+ uint32_t num_alloc = (uint8_t)_sizing(n);
1131
+ _set_num_alloc(num_alloc);
1132
+ pointer retval = alloc.allocate(static_cast<size_type>(num_alloc));
1133
+ if (retval == NULL)
1134
+ {
1135
+ // the allocator is supposed to throw an exception if the allocation fails.
1136
+ fprintf(stderr, "sparsehash FATAL ERROR: failed to allocate %d groups\n", num_alloc);
1137
+ exit(1);
1138
+ }
1139
+ return retval;
1140
+ }
1141
+
1142
+ void _free_group(allocator_type &alloc, uint32_t num_alloc)
1143
+ {
1144
+ if (_group)
1145
+ {
1146
+ uint32_t num_buckets = _num_items();
1147
+ if (num_buckets)
1148
+ {
1149
+ mutable_pointer end_it = (mutable_pointer)(_group + num_buckets);
1150
+ for (mutable_pointer p = (mutable_pointer)_group; p != end_it; ++p)
1151
+ p->~mutable_value_type();
1152
+ }
1153
+ alloc.deallocate(_group, (typename allocator_type::size_type)num_alloc);
1154
+ _group = NULL;
1155
+ }
1156
+ }
1157
+
1158
+ // private because should not be called - no allocator!
1159
+ sparsegroup &operator=(const sparsegroup& x);
1160
+
1161
+ static size_type _pos_to_offset(group_bm_type bm, size_type pos)
1162
+ {
1163
+ //return (size_type)((uint32_t)~((int32_t(-1) + pos) >> 31) & spp_popcount(bm << (SPP_GROUP_SIZE - pos)));
1164
+ //return (size_type)(pos ? spp_popcount(bm << (SPP_GROUP_SIZE - pos)) : 0);
1165
+ return static_cast<size_type>(spp_popcount(bm & ((static_cast<group_bm_type>(1) << pos) - 1)));
1166
+ }
1167
+
1168
+ public:
1169
+
1170
+ // get_iter() in sparsetable needs it
1171
+ size_type pos_to_offset(size_type pos) const
1172
+ {
1173
+ return _pos_to_offset(_bitmap, pos);
1174
+ }
1175
+
1176
+ #ifdef _MSC_VER
1177
+ #pragma warning(push)
1178
+ #pragma warning(disable : 4146)
1179
+ #endif
1180
+
1181
+ // Returns the (logical) position in the bm[] array, i, such that
1182
+ // bm[i] is the offset-th set bit in the array. It is the inverse
1183
+ // of pos_to_offset. get_pos() uses this function to find the index
1184
+ // of an ne_iterator in the table. Bit-twiddling from
1185
+ // http://hackersdelight.org/basics.pdf
1186
+ // -----------------------------------------------------------------
1187
+ static size_type offset_to_pos(group_bm_type bm, size_type offset)
1188
+ {
1189
+ for (; offset > 0; offset--)
1190
+ bm &= (bm-1); // remove right-most set bit
1191
+
1192
+ // Clear all bits to the left of the rightmost bit (the &),
1193
+ // and then clear the rightmost bit but set all bits to the
1194
+ // right of it (the -1).
1195
+ // --------------------------------------------------------
1196
+ bm = (bm & -bm) - 1;
1197
+ return static_cast<size_type>(spp_popcount(bm));
1198
+ }
1199
+
1200
+ #ifdef _MSC_VER
1201
+ #pragma warning(pop)
1202
+ #endif
1203
+
1204
+ size_type offset_to_pos(size_type offset) const
1205
+ {
1206
+ return offset_to_pos(_bitmap, offset);
1207
+ }
1208
+
1209
+ public:
1210
+ // Constructors -- default and copy -- and destructor
1211
+ explicit sparsegroup() :
1212
+ _group(0), _bitmap(0), _bm_erased(0)
1213
+ {
1214
+ _set_num_items(0);
1215
+ _set_num_alloc(0);
1216
+ }
1217
+
1218
+ sparsegroup(const sparsegroup& x) :
1219
+ _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased)
1220
+ {
1221
+ _set_num_items(0);
1222
+ _set_num_alloc(0);
1223
+ assert(_group == 0); if (_group) exit(1);
1224
+ }
1225
+
1226
+ sparsegroup(const sparsegroup& x, allocator_type& a) :
1227
+ _group(0), _bitmap(x._bitmap), _bm_erased(x._bm_erased)
1228
+ {
1229
+ _set_num_items(0);
1230
+ _set_num_alloc(0);
1231
+
1232
+ uint32_t num_items = x._num_items();
1233
+ if (num_items)
1234
+ {
1235
+ _group = _allocate_group(a, num_items /* , true */);
1236
+ _set_num_items(num_items);
1237
+ std::uninitialized_copy(x._group, x._group + num_items, _group);
1238
+ }
1239
+ }
1240
+
1241
+ ~sparsegroup() { assert(_group == 0); if (_group) exit(1); }
1242
+
1243
+ void destruct(allocator_type& a) { _free_group(a, _num_alloc()); }
1244
+
1245
+ // Many STL algorithms use swap instead of copy constructors
1246
+ void swap(sparsegroup& x)
1247
+ {
1248
+ using std::swap;
1249
+
1250
+ swap(_group, x._group);
1251
+ swap(_bitmap, x._bitmap);
1252
+ swap(_bm_erased, x._bm_erased);
1253
+ #ifdef SPP_STORE_NUM_ITEMS
1254
+ swap(_num_buckets, x._num_buckets);
1255
+ swap(_num_allocated, x._num_allocated);
1256
+ #endif
1257
+ }
1258
+
1259
+ // It's always nice to be able to clear a table without deallocating it
1260
+ void clear(allocator_type &alloc, bool erased)
1261
+ {
1262
+ _free_group(alloc, _num_alloc());
1263
+ _bitmap = 0;
1264
+ if (erased)
1265
+ _bm_erased = 0;
1266
+ _set_num_items(0);
1267
+ _set_num_alloc(0);
1268
+ }
1269
+
1270
+ // Functions that tell you about size. Alas, these aren't so useful
1271
+ // because our table is always fixed size.
1272
+ size_type size() const { return static_cast<size_type>(SPP_GROUP_SIZE); }
1273
+ size_type max_size() const { return static_cast<size_type>(SPP_GROUP_SIZE); }
1274
+
1275
+ bool empty() const { return false; }
1276
+
1277
+ // We also may want to know how many *used* buckets there are
1278
+ size_type num_nonempty() const { return (size_type)_num_items(); }
1279
+
1280
+ // TODO(csilvers): make protected + friend
1281
+ // This is used by sparse_hashtable to get an element from the table
1282
+ // when we know it exists.
1283
+ reference unsafe_get(size_type i) const
1284
+ {
1285
+ // assert(_bmtest(i));
1286
+ return (reference)_group[pos_to_offset(i)];
1287
+ }
1288
+
1289
+ typedef std::pair<pointer, bool> SetResult;
1290
+
1291
+ private:
1292
+ //typedef spp_::integral_constant<bool, spp_::is_relocatable<value_type>::value> check_relocatable;
1293
+ typedef spp_::true_type realloc_ok_type;
1294
+ typedef spp_::false_type realloc_not_ok_type;
1295
+
1296
+ //typedef spp_::zero_type libc_reloc_type;
1297
+ //typedef spp_::one_type spp_reloc_type;
1298
+ //typedef spp_::two_type spp_not_reloc_type;
1299
+ //typedef spp_::three_type generic_alloc_type;
1300
+
1301
+ #if 1
1302
+ typedef typename if_<((spp_::is_same<allocator_type, libc_allocator<value_type> >::value ||
1303
+ spp_::is_same<allocator_type, spp_allocator<value_type> >::value) &&
1304
+ spp_::is_relocatable<value_type>::value), realloc_ok_type, realloc_not_ok_type>::type
1305
+ check_alloc_type;
1306
+ #else
1307
+ typedef typename if_<spp_::is_same<allocator_type, spp_allocator<value_type> >::value,
1308
+ typename if_<spp_::is_relocatable<value_type>::value, spp_reloc_type, spp_not_reloc_type>::type,
1309
+ typename if_<(spp_::is_same<allocator_type, libc_allocator<value_type> >::value &&
1310
+ spp_::is_relocatable<value_type>::value), libc_reloc_type, generic_alloc_type>::type >::type
1311
+ check_alloc_type;
1312
+ #endif
1313
+
1314
+
1315
+ //typedef if_<spp_::is_same<allocator_type, libc_allocator<value_type> >::value,
1316
+ // libc_alloc_type,
1317
+ // if_<spp_::is_same<allocator_type, spp_allocator<value_type> >::value,
1318
+ // spp_alloc_type, user_alloc_type> > check_alloc_type;
1319
+
1320
+ //typedef spp_::integral_constant<bool,
1321
+ // (spp_::is_relocatable<value_type>::value &&
1322
+ // (spp_::is_same<allocator_type, spp_allocator<value_type> >::value ||
1323
+ // spp_::is_same<allocator_type, libc_allocator<value_type> >::value)) >
1324
+ // realloc_and_memmove_ok;
1325
+
1326
+ // ------------------------- memory at *p is uninitialized => need to construct
1327
+ void _init_val(mutable_value_type *p, reference val)
1328
+ {
1329
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
1330
+ ::new (p) value_type(std::move(val));
1331
+ #else
1332
+ ::new (p) value_type(val);
1333
+ #endif
1334
+ }
1335
+
1336
+ // ------------------------- memory at *p is uninitialized => need to construct
1337
+ void _init_val(mutable_value_type *p, const_reference val)
1338
+ {
1339
+ ::new (p) value_type(val);
1340
+ }
1341
+
1342
+ // ------------------------------------------------ memory at *p is initialized
1343
+ void _set_val(value_type *p, reference val)
1344
+ {
1345
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
1346
+ *(mutable_pointer)p = std::move(val);
1347
+ #else
1348
+ using std::swap;
1349
+ swap(*(mutable_pointer)p, *(mutable_pointer)&val);
1350
+ #endif
1351
+ }
1352
+
1353
+ // ------------------------------------------------ memory at *p is initialized
1354
+ void _set_val(value_type *p, const_reference val)
1355
+ {
1356
+ *(mutable_pointer)p = *(const_mutable_pointer)&val;
1357
+ }
1358
+
1359
+ // Create space at _group[offset], assuming value_type is relocatable, and the
1360
+ // allocator_type is the spp allocator.
1361
+ // return true if the slot was constructed (i.e. contains a valid value_type)
1362
+ // ---------------------------------------------------------------------------------
1363
+ template <class Val>
1364
+ void _set_aux(allocator_type &alloc, size_type offset, Val &val, realloc_ok_type)
1365
+ {
1366
+ //static int x=0; if (++x < 10) printf("x\n"); // check we are getting here
1367
+
1368
+ uint32_t num_items = _num_items();
1369
+ uint32_t num_alloc = _sizing(num_items);
1370
+
1371
+ if (num_items == num_alloc)
1372
+ {
1373
+ num_alloc = _sizing(num_items + 1);
1374
+ _group = alloc.reallocate(_group, num_alloc);
1375
+ _set_num_alloc(num_alloc);
1376
+ }
1377
+
1378
+ for (uint32_t i = num_items; i > offset; --i)
1379
+ memcpy(_group + i, _group + i-1, sizeof(*_group));
1380
+
1381
+ _init_val((mutable_pointer)(_group + offset), val);
1382
+ }
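// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: the realloc_ok_type
// overload above relies on value_type being "relocatable", i.e. its bytes may be
// moved with memcpy with no constructor/destructor calls in between. The same
// gap-making idea, standalone, for a trivially copyable element type (make_gap is
// a hypothetical helper; assumes <cstring>, <cstdint> and <new>, and that buf has
// room for count + 1 elements):
//
//     void make_gap(int *buf, uint32_t count, uint32_t offset, int val)
//     {
//         for (uint32_t i = count; i > offset; --i)           // shift right,
//             memcpy(buf + i, buf + i - 1, sizeof(*buf));     // back to front
//         ::new (buf + offset) int(val);                      // construct into the gap
//     }
// --------------------------------------------------------------------------------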
1383
+
1384
+ // Create space at _group[offset] in the general case: value_type is *not*
1385
+ // relocatable, or the allocator does not support reallocate().
1386
+ // On return the slot contains a valid, constructed value_type.
1387
+ // ---------------------------------------------------------------------------------
1388
+ template <class Val>
1389
+ void _set_aux(allocator_type &alloc, size_type offset, Val &val, realloc_not_ok_type)
1390
+ {
1391
+ uint32_t num_items = _num_items();
1392
+ uint32_t num_alloc = _sizing(num_items);
1393
+
1394
+ //assert(num_alloc == (uint32_t)_num_allocated);
1395
+ if (num_items < num_alloc)
1396
+ {
1397
+ // create new object at end and rotate it to position
1398
+ _init_val((mutable_pointer)&_group[num_items], val);
1399
+ std::rotate((mutable_pointer)(_group + offset),
1400
+ (mutable_pointer)(_group + num_items),
1401
+ (mutable_pointer)(_group + num_items + 1));
1402
+ return;
1403
+ }
1404
+
1405
+ // This is valid because 0 <= offset <= num_items
1406
+ pointer p = _allocate_group(alloc, _sizing(num_items + 1));
1407
+ if (offset)
1408
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)_group),
1409
+ MK_MOVE_IT((mutable_pointer)(_group + offset)),
1410
+ (mutable_pointer)p);
1411
+ if (num_items > offset)
1412
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset)),
1413
+ MK_MOVE_IT((mutable_pointer)(_group + num_items)),
1414
+ (mutable_pointer)(p + offset + 1));
1415
+ _init_val((mutable_pointer)(p + offset), val);
1416
+ _free_group(alloc, num_alloc);
1417
+ _group = p;
1418
+ }
1419
+
1420
+ // ----------------------------------------------------------------------------------
1421
+ template <class Val>
1422
+ void _set(allocator_type &alloc, size_type i, size_type offset, Val &val)
1423
+ {
1424
+ if (!_bmtest(i))
1425
+ {
1426
+ _set_aux(alloc, offset, val, check_alloc_type());
1427
+ _incr_num_items();
1428
+ _bmset(i);
1429
+ }
1430
+ else
1431
+ _set_val(&_group[offset], val);
1432
+ }
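// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: _set() above selects one
// of the two _set_aux() overloads at compile time by passing a tag object of type
// check_alloc_type (a true_type when the realloc+memcpy path is safe, a false_type
// otherwise). The same tag-dispatch pattern in miniature, using a standard trait
// as a stand-in for spp_::is_relocatable (names are hypothetical):
//
//     #include <type_traits>
//
//     template <class T> void insert_impl(T &, std::true_type)  { /* memcpy fast path */ }
//     template <class T> void insert_impl(T &, std::false_type) { /* generic path     */ }
//
//     template <class T> void insert_dispatch(T &v)
//     {
//         insert_impl(v, std::is_trivially_copyable<T>());  // overload picked at compile time
//     }
// --------------------------------------------------------------------------------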
1433
+
1434
+ public:
1435
+
1436
+ // This returns the pointer to the inserted item
1437
+ // ---------------------------------------------
1438
+ template <class Val>
1439
+ pointer set(allocator_type &alloc, size_type i, Val &val)
1440
+ {
1441
+ _bme_clear(i); // in case this was an "erased" location
1442
+
1443
+ size_type offset = pos_to_offset(i);
1444
+ _set(alloc, i, offset, val); // may change _group pointer
1445
+ return (pointer)(_group + offset);
1446
+ }
1447
+
1448
+ // We let you see if a bucket is non-empty without retrieving it
1449
+ // -------------------------------------------------------------
1450
+ bool test(size_type i) const
1451
+ {
1452
+ return _bmtest(i);
1453
+ }
1454
+
1455
+ // also tests for erased values
1456
+ // ----------------------------
1457
+ bool test_strict(size_type i) const
1458
+ {
1459
+ return _bmtest_strict(i);
1460
+ }
1461
+
1462
+ private:
1463
+ // Shrink the array, assuming value_type is relocatable, and the
1464
+ // allocator supports reallocate() (the spp or libc allocator).
1465
+ // -------------------------------------------------------------
1466
+ void _group_erase_aux(allocator_type &alloc, size_type offset, realloc_ok_type)
1467
+ {
1468
+ // static int x=0; if (++x < 10) printf("Y\n"); // check we are getting here
1469
+ uint32_t num_items = _num_items();
1470
+ uint32_t num_alloc = _sizing(num_items);
1471
+
1472
+ if (num_items == 1)
1473
+ {
1474
+ assert(offset == 0);
1475
+ _free_group(alloc, num_alloc);
1476
+ _set_num_alloc(0);
1477
+ return;
1478
+ }
1479
+
1480
+ _group[offset].~value_type();
1481
+
1482
+ for (size_type i = offset; i < num_items - 1; ++i)
1483
+ memcpy(_group + i, _group + i + 1, sizeof(*_group));
1484
+
1485
+ if (_sizing(num_items - 1) != num_alloc)
1486
+ {
1487
+ num_alloc = _sizing(num_items - 1);
1488
+ assert(num_alloc); // because we have at least 1 item left
1489
+ _set_num_alloc(num_alloc);
1490
+ _group = alloc.reallocate(_group, num_alloc);
1491
+ }
1492
+ }
1493
+
1494
+ // Shrink the array, without any special assumptions about value_type and
1495
+ // allocator_type.
1496
+ // --------------------------------------------------------------------------
1497
+ void _group_erase_aux(allocator_type &alloc, size_type offset, realloc_not_ok_type)
1498
+ {
1499
+ uint32_t num_items = _num_items();
1500
+ uint32_t num_alloc = _sizing(num_items);
1501
+
1502
+ if (_sizing(num_items - 1) != num_alloc)
1503
+ {
1504
+ pointer p = 0;
1505
+ if (num_items > 1)
1506
+ {
1507
+ p = _allocate_group(alloc, num_items - 1);
1508
+ if (offset)
1509
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group)),
1510
+ MK_MOVE_IT((mutable_pointer)(_group + offset)),
1511
+ (mutable_pointer)(p));
1512
+ if (static_cast<uint32_t>(offset + 1) < num_items)
1513
+ std::uninitialized_copy(MK_MOVE_IT((mutable_pointer)(_group + offset + 1)),
1514
+ MK_MOVE_IT((mutable_pointer)(_group + num_items)),
1515
+ (mutable_pointer)(p + offset));
1516
+ }
1517
+ else
1518
+ {
1519
+ assert(offset == 0);
1520
+ _set_num_alloc(0);
1521
+ }
1522
+ _free_group(alloc, num_alloc);
1523
+ _group = p;
1524
+ }
1525
+ else
1526
+ {
1527
+ std::rotate((mutable_pointer)(_group + offset),
1528
+ (mutable_pointer)(_group + offset + 1),
1529
+ (mutable_pointer)(_group + num_items));
1530
+ ((mutable_pointer)(_group + num_items - 1))->~mutable_value_type();
1531
+ }
1532
+ }
1533
+
1534
+ void _group_erase(allocator_type &alloc, size_type offset)
1535
+ {
1536
+ _group_erase_aux(alloc, offset, check_alloc_type());
1537
+ }
1538
+
1539
+ public:
1540
+ template <class twod_iter>
1541
+ bool erase_ne(allocator_type &alloc, twod_iter &it)
1542
+ {
1543
+ assert(_group && it.col_current != ne_end());
1544
+ size_type offset = (size_type)(it.col_current - ne_begin());
1545
+ size_type pos = offset_to_pos(offset);
1546
+
1547
+ if (_num_items() <= 1)
1548
+ {
1549
+ clear(alloc, false);
1550
+ it.col_current = 0;
1551
+ }
1552
+ else
1553
+ {
1554
+ _group_erase(alloc, offset);
1555
+ _decr_num_items();
1556
+ _bmclear(pos);
1557
+
1558
+ // in case _group_erase reallocated the buffer
1559
+ it.col_current = reinterpret_cast<pointer>(_group) + offset;
1560
+ }
1561
+ _bme_set(pos); // remember that this position has been erased
1562
+ it.advance_past_end();
1563
+ return true;
1564
+ }
1565
+
1566
+
1567
+ // This takes the specified elements out of the group. This is
1568
+ // "undefining", rather than "clearing".
1569
+ // TODO(austern): Make this exception safe: handle exceptions from
1570
+ // value_type's copy constructor.
1571
+ // ---------------------------------------------------------------
1572
+ void erase(allocator_type &alloc, size_type i)
1573
+ {
1574
+ if (_bmtest(i))
1575
+ {
1576
+ // trivial to erase empty bucket
1577
+ if (_num_items() == 1)
1578
+ clear(alloc, false);
1579
+ else
1580
+ {
1581
+ _group_erase(alloc, pos_to_offset(i));
1582
+ _decr_num_items();
1583
+ _bmclear(i);
1584
+ }
1585
+ _bme_set(i); // remember that this position has been erased
1586
+ }
1587
+ }
1588
+
1589
+ // I/O
1590
+ // We support reading and writing groups to disk. We don't store
1591
+ // the actual array contents (which we don't know how to store),
1592
+ // just the bitmap and size. Meant to be used with table I/O.
1593
+ // --------------------------------------------------------------
1594
+ template <typename OUTPUT> bool write_metadata(OUTPUT *fp) const
1595
+ {
1596
+ // warning: we write 4 or 8 bytes for the bitmap, instead of 6 in the
1597
+ // original google sparsehash
1598
+ // ------------------------------------------------------------------
1599
+ if (!sparsehash_internal::write_data(fp, &_bitmap, sizeof(_bitmap)))
1600
+ return false;
1601
+
1602
+ return true;
1603
+ }
1604
+
1605
+ // Reading destroys the old group contents! Returns true if all was ok.
1606
+ template <typename INPUT> bool read_metadata(allocator_type &alloc, INPUT *fp)
1607
+ {
1608
+ clear(alloc, true);
1609
+
1610
+ if (!sparsehash_internal::read_data(fp, &_bitmap, sizeof(_bitmap)))
1611
+ return false;
1612
+
1613
+ // We'll allocate the space, but we won't fill it: it will be
1614
+ // left as uninitialized raw memory.
1615
+ uint32_t num_items = spp_popcount(_bitmap); // yes, _num_buckets not set
1616
+ _set_num_items(num_items);
1617
+ _group = num_items ? _allocate_group(alloc, num_items/* , true */) : 0;
1618
+ return true;
1619
+ }
1620
+
1621
+ // Again, only meaningful if value_type is a POD.
1622
+ template <typename INPUT> bool read_nopointer_data(INPUT *fp)
1623
+ {
1624
+ for (ne_iterator it = ne_begin(); it != ne_end(); ++it)
1625
+ if (!sparsehash_internal::read_data(fp, &(*it), sizeof(*it)))
1626
+ return false;
1627
+ return true;
1628
+ }
1629
+
1630
+ // If your keys and values are simple enough, we can write them
1631
+ // to disk for you. "simple enough" means POD and no pointers.
1632
+ // However, we don't try to normalize endianness.
1633
+ // ------------------------------------------------------------
1634
+ template <typename OUTPUT> bool write_nopointer_data(OUTPUT *fp) const
1635
+ {
1636
+ for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it)
1637
+ if (!sparsehash_internal::write_data(fp, &(*it), sizeof(*it)))
1638
+ return false;
1639
+ return true;
1640
+ }
1641
+
1642
+
1643
+ // Comparisons. We only need to define == and < directly; the
1644
+ // remaining operators (!= > <= >=) are derived from them below.
1645
+ // Note the comparisons are pretty arbitrary: we compare
1646
+ // values of the first index that isn't equal (using default
1647
+ // value for empty buckets).
1648
+ // ---------------------------------------------------------
1649
+ bool operator==(const sparsegroup& x) const
1650
+ {
1651
+ return (_bitmap == x._bitmap &&
1652
+ _bm_erased == x._bm_erased &&
1653
+ std::equal(_group, _group + _num_items(), x._group));
1654
+ }
1655
+
1656
+ bool operator<(const sparsegroup& x) const
1657
+ {
1658
+ // also from <algorithm>
1659
+ return std::lexicographical_compare(_group, _group + _num_items(),
1660
+ x._group, x._group + x._num_items());
1661
+ }
1662
+
1663
+ bool operator!=(const sparsegroup& x) const { return !(*this == x); }
1664
+ bool operator<=(const sparsegroup& x) const { return !(x < *this); }
1665
+ bool operator> (const sparsegroup& x) const { return x < *this; }
1666
+ bool operator>=(const sparsegroup& x) const { return !(*this < x); }
1667
+
1668
+ void mark() { _group = (value_type *)static_cast<uintptr_t>(-1); }
1669
+ bool is_marked() const { return _group == (value_type *)static_cast<uintptr_t>(-1); }
1670
+
1671
+ private:
1672
+ // ---------------------------------------------------------------------------
1673
+ template <class A>
1674
+ class alloc_impl : public A
1675
+ {
1676
+ public:
1677
+ typedef typename A::pointer pointer;
1678
+ typedef typename A::size_type size_type;
1679
+
1680
+ // Convert a normal allocator to one that has realloc_or_die()
1681
+ explicit alloc_impl(const A& a) : A(a) { }
1682
+
1683
+ // realloc_or_die should only be used with allocators that support
1684
+ // it (spp::spp_allocator or spp::libc_allocator).
1685
+ pointer realloc_or_die(pointer /*ptr*/, size_type /*n*/)
1686
+ {
1687
+ fprintf(stderr, "realloc_or_die is only supported for "
1688
+ "spp::spp_allocator\n");
1689
+ exit(1);
1690
+ return NULL;
1691
+ }
1692
+ };
1693
+
1694
+ // A template specialization of alloc_impl for
1695
+ // spp::libc_allocator that can handle realloc_or_die.
1696
+ // -----------------------------------------------------------
1697
+ template <class A>
1698
+ class alloc_impl<spp_::libc_allocator<A> > : public spp_::libc_allocator<A>
1699
+ {
1700
+ public:
1701
+ typedef typename spp_::libc_allocator<A>::pointer pointer;
1702
+ typedef typename spp_::libc_allocator<A>::size_type size_type;
1703
+
1704
+ explicit alloc_impl(const spp_::libc_allocator<A>& a)
1705
+ : spp_::libc_allocator<A>(a)
1706
+ { }
1707
+
1708
+ pointer realloc_or_die(pointer ptr, size_type n)
1709
+ {
1710
+ pointer retval = this->reallocate(ptr, n);
1711
+ if (retval == NULL)
1712
+ {
1713
+ fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
1714
+ "%lu elements for ptr %p", static_cast<unsigned long>(n), ptr);
1715
+ exit(1);
1716
+ }
1717
+ return retval;
1718
+ }
1719
+ };
1720
+
1721
+ // A template specialization of alloc_impl for
1722
+ // spp::spp_allocator that can handle realloc_or_die.
1723
+ // -----------------------------------------------------------
1724
+ template <class A>
1725
+ class alloc_impl<spp_::spp_allocator<A> > : public spp_::spp_allocator<A>
1726
+ {
1727
+ public:
1728
+ typedef typename spp_::spp_allocator<A>::pointer pointer;
1729
+ typedef typename spp_::spp_allocator<A>::size_type size_type;
1730
+
1731
+ explicit alloc_impl(const spp_::spp_allocator<A>& a)
1732
+ : spp_::spp_allocator<A>(a)
1733
+ { }
1734
+
1735
+ pointer realloc_or_die(pointer ptr, size_type n)
1736
+ {
1737
+ pointer retval = this->reallocate(ptr, n);
1738
+ if (retval == NULL)
1739
+ {
1740
+ fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
1741
+ "%lu elements for ptr %p", static_cast<unsigned long>(n), ptr);
1742
+ exit(1);
1743
+ }
1744
+ return retval;
1745
+ }
1746
+ };
1747
+
1748
+
1749
+ #ifdef SPP_STORE_NUM_ITEMS
1750
+ uint32_t _num_items() const { return (uint32_t)_num_buckets; }
1751
+ void _set_num_items(uint32_t val) { _num_buckets = static_cast<size_type>(val); }
1752
+ void _incr_num_items() { ++_num_buckets; }
1753
+ void _decr_num_items() { --_num_buckets; }
1754
+ uint32_t _num_alloc() const { return (uint32_t)_num_allocated; }
1755
+ void _set_num_alloc(uint32_t val) { _num_allocated = static_cast<size_type>(val); }
1756
+ #else
1757
+ uint32_t _num_items() const { return spp_popcount(_bitmap); }
1758
+ void _set_num_items(uint32_t ) { }
1759
+ void _incr_num_items() { }
1760
+ void _decr_num_items() { }
1761
+ uint32_t _num_alloc() const { return _sizing(_num_items()); }
1762
+ void _set_num_alloc(uint32_t val) { }
1763
+ #endif
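// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: when SPP_STORE_NUM_ITEMS
// is not defined, the bookkeeping above is derived entirely from the occupancy
// bitmap -- the item count is its popcount, and an item's offset inside _group is
// the number of occupied positions below it (see pos_to_offset). For example:
//
//     _bitmap == 0x13          // binary 10011: positions 0, 1 and 4 occupied
//     _num_items()             // spp_popcount(0x13) == 3
//     pos_to_offset(4)         // == 2, since positions 0 and 1 precede position 4
// --------------------------------------------------------------------------------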
1764
+
1765
+ // The actual data
1766
+ // ---------------
1767
+ value_type * _group; // (small) array of T's
1768
+ group_bm_type _bitmap;
1769
+ group_bm_type _bm_erased; // ones where items have been erased
1770
+
1771
+ #ifdef SPP_STORE_NUM_ITEMS
1772
+ size_type _num_buckets;
1773
+ size_type _num_allocated;
1774
+ #endif
1775
+ };
1776
+
1777
+ // ---------------------------------------------------------------------------
1778
+ // ---------------------------------------------------------------------------
1779
+ template <class T, class Alloc>
1780
+ class sparsetable
1781
+ {
1782
+ public:
1783
+ typedef T value_type;
1784
+ typedef Alloc allocator_type;
1785
+ typedef sparsegroup<value_type, allocator_type> group_type;
1786
+
1787
+ private:
1788
+ typedef typename Alloc::template rebind<group_type>::other group_alloc_type;
1789
+ typedef typename group_alloc_type::size_type group_size_type;
1790
+
1791
+ public:
1792
+ // Basic types
1793
+ // -----------
1794
+ typedef typename allocator_type::size_type size_type;
1795
+ typedef typename allocator_type::difference_type difference_type;
1796
+ typedef value_type& reference;
1797
+ typedef const value_type& const_reference;
1798
+ typedef value_type* pointer;
1799
+ typedef const value_type* const_pointer;
1800
+
1801
+ typedef group_type& GroupsReference;
1802
+ typedef const group_type& GroupsConstReference;
1803
+
1804
+ typedef typename group_type::ne_iterator ColIterator;
1805
+ typedef typename group_type::const_ne_iterator ColConstIterator;
1806
+
1807
+ typedef table_iterator<sparsetable<T, allocator_type> > iterator; // defined with index
1808
+ typedef const_table_iterator<sparsetable<T, allocator_type> > const_iterator; // defined with index
1809
+ typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
1810
+ typedef std::reverse_iterator<iterator> reverse_iterator;
1811
+
1812
+ // These are our special iterators, that go over non-empty buckets in a
1813
+ // table. These aren't const only because you can change non-empty buckets.
1814
+ // ----------------------------------------------------------------------
1815
+ typedef Two_d_iterator<T,
1816
+ group_type *,
1817
+ ColIterator,
1818
+ std::bidirectional_iterator_tag> ne_iterator;
1819
+
1820
+ typedef Two_d_iterator<const T,
1821
+ const group_type *,
1822
+ ColConstIterator,
1823
+ std::bidirectional_iterator_tag> const_ne_iterator;
1824
+
1825
+ // Another special iterator: it frees memory as it iterates (used to resize).
1826
+ // Obviously, you can only iterate over it once, which is why it's an input iterator
1827
+ // ---------------------------------------------------------------------------------
1828
+ typedef Two_d_destructive_iterator<T,
1829
+ group_type *,
1830
+ ColIterator,
1831
+ std::input_iterator_tag,
1832
+ allocator_type> destructive_iterator;
1833
+
1834
+ typedef std::reverse_iterator<ne_iterator> reverse_ne_iterator;
1835
+ typedef std::reverse_iterator<const_ne_iterator> const_reverse_ne_iterator;
1836
+
1837
+
1838
+ // Iterator functions
1839
+ // ------------------
1840
+ iterator begin() { return iterator(this, 0); }
1841
+ const_iterator begin() const { return const_iterator(this, 0); }
1842
+ const_iterator cbegin() const { return const_iterator(this, 0); }
1843
+ iterator end() { return iterator(this, size()); }
1844
+ const_iterator end() const { return const_iterator(this, size()); }
1845
+ const_iterator cend() const { return const_iterator(this, size()); }
1846
+ reverse_iterator rbegin() { return reverse_iterator(end()); }
1847
+ const_reverse_iterator rbegin() const { return const_reverse_iterator(cend()); }
1848
+ const_reverse_iterator crbegin() const { return const_reverse_iterator(cend()); }
1849
+ reverse_iterator rend() { return reverse_iterator(begin()); }
1850
+ const_reverse_iterator rend() const { return const_reverse_iterator(cbegin()); }
1851
+ const_reverse_iterator crend() const { return const_reverse_iterator(cbegin()); }
1852
+
1853
+ // Versions for our special non-empty iterator
1854
+ // ------------------------------------------
1855
+ ne_iterator ne_begin() { return ne_iterator (_first_group); }
1856
+ const_ne_iterator ne_begin() const { return const_ne_iterator(_first_group); }
1857
+ const_ne_iterator ne_cbegin() const { return const_ne_iterator(_first_group); }
1858
+ ne_iterator ne_end() { return ne_iterator (_last_group); }
1859
+ const_ne_iterator ne_end() const { return const_ne_iterator(_last_group); }
1860
+ const_ne_iterator ne_cend() const { return const_ne_iterator(_last_group); }
1861
+
1862
+ reverse_ne_iterator ne_rbegin() { return reverse_ne_iterator(ne_end()); }
1863
+ const_reverse_ne_iterator ne_rbegin() const { return const_reverse_ne_iterator(ne_end()); }
1864
+ const_reverse_ne_iterator ne_crbegin() const { return const_reverse_ne_iterator(ne_end()); }
1865
+ reverse_ne_iterator ne_rend() { return reverse_ne_iterator(ne_begin()); }
1866
+ const_reverse_ne_iterator ne_rend() const { return const_reverse_ne_iterator(ne_begin()); }
1867
+ const_reverse_ne_iterator ne_crend() const { return const_reverse_ne_iterator(ne_begin()); }
1868
+
1869
+ destructive_iterator destructive_begin()
1870
+ {
1871
+ return destructive_iterator(_alloc, _first_group);
1872
+ }
1873
+
1874
+ destructive_iterator destructive_end()
1875
+ {
1876
+ return destructive_iterator(_alloc, _last_group);
1877
+ }
1878
+
1879
+ // Mapping a bucket index to its group and its position within that group
1880
+ static group_size_type num_groups(size_type num)
1881
+ {
1882
+ // how many groups are needed to hold num buckets
1883
+ return num == 0 ? (group_size_type)0 :
1884
+ (group_size_type)(((num-1) / SPP_GROUP_SIZE) + 1);
1885
+ }
1886
+
1887
+ typename group_type::size_type pos_in_group(size_type i) const
1888
+ {
1889
+ return static_cast<typename group_type::size_type>(i & SPP_MASK_);
1890
+ }
1891
+
1892
+ size_type group_num(size_type i) const
1893
+ {
1894
+ return (size_type)(i >> SPP_SHIFT_);
1895
+ }
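// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: a worked example of the
// arithmetic above, assuming a group size of 32 (so SPP_SHIFT_ == 5 and
// SPP_MASK_ == 0x1f):
//
//     bucket i = 100
//     group_num(100)    == 100 >> 5 == 3              // i lives in the 4th group
//     pos_in_group(100) == 100 & 31 == 4              // at position 4 within that group
//     num_groups(100)   == ((100 - 1) / 32) + 1 == 4  // groups needed for 100 buckets
// --------------------------------------------------------------------------------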
1896
+
1897
+ GroupsReference which_group(size_type i)
1898
+ {
1899
+ return _first_group[group_num(i)];
1900
+ }
1901
+
1902
+ GroupsConstReference which_group(size_type i) const
1903
+ {
1904
+ return _first_group[group_num(i)];
1905
+ }
1906
+
1907
+ void _alloc_group_array(group_size_type sz, group_type *&first, group_type *&last)
1908
+ {
1909
+ if (sz)
1910
+ {
1911
+ first = _group_alloc.allocate((size_type)(sz + 1)); // + 1 for end marker
1912
+ first[sz].mark(); // for the ne_iterator
1913
+ last = first + sz;
1914
+ }
1915
+ }
1916
+
1917
+ void _free_group_array(group_type *&first, group_type *&last)
1918
+ {
1919
+ if (first)
1920
+ {
1921
+ _group_alloc.deallocate(first, (group_size_type)(last - first + 1)); // + 1 for end marker
1922
+ first = last = 0;
1923
+ }
1924
+ }
1925
+
1926
+ void _allocate_groups(size_type sz)
1927
+ {
1928
+ if (sz)
1929
+ {
1930
+ _alloc_group_array(sz, _first_group, _last_group);
1931
+ std::uninitialized_fill(_first_group, _last_group, group_type());
1932
+ }
1933
+ }
1934
+
1935
+ void _free_groups()
1936
+ {
1937
+ if (_first_group)
1938
+ {
1939
+ for (group_type *g = _first_group; g != _last_group; ++g)
1940
+ g->destruct(_alloc);
1941
+ _free_group_array(_first_group, _last_group);
1942
+ }
1943
+ }
1944
+
1945
+ void _cleanup()
1946
+ {
1947
+ _free_groups(); // sets _first_group = _last_group = 0
1948
+ _table_size = 0;
1949
+ _num_buckets = 0;
1950
+ }
1951
+
1952
+ void _init()
1953
+ {
1954
+ _first_group = 0;
1955
+ _last_group = 0;
1956
+ _table_size = 0;
1957
+ _num_buckets = 0;
1958
+ }
1959
+
1960
+ void _copy(const sparsetable &o)
1961
+ {
1962
+ _table_size = o._table_size;
1963
+ _num_buckets = o._num_buckets;
1964
+ _alloc = o._alloc; // todo - copy or move allocator according to...
1965
+ _group_alloc = o._group_alloc; // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map
1966
+
1967
+ group_size_type sz = (group_size_type)(o._last_group - o._first_group);
1968
+ if (sz)
1969
+ {
1970
+ _alloc_group_array(sz, _first_group, _last_group);
1971
+ for (group_size_type i=0; i<sz; ++i)
1972
+ new (_first_group + i) group_type(o._first_group[i], _alloc);
1973
+ }
1974
+ }
1975
+
1976
+ public:
1977
+ // Constructors -- default, normal (when you specify size), and copy
1978
+ explicit sparsetable(size_type sz = 0, const allocator_type &alloc = allocator_type()) :
1979
+ _first_group(0),
1980
+ _last_group(0),
1981
+ _table_size(sz),
1982
+ _num_buckets(0),
1983
+ _alloc(alloc)
1984
+ // todo - copy or move allocator according to
1985
+ // http://en.cppreference.com/w/cpp/container/unordered_map/unordered_map
1986
+ {
1987
+ _allocate_groups(num_groups(sz));
1988
+ }
1989
+
1990
+ ~sparsetable()
1991
+ {
1992
+ _free_groups();
1993
+ }
1994
+
1995
+ sparsetable(const sparsetable &o)
1996
+ {
1997
+ _init();
1998
+ _copy(o);
1999
+ }
2000
+
2001
+ sparsetable& operator=(const sparsetable &o)
2002
+ {
2003
+ _cleanup();
2004
+ _copy(o);
2005
+ return *this;
2006
+ }
2007
+
2008
+
2009
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
2010
+ sparsetable(sparsetable&& o)
2011
+ {
2012
+ _init();
2013
+ this->swap(o);
2014
+ }
2015
+
2016
+ sparsetable(sparsetable&& o, const allocator_type &alloc)
2017
+ {
2018
+ _init();
2019
+ this->swap(o);
2020
+ _alloc = alloc; // [gp todo] is this correct?
2021
+ }
2022
+
2023
+ sparsetable& operator=(sparsetable&& o)
2024
+ {
2025
+ _cleanup();
2026
+ this->swap(o);
2027
+ return *this;
2028
+ }
2029
+ #endif
2030
+
2031
+ // Many STL algorithms use swap instead of copy constructors
2032
+ void swap(sparsetable& o)
2033
+ {
2034
+ using std::swap;
2035
+
2036
+ swap(_first_group, o._first_group);
2037
+ swap(_last_group, o._last_group);
2038
+ swap(_table_size, o._table_size);
2039
+ swap(_num_buckets, o._num_buckets);
2040
+ if (_alloc != o._alloc)
2041
+ swap(_alloc, o._alloc);
2042
+ if (_group_alloc != o._group_alloc)
2043
+ swap(_group_alloc, o._group_alloc);
2044
+ }
2045
+
2046
+ // It's always nice to be able to clear a table without deallocating it
2047
+ void clear()
2048
+ {
2049
+ _free_groups();
2050
+ _num_buckets = 0;
2051
+ _table_size = 0;
2052
+ }
2053
+
2054
+ inline allocator_type get_allocator() const
2055
+ {
2056
+ return _alloc;
2057
+ }
2058
+
2059
+
2060
+ // Functions that tell you about size.
2061
+ // NOTE: empty() is non-intuitive! It does not tell you whether there
2062
+ // are any non-empty buckets (use num_nonempty() for that). Instead
2063
+ // it says whether any buckets have been allocated at all.
2064
+ // ----------------------------------------------------------------
2065
+ size_type size() const { return _table_size; }
2066
+ size_type max_size() const { return _alloc.max_size(); }
2067
+ bool empty() const { return _table_size == 0; }
2068
+ size_type num_nonempty() const { return _num_buckets; }
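// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: an example of the
// distinction documented above (sparsetable is an implementation detail, normally
// reached through sparse_hash_map/sparse_hash_set; the allocator spelling here is
// illustrative):
//
//     sparsetable<int, spp::spp_allocator<int> > t(100);
//     t.size();          // 100   -- buckets allocated
//     t.num_nonempty();  // 0     -- nothing stored yet
//     t.empty();         // false -- because size() != 0, despite holding no items
// --------------------------------------------------------------------------------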
2069
+
2070
+ // OK, we'll let you resize one of these puppies
2071
+ void resize(size_type new_size)
2072
+ {
2073
+ group_size_type sz = num_groups(new_size);
2074
+ group_size_type old_sz = (group_size_type)(_last_group - _first_group);
2075
+
2076
+ if (sz != old_sz)
2077
+ {
2078
+ // resize group array
2079
+ // ------------------
2080
+ group_type *first = 0, *last = 0;
2081
+ if (sz)
2082
+ {
2083
+ _alloc_group_array(sz, first, last);
2084
+ memcpy(first, _first_group, sizeof(*first) * (std::min)(sz, old_sz));
2085
+ }
2086
+
2087
+ if (sz < old_sz)
2088
+ {
2089
+ for (group_type *g = _first_group + sz; g != _last_group; ++g)
2090
+ g->destruct(_alloc);
2091
+ }
2092
+ else
2093
+ std::uninitialized_fill(first + old_sz, last, group_type());
2094
+
2095
+ _free_group_array(_first_group, _last_group);
2096
+ _first_group = first;
2097
+ _last_group = last;
2098
+ }
2099
+ #if 0
2100
+ // used only in test program
2101
+ // todo: fix if sparsetable to be used directly
2102
+ // --------------------------------------------
2103
+ if (new_size < _table_size)
2104
+ {
2105
+ // lower num_buckets, clear last group
2106
+ if (pos_in_group(new_size) > 0) // need to clear inside last group
2107
+ groups.back().erase(_alloc, groups.back().begin() + pos_in_group(new_size),
2108
+ groups.back().end());
2109
+ _num_buckets = 0; // refigure # of used buckets
2110
+ for (const group_type *group = _first_group; group != _last_group; ++group)
2111
+ _num_buckets += group->num_nonempty();
2112
+ }
2113
+ #endif
2114
+ _table_size = new_size;
2115
+ }
2116
+
2117
+ // We let you see if a bucket is non-empty without retrieving it
2118
+ // -------------------------------------------------------------
2119
+ bool test(size_type i) const
2120
+ {
2121
+ // assert(i < _table_size);
2122
+ return which_group(i).test(pos_in_group(i));
2123
+ }
2124
+
2125
+ // also tests for erased values
2126
+ // ----------------------------
2127
+ bool test_strict(size_type i) const
2128
+ {
2129
+ // assert(i < _table_size);
2130
+ return which_group(i).test_strict(pos_in_group(i));
2131
+ }
2132
+
2133
+ friend struct GrpPos;
2134
+
2135
+ struct GrpPos
2136
+ {
2137
+ typedef typename sparsetable::ne_iterator ne_iter;
2138
+ GrpPos(const sparsetable &table, size_type i) :
2139
+ grp(table.which_group(i)), pos(table.pos_in_group(i)) {}
2140
+
2141
+ bool test_strict() const { return grp.test_strict(pos); }
2142
+ bool test() const { return grp.test(pos); }
2143
+ typename sparsetable::reference unsafe_get() const { return grp.unsafe_get(pos); }
2144
+ ne_iter get_iter(typename sparsetable::reference ref)
2145
+ {
2146
+ return ne_iter((group_type *)&grp, &ref);
2147
+ }
2148
+
2149
+ void erase(sparsetable &table) // item *must* be present
2150
+ {
2151
+ assert(table._num_buckets);
2152
+ ((group_type &)grp).erase(table._alloc, pos);
2153
+ --table._num_buckets;
2154
+ }
2155
+
2156
+ private:
2157
+ GrpPos* operator=(const GrpPos&);
2158
+
2159
+ const group_type &grp;
2160
+ typename group_type::size_type pos;
2161
+ };
2162
+
2163
+ bool test(iterator pos) const
2164
+ {
2165
+ return which_group(pos.pos).test(pos_in_group(pos.pos));
2166
+ }
2167
+
2168
+ bool test(const_iterator pos) const
2169
+ {
2170
+ return which_group(pos.pos).test(pos_in_group(pos.pos));
2171
+ }
2172
+
2173
+ // TODO(csilvers): make protected + friend
2174
+ // This is used by sparse_hashtable to get an element from the table
2175
+ // when we know it exists (because the caller has called test(i)).
2176
+ // -----------------------------------------------------------------
2177
+ reference unsafe_get(size_type i) const
2178
+ {
2179
+ assert(i < _table_size);
2180
+ // assert(test(i));
2181
+ return which_group(i).unsafe_get(pos_in_group(i));
2182
+ }
2183
+
2184
+ // Needed for hashtables: returns the element at i as a ne_iterator. Crashes for empty buckets
2185
+ const_ne_iterator get_iter(size_type i) const
2186
+ {
2187
+ //assert(test(i)); // how can a ne_iterator point to an empty bucket?
2188
+
2189
+ size_type grp_idx = group_num(i);
2190
+
2191
+ return const_ne_iterator(_first_group + grp_idx,
2192
+ (_first_group[grp_idx].ne_begin() +
2193
+ _first_group[grp_idx].pos_to_offset(pos_in_group(i))));
2194
+ }
2195
+
2196
+ const_ne_iterator get_iter(size_type i, ColIterator col_it) const
2197
+ {
2198
+ return const_ne_iterator(_first_group + group_num(i), col_it);
2199
+ }
2200
+
2201
+ // For nonempty we can return a non-const version
2202
+ ne_iterator get_iter(size_type i)
2203
+ {
2204
+ //assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
2205
+
2206
+ size_type grp_idx = group_num(i);
2207
+
2208
+ return ne_iterator(_first_group + grp_idx,
2209
+ (_first_group[grp_idx].ne_begin() +
2210
+ _first_group[grp_idx].pos_to_offset(pos_in_group(i))));
2211
+ }
2212
+
2213
+ ne_iterator get_iter(size_type i, ColIterator col_it)
2214
+ {
2215
+ return ne_iterator(_first_group + group_num(i), col_it);
2216
+ }
2217
+
2218
+ // And the reverse transformation.
2219
+ size_type get_pos(const const_ne_iterator& it) const
2220
+ {
2221
+ difference_type current_row = it.row_current - _first_group;
2222
+ difference_type current_col = (it.col_current - _first_group[current_row].ne_begin());
2223
+ return ((current_row * SPP_GROUP_SIZE) +
2224
+ _first_group[current_row].offset_to_pos(current_col));
2225
+ }
2226
+
2227
+ // Val can be reference or const_reference
2228
+ // ---------------------------------------
2229
+ template <class Val>
2230
+ reference set(size_type i, Val &val)
2231
+ {
2232
+ assert(i < _table_size);
2233
+ group_type &group = which_group(i);
2234
+ typename group_type::size_type old_numbuckets = group.num_nonempty();
2235
+ pointer p(group.set(_alloc, pos_in_group(i), val));
2236
+ _num_buckets += group.num_nonempty() - old_numbuckets;
2237
+ return *p;
2238
+ }
2239
+
2240
+ // used in _move_from (where we can move the old value instead of copying it)
2241
+ void move(size_type i, reference val)
2242
+ {
2243
+ assert(i < _table_size);
2244
+ which_group(i).set(_alloc, pos_in_group(i), val);
2245
+ ++_num_buckets;
2246
+ }
2247
+
2248
+ // This takes the specified elements out of the table.
2249
+ // --------------------------------------------------
2250
+ void erase(size_type i)
2251
+ {
2252
+ assert(i < _table_size);
2253
+
2254
+ GroupsReference grp(which_group(i));
2255
+ typename group_type::size_type old_numbuckets = grp.num_nonempty();
2256
+ grp.erase(_alloc, pos_in_group(i));
2257
+ _num_buckets += grp.num_nonempty() - old_numbuckets;
2258
+ }
2259
+
2260
+ void erase(iterator pos)
2261
+ {
2262
+ erase(pos.pos);
2263
+ }
2264
+
2265
+ void erase(iterator start_it, iterator end_it)
2266
+ {
2267
+ // This could be more efficient, but then we'd need to figure
2268
+ // out if we spanned groups or not. Doesn't seem worth it.
2269
+ for (; start_it != end_it; ++start_it)
2270
+ erase(start_it);
2271
+ }
2272
+
2273
+ const_ne_iterator erase(const_ne_iterator it)
2274
+ {
2275
+ ne_iterator res(it);
2276
+ if (res.row_current->erase_ne(_alloc, res))
2277
+ _num_buckets--;
2278
+ return res;
2279
+ }
2280
+
2281
+ const_ne_iterator erase(const_ne_iterator f, const_ne_iterator l)
2282
+ {
2283
+ size_t diff = l - f;
2284
+ while (diff--)
2285
+ f = erase(f);
2286
+ return f;
2287
+ }
2288
+
2289
+ // We support reading and writing tables to disk. We don't store
2290
+ // the actual array contents (which we don't know how to store),
2291
+ // just the groups and sizes. Returns true if all went ok.
2292
+
2293
+ private:
2294
+ // Every time the disk format changes, this should probably change too
2295
+ typedef unsigned long MagicNumberType;
2296
+ static const MagicNumberType MAGIC_NUMBER = 0x24687531;
2297
+
2298
+ // Old versions of this code write all data in 32 bits. We need to
2299
+ // support these files as well as having support for 64-bit systems.
2300
+ // So we use the following encoding scheme: for values < 2^32-1, we
2301
+ // store them in 4 bytes in big-endian order. For values >= 2^32-1, we
2302
+ // store 0xFFFFFFFF followed by 8 bytes in big-endian order. This
2303
+ // causes us to mis-read old-version code that stores exactly
2304
+ // 0xFFFFFFFF, but I don't think that is likely to have happened for
2305
+ // these particular values.
2306
+ template <typename OUTPUT, typename IntType>
2307
+ static bool write_32_or_64(OUTPUT* fp, IntType value)
2308
+ {
2309
+ if (value < 0xFFFFFFFFULL) // fits in 4 bytes
2310
+ {
2311
+ if (!sparsehash_internal::write_bigendian_number(fp, value, 4))
2312
+ return false;
2313
+ }
2314
+ else
2315
+ {
2316
+ if (!sparsehash_internal::write_bigendian_number(fp, 0xFFFFFFFFUL, 4))
2317
+ return false;
2318
+ if (!sparsehash_internal::write_bigendian_number(fp, value, 8))
2319
+ return false;
2320
+ }
2321
+ return true;
2322
+ }
2323
+
2324
+ template <typename INPUT, typename IntType>
2325
+ static bool read_32_or_64(INPUT* fp, IntType *value)
2326
+ {
2327
+ // reads into value
2328
+ MagicNumberType first4 = 0; // a convenient 32-bit unsigned type
2329
+ if (!sparsehash_internal::read_bigendian_number(fp, &first4, 4))
2330
+ return false;
2331
+
2332
+ if (first4 < 0xFFFFFFFFULL)
2333
+ {
2334
+ *value = first4;
2335
+ }
2336
+ else
2337
+ {
2338
+ if (!sparsehash_internal::read_bigendian_number(fp, value, 8))
2339
+ return false;
2340
+ }
2341
+ return true;
2342
+ }
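// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: the framing implemented by
// write_32_or_64() above, shown as a standalone encoder into a byte buffer
// (encode_32_or_64 and put_be are hypothetical helpers; sparsepp itself streams
// through sparsehash_internal::write_bigendian_number):
//
//     #include <cstdint>
//     #include <vector>
//
//     void put_be(std::vector<unsigned char> &out, uint64_t v, int nbytes)
//     {
//         for (int i = nbytes - 1; i >= 0; --i)               // most significant byte first
//             out.push_back((unsigned char)((v >> (8 * i)) & 0xFF));
//     }
//
//     void encode_32_or_64(std::vector<unsigned char> &out, uint64_t value)
//     {
//         if (value < 0xFFFFFFFFULL)
//             put_be(out, value, 4);                          // small value: 4 bytes
//         else
//         {
//             put_be(out, 0xFFFFFFFFULL, 4);                  // escape marker
//             put_be(out, value, 8);                          // then the full 8 bytes
//         }
//     }
// --------------------------------------------------------------------------------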
2343
+
2344
+ public:
2345
+ // read/write_metadata() and read/write_nopointer_data() are DEPRECATED.
2346
+ // Use serialize() and unserialize(), below, for new code.
2347
+
2348
+ template <typename OUTPUT>
2349
+ bool write_metadata(OUTPUT *fp) const
2350
+ {
2351
+ if (!write_32_or_64(fp, MAGIC_NUMBER)) return false;
2352
+ if (!write_32_or_64(fp, _table_size)) return false;
2353
+ if (!write_32_or_64(fp, _num_buckets)) return false;
2354
+
2355
+ for (const group_type *group = _first_group; group != _last_group; ++group)
2356
+ if (group->write_metadata(fp) == false)
2357
+ return false;
2358
+ return true;
2359
+ }
2360
+
2361
+ // Reading destroys the old table contents! Returns true if read ok.
2362
+ template <typename INPUT>
2363
+ bool read_metadata(INPUT *fp)
2364
+ {
2365
+ size_type magic_read = 0;
2366
+ if (!read_32_or_64(fp, &magic_read)) return false;
2367
+ if (magic_read != MAGIC_NUMBER)
2368
+ {
2369
+ clear(); // just to be consistent
2370
+ return false;
2371
+ }
2372
+
2373
+ if (!read_32_or_64(fp, &_table_size)) return false;
2374
+ if (!read_32_or_64(fp, &_num_buckets)) return false;
2375
+
2376
+ resize(_table_size); // so the group array is sized correctly
2377
+ for (group_type *group = _first_group; group != _last_group; ++group)
2378
+ if (group->read_metadata(_alloc, fp) == false)
2379
+ return false;
2380
+ return true;
2381
+ }
2382
+
2383
+ // This code is identical to that for SparseGroup
2384
+ // If your keys and values are simple enough, we can write them
2385
+ // to disk for you. "simple enough" means no pointers.
2386
+ // However, we don't try to normalize endianness
2387
+ bool write_nopointer_data(FILE *fp) const
2388
+ {
2389
+ for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it)
2390
+ if (!fwrite(&*it, sizeof(*it), 1, fp))
2391
+ return false;
2392
+ return true;
2393
+ }
2394
+
2395
+ // When reading, we have to override the potential const-ness of *it
2396
+ bool read_nopointer_data(FILE *fp)
2397
+ {
2398
+ for (ne_iterator it = ne_begin(); it != ne_end(); ++it)
2399
+ if (!fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp))
2400
+ return false;
2401
+ return true;
2402
+ }
2403
+
2404
+ // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
2405
+ // (istream, ostream, etc), *or* a class providing
2406
+ // Read(void*, size_t) (for INPUT) or Write(const void*, size_t)
2407
+ // (for OUTPUT), which reads/writes a buffer from/to a stream
2408
+ // that the INPUT/OUTPUT instance presumably owns.
2409
+
2410
+ typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
2411
+
2412
+ // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
2413
+ template <typename ValueSerializer, typename OUTPUT>
2414
+ bool serialize(ValueSerializer serializer, OUTPUT *fp)
2415
+ {
2416
+ if (!write_metadata(fp))
2417
+ return false;
2418
+ for (const_ne_iterator it = ne_begin(); it != ne_end(); ++it)
2419
+ if (!serializer(fp, *it))
2420
+ return false;
2421
+ return true;
2422
+ }
2423
+
2424
+ // ValueSerializer: a functor. operator()(INPUT*, value_type*)
2425
+ template <typename ValueSerializer, typename INPUT>
2426
+ bool unserialize(ValueSerializer serializer, INPUT *fp)
2427
+ {
2428
+ clear();
2429
+ if (!read_metadata(fp))
2430
+ return false;
2431
+ for (ne_iterator it = ne_begin(); it != ne_end(); ++it)
2432
+ if (!serializer(fp, &*it))
2433
+ return false;
2434
+ return true;
2435
+ }
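// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: the shape of a
// ValueSerializer matching the signatures documented above, for an int value_type
// and a FILE* stream (PodSerializer is a hypothetical name; for plain POD types
// the NopointerSerializer typedef above can be used directly):
//
//     struct PodSerializer
//     {
//         bool operator()(FILE *fp, const int &v) const   // used by serialize()
//         { return fwrite(&v, sizeof(v), 1, fp) == 1; }
//
//         bool operator()(FILE *fp, int *v) const         // used by unserialize()
//         { return fread(v, sizeof(*v), 1, fp) == 1; }
//     };
//
//     // table.serialize(PodSerializer(), fp);   ... and later ...
//     // table.unserialize(PodSerializer(), fp);
// --------------------------------------------------------------------------------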
2436
+
2437
+ // Comparisons. Note the comparisons are pretty arbitrary: we
2438
+ // compare values of the first index that isn't equal (using default
2439
+ // value for empty buckets).
2440
+ bool operator==(const sparsetable& x) const
2441
+ {
2442
+ return (_table_size == x._table_size &&
2443
+ _num_buckets == x._num_buckets &&
2444
+ _first_group == x._first_group);
2445
+ }
2446
+
2447
+ bool operator<(const sparsetable& x) const
2448
+ {
2449
+ return std::lexicographical_compare(begin(), end(), x.begin(), x.end());
2450
+ }
2451
+ bool operator!=(const sparsetable& x) const { return !(*this == x); }
2452
+ bool operator<=(const sparsetable& x) const { return !(x < *this); }
2453
+ bool operator>(const sparsetable& x) const { return x < *this; }
2454
+ bool operator>=(const sparsetable& x) const { return !(*this < x); }
2455
+
2456
+
2457
+ private:
2458
+ // The actual data
2459
+ // ---------------
2460
+ group_type * _first_group;
2461
+ group_type * _last_group;
2462
+ size_type _table_size; // how many buckets they want
2463
+ size_type _num_buckets; // number of non-empty buckets
2464
+ group_alloc_type _group_alloc;
2465
+ allocator_type _alloc;
2466
+ };
2467
+
2468
+ // ----------------------------------------------------------------------
2469
+ // S P A R S E _ H A S H T A B L E
2470
+ // ----------------------------------------------------------------------
2471
+ // Hashtable class, used to implement the hashed associative containers
2472
+ // hash_set and hash_map.
2473
+ //
2474
+ // Value: what is stored in the table (each bucket is a Value).
2475
+ // Key: something in a 1-to-1 correspondence to a Value, that can be used
2476
+ // to search for a Value in the table (find() takes a Key).
2477
+ // HashFcn: Takes a Key and returns an integer, the more unique the better.
2478
+ // ExtractKey: given a Value, returns the unique Key associated with it.
2479
+ // Must inherit from unary_function, or at least have a
2480
+ // result_type typedef indicating the return type of operator().
2481
+ // EqualKey: Given two Keys, says whether they are the same (that is,
2482
+ // if they are both associated with the same Value).
2483
+ // Alloc: STL allocator to use to allocate memory.
2484
+ //
2485
+ // ----------------------------------------------------------------------
2486
+
2487
+ // The probing method
2488
+ // ------------------
2489
+ // Linear probing
2490
+ // #define JUMP_(key, num_probes) ( 1 )
2491
+ // Quadratic probing
2492
+ #define JUMP_(key, num_probes) ( num_probes )
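// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: with JUMP_ defined as
// above, the probe loops below evaluate
//     bucknum = (bucknum + num_probes) & (bucket_count() - 1)
// with num_probes incremented after every collision, so the offsets from the
// initial hash position are the triangular numbers 0, 1, 3, 6, 10, ...; for a
// power-of-two table size this sequence eventually visits every bucket. A
// standalone trace of the first few probes:
//
//     size_t mask = 32 - 1;
//     size_t bucknum = hashval & mask;                 // probe 0
//     for (size_t num_probes = 0; num_probes < 5; )
//     {
//         ++num_probes;
//         bucknum = (bucknum + num_probes) & mask;     // hashval + 1, 3, 6, 10, 15 (mod 32)
//     }
// --------------------------------------------------------------------------------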
2493
+
2494
+
2495
+ // -------------------------------------------------------------------
2496
+ // -------------------------------------------------------------------
2497
+ template <class Value, class Key, class HashFcn,
2498
+ class ExtractKey, class SetKey, class EqualKey, class Alloc>
2499
+ class sparse_hashtable
2500
+ {
2501
+ public:
2502
+ typedef Key key_type;
2503
+ typedef Value value_type;
2504
+ typedef HashFcn hasher; // user provided or spp_hash<Key>
2505
+ typedef EqualKey key_equal;
2506
+ typedef Alloc allocator_type;
2507
+
2508
+ typedef typename allocator_type::size_type size_type;
2509
+ typedef typename allocator_type::difference_type difference_type;
2510
+ typedef value_type& reference;
2511
+ typedef const value_type& const_reference;
2512
+ typedef value_type* pointer;
2513
+ typedef const value_type* const_pointer;
2514
+
2515
+ // Table is the main storage class.
2516
+ typedef sparsetable<value_type, allocator_type> Table;
2517
+ typedef typename Table::ne_iterator ne_it;
2518
+ typedef typename Table::const_ne_iterator cne_it;
2519
+ typedef typename Table::destructive_iterator dest_it;
2520
+ typedef typename Table::ColIterator ColIterator;
2521
+
2522
+ typedef ne_it iterator;
2523
+ typedef cne_it const_iterator;
2524
+ typedef dest_it destructive_iterator;
2525
+
2526
+ // These come from tr1. For us they're the same as regular iterators.
2527
+ // -------------------------------------------------------------------
2528
+ typedef iterator local_iterator;
2529
+ typedef const_iterator const_local_iterator;
2530
+
2531
+ // How full we let the table get before we resize
2532
+ // ----------------------------------------------
2533
+ static const int HT_OCCUPANCY_PCT; // = 80 (out of 100);
2534
+
2535
+ // How empty we let the table get before we resize lower, by default.
2536
+ // (0.0 means never resize lower.)
2537
+ // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
2538
+ // ------------------------------------------------------------------
2539
+ static const int HT_EMPTY_PCT; // = 0.4 * HT_OCCUPANCY_PCT;
2540
+
2541
+ // Minimum size we're willing to let hashtables be.
2542
+ // Must be a power of two, and at least 4.
2543
+ // Note, however, that for a given hashtable, the initial size is a
2544
+ // function of the first constructor arg, and may be >HT_MIN_BUCKETS.
2545
+ // ------------------------------------------------------------------
2546
+ static const size_type HT_MIN_BUCKETS = 4;
2547
+
2548
+ // By default, if you don't specify a hashtable size at
2549
+ // construction-time, we use this size. Must be a power of two, and
2550
+ // at least HT_MIN_BUCKETS.
2551
+ // -----------------------------------------------------------------
2552
+ static const size_type HT_DEFAULT_STARTING_BUCKETS = 32;
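// --------------------------------------------------------------------------------
// Illustrative sketch, not part of the sparsepp sources: rough arithmetic for the
// constants above, for a table currently holding 1024 buckets:
//
//     enlarge threshold ~ 1024 * HT_OCCUPANCY_PCT/100 = 1024 * 0.80 ~ 819
//         -> growing is triggered once an insert would push occupancy past ~819
//     shrink threshold  ~ 1024 * HT_EMPTY_PCT/100     = 1024 * 0.32 ~ 327
//         -> after enough erases, fewer than ~327 live items makes shrinking worth
//            considering (but never below HT_DEFAULT_STARTING_BUCKETS)
// --------------------------------------------------------------------------------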
2553
+
2554
+ // iterators
2555
+ // ---------
2556
+ iterator begin() { return _mk_iterator(table.ne_begin()); }
2557
+ iterator end() { return _mk_iterator(table.ne_end()); }
2558
+ const_iterator begin() const { return _mk_const_iterator(table.ne_cbegin()); }
2559
+ const_iterator end() const { return _mk_const_iterator(table.ne_cend()); }
2560
+ const_iterator cbegin() const { return _mk_const_iterator(table.ne_cbegin()); }
2561
+ const_iterator cend() const { return _mk_const_iterator(table.ne_cend()); }
2562
+
2563
+ // These come from tr1 unordered_map. They iterate over 'bucket' n.
2564
+ // For sparsehashtable, we could consider each 'group' to be a bucket,
2565
+ // I guess, but I don't really see the point. We'll just consider
2566
+ // bucket n to be the n-th element of the sparsetable, if it's occupied,
2567
+ // or some empty element, otherwise.
2568
+ // ---------------------------------------------------------------------
2569
+ local_iterator begin(size_type i)
2570
+ {
2571
+ return _mk_iterator(table.test(i) ? table.get_iter(i) : table.ne_end());
2572
+ }
2573
+
2574
+ local_iterator end(size_type i)
2575
+ {
2576
+ local_iterator it = begin(i);
2577
+ if (table.test(i))
2578
+ ++it;
2579
+ return _mk_iterator(it);
2580
+ }
2581
+
2582
+ const_local_iterator begin(size_type i) const
2583
+ {
2584
+ return _mk_const_iterator(table.test(i) ? table.get_iter(i) : table.ne_cend());
2585
+ }
2586
+
2587
+ const_local_iterator end(size_type i) const
2588
+ {
2589
+ const_local_iterator it = begin(i);
2590
+ if (table.test(i))
2591
+ ++it;
2592
+ return _mk_const_iterator(it);
2593
+ }
2594
+
2595
+ const_local_iterator cbegin(size_type i) const { return begin(i); }
2596
+ const_local_iterator cend(size_type i) const { return end(i); }
2597
+
2598
+ // This is used when resizing
2599
+ // --------------------------
2600
+ destructive_iterator destructive_begin() { return _mk_destructive_iterator(table.destructive_begin()); }
2601
+ destructive_iterator destructive_end() { return _mk_destructive_iterator(table.destructive_end()); }
2602
+
2603
+
2604
+ // accessor functions for the things we templatize on, basically
2605
+ // -------------------------------------------------------------
2606
+ hasher hash_funct() const { return settings; }
2607
+ key_equal key_eq() const { return key_info; }
2608
+ allocator_type get_allocator() const { return table.get_allocator(); }
2609
+
2610
+ // Accessor function for statistics gathering.
2611
+ unsigned int num_table_copies() const { return settings.num_ht_copies(); }
2612
+
2613
+ private:
2614
+ // This is used as a tag for the copy constructor, saying to destroy its
2615
+ // arg. We have two ways of destructively copying: with potentially growing
2616
+ // the hashtable as we copy, and without. To make sure the outside world
2617
+ // can't do a destructive copy, we make the typename private.
2618
+ // -----------------------------------------------------------------------
2619
+ enum MoveDontCopyT {MoveDontCopy, MoveDontGrow};
2620
+
2621
+ void _squash_deleted()
2622
+ {
2623
+ // gets rid of any deleted entries we have
2624
+ // ---------------------------------------
2625
+ if (num_deleted)
2626
+ {
2627
+ // get rid of deleted before writing
2628
+ sparse_hashtable tmp(MoveDontGrow, *this);
2629
+ swap(tmp); // now we are tmp
2630
+ }
2631
+ assert(num_deleted == 0);
2632
+ }
2633
+
2634
+ // creating iterators from sparsetable::ne_iterators
2635
+ // -------------------------------------------------
2636
+ iterator _mk_iterator(ne_it it) const { return it; }
2637
+ const_iterator _mk_const_iterator(cne_it it) const { return it; }
2638
+ destructive_iterator _mk_destructive_iterator(dest_it it) const { return it; }
2639
+
2640
+ public:
2641
+ size_type size() const { return table.num_nonempty(); }
2642
+ size_type max_size() const { return table.max_size(); }
2643
+ bool empty() const { return size() == 0; }
2644
+ size_type bucket_count() const { return table.size(); }
2645
+ size_type max_bucket_count() const { return max_size(); }
2646
+ // These are tr1 methods. Their idea of 'bucket' doesn't map well to
2647
+ // what we do. We just say every bucket has 0 or 1 items in it.
2648
+ size_type bucket_size(size_type i) const
2649
+ {
2650
+ return (size_type)(begin(i) == end(i) ? 0 : 1);
2651
+ }
2652
+
2653
+ private:
2654
+ // Because of the above, size_type(-1) is never legal; use it for errors
2655
+ // ---------------------------------------------------------------------
2656
+ static const size_type ILLEGAL_BUCKET = size_type(-1);
2657
+
2658
+ // Used after a string of deletes. Returns true if we actually shrank.
2659
+ // TODO(csilvers): take a delta so we can take into account inserts
2660
+ // done after shrinking. Maybe make part of the Settings class?
2661
+ // --------------------------------------------------------------------
2662
+ bool _maybe_shrink()
2663
+ {
2664
+ assert((bucket_count() & (bucket_count()-1)) == 0); // is a power of two
2665
+ assert(bucket_count() >= HT_MIN_BUCKETS);
2666
+ bool retval = false;
2667
+
2668
+ // If you construct a hashtable with < HT_DEFAULT_STARTING_BUCKETS,
2669
+ // we'll never shrink until you get relatively big, and we'll never
2670
+ // shrink below HT_DEFAULT_STARTING_BUCKETS. Otherwise, something
2671
+ // like "dense_hash_set<int> x; x.insert(4); x.erase(4);" will
2672
+ // shrink us down to HT_MIN_BUCKETS buckets, which is too small.
2673
+ // ---------------------------------------------------------------
2674
+ const size_type num_remain = table.num_nonempty();
2675
+ const size_type shrink_threshold = settings.shrink_threshold();
2676
+ if (shrink_threshold > 0 && num_remain < shrink_threshold &&
2677
+ bucket_count() > HT_DEFAULT_STARTING_BUCKETS)
2678
+ {
2679
+ const float shrink_factor = settings.shrink_factor();
2680
+ size_type sz = (size_type)(bucket_count() / 2); // find how much we should shrink
2681
+ while (sz > HT_DEFAULT_STARTING_BUCKETS &&
2682
+ num_remain < static_cast<size_type>(sz * shrink_factor))
2683
+ {
2684
+ sz /= 2; // stay a power of 2
2685
+ }
2686
+ sparse_hashtable tmp(MoveDontCopy, *this, sz);
2687
+ swap(tmp); // now we are tmp
2688
+ retval = true;
2689
+ }
2690
+ settings.set_consider_shrink(false); // because we just considered it
2691
+ return retval;
2692
+ }
2693
+
2694
+ // We'll let you resize a hashtable -- though this makes us copy all!
2695
+ // When you resize, you say, "make it big enough for this many more elements"
2696
+ // Returns true if we actually resized, false if size was already ok.
2697
+ // --------------------------------------------------------------------------
2698
+ bool _resize_delta(size_type delta)
2699
+ {
2700
+ bool did_resize = false;
2701
+ if (settings.consider_shrink())
2702
+ {
2703
+ // see if lots of deletes happened
2704
+ if (_maybe_shrink())
2705
+ did_resize = true;
2706
+ }
2707
+ if (table.num_nonempty() >=
2708
+ (std::numeric_limits<size_type>::max)() - delta)
2709
+ {
2710
+ throw_exception(std::length_error("resize overflow"));
2711
+ }
2712
+
2713
+ size_type num_occupied = (size_type)(table.num_nonempty() + num_deleted);
2714
+
2715
+ if (bucket_count() >= HT_MIN_BUCKETS &&
2716
+ (num_occupied + delta) <= settings.enlarge_threshold())
2717
+ return did_resize; // we're ok as we are
2718
+
2719
+ // Sometimes, we need to resize just to get rid of all the
2720
+ // "deleted" buckets that are clogging up the hashtable. So when
2721
+ // deciding whether to resize, count the deleted buckets (which
2722
+ // are currently taking up room).
2723
+ // -------------------------------------------------------------
2724
+ const size_type needed_size =
2725
+ settings.min_buckets((size_type)(num_occupied + delta), (size_type)0);
2726
+
2727
+ if (needed_size <= bucket_count()) // we have enough buckets
2728
+ return did_resize;
2729
+
2730
+ size_type resize_to = settings.min_buckets((size_type)(num_occupied + delta), bucket_count());
2731
+
2732
+ if (resize_to < needed_size && // may double resize_to
2733
+ resize_to < (std::numeric_limits<size_type>::max)() / 2)
2734
+ {
2735
+ // This situation means that we have enough deleted elements,
2736
+ // that once we purge them, we won't actually have needed to
2737
+ // grow. But we may want to grow anyway: if we just purge one
2738
+ // element, say, we'll have to grow anyway next time we
2739
+ // insert. Might as well grow now, since we're already going
2740
+ // through the trouble of copying (in order to purge the
2741
+ // deleted elements).
2742
+ const size_type target =
2743
+ static_cast<size_type>(settings.shrink_size((size_type)(resize_to*2)));
2744
+ if (table.num_nonempty() + delta >= target)
2745
+ {
2746
+ // Good, we won't be below the shrink threshold even if we double.
2747
+ resize_to *= 2;
2748
+ }
2749
+ }
2750
+
2751
+ sparse_hashtable tmp(MoveDontCopy, *this, resize_to);
2752
+ swap(tmp); // now we are tmp
2753
+ return true;
2754
+ }
2755
+
2756
+ // Used to actually do the rehashing when we grow/shrink a hashtable
2757
+ // -----------------------------------------------------------------
2758
+ void _copy_from(const sparse_hashtable &ht, size_type min_buckets_wanted)
2759
+ {
2760
+ clear(); // clear table, set num_deleted to 0
2761
+
2762
+ // If we need to change the size of our table, do it now
2763
+ const size_type resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
2764
+
2765
+ if (resize_to > bucket_count())
2766
+ {
2767
+ // we don't have enough buckets
2768
+ table.resize(resize_to); // sets the number of buckets
2769
+ settings.reset_thresholds(bucket_count());
2770
+ }
2771
+
2772
+ // We use a normal iterator to get buckets from ht
2773
+ // We could use insert() here, but since we know there are
2774
+ // no duplicates, we can be more efficient
2775
+ assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two
2776
+ for (const_iterator it = ht.begin(); it != ht.end(); ++it)
2777
+ {
2778
+ size_type num_probes = 0; // how many times we've probed
2779
+ size_type bucknum;
2780
+ const size_type bucket_count_minus_one = bucket_count() - 1;
2781
+ for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
2782
+ table.test(bucknum); // table.test() OK since no erase()
2783
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one)
2784
+ {
2785
+ ++num_probes;
2786
+ assert(num_probes < bucket_count()
2787
+ && "Hashtable is full: an error in key_equal<> or hash<>");
2788
+ }
2789
+ table.set(bucknum, *it); // copies the value to here
2790
+ }
2791
+ settings.inc_num_ht_copies();
2792
+ }
2793
+
2794
+ // Implementation is like _copy_from, but it destroys the table of the
2795
+ // "from" guy by freeing sparsetable memory as we iterate. This is
2796
+ // useful in resizing, since we're throwing away the "from" guy anyway.
2797
+ // --------------------------------------------------------------------
2798
+ void _move_from(MoveDontCopyT mover, sparse_hashtable &ht,
2799
+ size_type min_buckets_wanted)
2800
+ {
2801
+ clear();
2802
+
2803
+ // If we need to change the size of our table, do it now
2804
+ size_type resize_to;
2805
+ if (mover == MoveDontGrow)
2806
+ resize_to = ht.bucket_count(); // keep same size as old ht
2807
+ else // MoveDontCopy
2808
+ resize_to = settings.min_buckets(ht.size(), min_buckets_wanted);
2809
+ if (resize_to > bucket_count())
2810
+ {
2811
+ // we don't have enough buckets
2812
+ table.resize(resize_to); // sets the number of buckets
2813
+ settings.reset_thresholds(bucket_count());
2814
+ }
2815
+
2816
+ // We use a normal iterator to get buckets from ht
2817
+ // We could use insert() here, but since we know there are
2818
+ // no duplicates, we can be more efficient
2819
+ assert((bucket_count() & (bucket_count()-1)) == 0); // a power of two
2820
+ const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1);
2821
+
2822
+ // THIS IS THE MAJOR LINE THAT DIFFERS FROM COPY_FROM():
2823
+ for (destructive_iterator it = ht.destructive_begin();
2824
+ it != ht.destructive_end(); ++it)
2825
+ {
2826
+ size_type num_probes = 0;
2827
+ size_type bucknum;
2828
+ for (bucknum = hash(get_key(*it)) & bucket_count_minus_one;
2829
+ table.test(bucknum); // table.test() OK since no erase()
2830
+ bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & (bucket_count()-1)))
2831
+ {
2832
+ ++num_probes;
2833
+ assert(num_probes < bucket_count()
2834
+ && "Hashtable is full: an error in key_equal<> or hash<>");
2835
+ }
2836
+ table.move(bucknum, *it); // moves the value to here
2837
+ }
2838
+ settings.inc_num_ht_copies();
2839
+ }
2840
+
2841
+
2842
+ // Required by the spec for hashed associative container
2843
+ public:
2844
+ // Though the docs say this should be num_buckets, I think it's much
2845
+ // more useful as num_elements. As a special feature, calling with
2846
+ // req_elements==0 will cause us to shrink if we can, saving space.
2847
+ // -----------------------------------------------------------------
2848
+ void resize(size_type req_elements)
2849
+ {
2850
+ // resize to this or larger
2851
+ if (settings.consider_shrink() || req_elements == 0)
2852
+ _maybe_shrink();
2853
+ if (req_elements > table.num_nonempty()) // we only grow
2854
+ _resize_delta((size_type)(req_elements - table.num_nonempty()));
2855
+ }
2856
+
2857
+ // Get and change the value of shrink_factor and enlarge_factor. The
2858
+ // description at the beginning of this file explains how to choose
2859
+ // the values. Setting the shrink parameter to 0.0 ensures that the
2860
+ // table never shrinks.
2861
+ // ------------------------------------------------------------------
2862
+ void get_resizing_parameters(float* shrink, float* grow) const
2863
+ {
2864
+ *shrink = settings.shrink_factor();
2865
+ *grow = settings.enlarge_factor();
2866
+ }
2867
+
2868
+ float get_shrink_factor() const { return settings.shrink_factor(); }
2869
+ float get_enlarge_factor() const { return settings.enlarge_factor(); }
2870
+
2871
+ void set_resizing_parameters(float shrink, float grow)
2872
+ {
2873
+ settings.set_resizing_parameters(shrink, grow);
2874
+ settings.reset_thresholds(bucket_count());
2875
+ }
2876
+
2877
+ void set_shrink_factor(float shrink)
2878
+ {
2879
+ set_resizing_parameters(shrink, get_enlarge_factor());
2880
+ }
2881
+
2882
+ void set_enlarge_factor(float grow)
2883
+ {
2884
+ set_resizing_parameters(get_shrink_factor(), grow);
2885
+ }
2886
+
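+ // Illustrative sketch (not part of the library): tuning the resizing behaviour
+ // through the setters above. Passing 0.0 as the shrink factor disables
+ // shrinking entirely, as noted in the comment:
+ //
+ //     ht.set_resizing_parameters(0.0f, 0.8f);  // never shrink, grow at 80% full
+ //     // or equivalently, via the individual setters:
+ //     ht.set_shrink_factor(0.0f);
+ //     ht.set_enlarge_factor(0.8f);
+ //
+ // 'ht' stands for any sparse_hashtable; the sparse_hash_map/_set wrappers
+ // expose the same knobs as min_load_factor()/max_load_factor().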
2887
+ // CONSTRUCTORS -- as required by the specs, we take a size,
2888
+ // but also let you specify a hashfunction, key comparator,
2889
+ // and key extractor. We also define a copy constructor and =.
2890
+ // DESTRUCTOR -- the default is fine, surprisingly.
2891
+ // ------------------------------------------------------------
2892
+ explicit sparse_hashtable(size_type expected_max_items_in_table = 0,
2893
+ const HashFcn& hf = HashFcn(),
2894
+ const EqualKey& eql = EqualKey(),
2895
+ const ExtractKey& ext = ExtractKey(),
2896
+ const SetKey& set = SetKey(),
2897
+ const allocator_type& alloc = allocator_type())
2898
+ : settings(hf),
2899
+ key_info(ext, set, eql),
2900
+ num_deleted(0),
2901
+ table((expected_max_items_in_table == 0
2902
+ ? HT_DEFAULT_STARTING_BUCKETS
2903
+ : settings.min_buckets(expected_max_items_in_table, 0)),
2904
+ alloc)
2905
+ {
2906
+ settings.reset_thresholds(bucket_count());
2907
+ }
2908
+
2909
+ // As a convenience for resize(), we allow an optional second argument
2910
+ // which lets you make this new hashtable a different size than ht.
2911
+ // We also provide a mechanism for saying you want to "move" the ht argument
2912
+ // into us instead of copying.
2913
+ // ------------------------------------------------------------------------
2914
+ sparse_hashtable(const sparse_hashtable& ht,
2915
+ size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
2916
+ : settings(ht.settings),
2917
+ key_info(ht.key_info),
2918
+ num_deleted(0),
2919
+ table(0)
2920
+ {
2921
+ settings.reset_thresholds(bucket_count());
2922
+ _copy_from(ht, min_buckets_wanted);
2923
+ }
2924
+
2925
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
2926
+
2927
+ sparse_hashtable(sparse_hashtable&& o) :
2928
+ settings(std::move(o.settings)),
2929
+ key_info(std::move(o.key_info)),
2930
+ num_deleted(o.num_deleted),
2931
+ table(std::move(o.table))
2932
+ {
2933
+ }
2934
+
2935
+ sparse_hashtable(sparse_hashtable&& o, const allocator_type& alloc) :
2936
+ settings(std::move(o.settings)),
2937
+ key_info(std::move(o.key_info)),
2938
+ num_deleted(o.num_deleted),
2939
+ table(std::move(o.table), alloc)
2940
+ {
2941
+ }
2942
+
2943
+ sparse_hashtable& operator=(sparse_hashtable&& o)
2944
+ {
2945
+ using std::swap;
2946
+
2947
+ sparse_hashtable tmp(std::move(o));
2948
+ swap(tmp, *this);
2949
+ return *this;
2950
+ }
2951
+ #endif
2952
+
2953
+ sparse_hashtable(MoveDontCopyT mover,
2954
+ sparse_hashtable& ht,
2955
+ size_type min_buckets_wanted = HT_DEFAULT_STARTING_BUCKETS)
2956
+ : settings(ht.settings),
2957
+ key_info(ht.key_info),
2958
+ num_deleted(0),
2959
+ table(min_buckets_wanted, ht.table.get_allocator())
2960
+ //table(min_buckets_wanted)
2961
+ {
2962
+ settings.reset_thresholds(bucket_count());
2963
+ _move_from(mover, ht, min_buckets_wanted);
2964
+ }
2965
+
2966
+ sparse_hashtable& operator=(const sparse_hashtable& ht)
2967
+ {
2968
+ if (&ht == this)
2969
+ return *this; // don't copy onto ourselves
2970
+ settings = ht.settings;
2971
+ key_info = ht.key_info;
2972
+ num_deleted = ht.num_deleted;
2973
+
2974
+ // _copy_from() calls clear and sets num_deleted to 0 too
2975
+ _copy_from(ht, HT_MIN_BUCKETS);
2976
+
2977
+ // we purposefully don't copy the allocator, which may not be copyable
2978
+ return *this;
2979
+ }
2980
+
2981
+ // Many STL algorithms use swap instead of copy constructors
2982
+ void swap(sparse_hashtable& ht)
2983
+ {
2984
+ using std::swap;
2985
+
2986
+ swap(settings, ht.settings);
2987
+ swap(key_info, ht.key_info);
2988
+ swap(num_deleted, ht.num_deleted);
2989
+ table.swap(ht.table);
2990
+ settings.reset_thresholds(bucket_count()); // also resets consider_shrink
2991
+ ht.settings.reset_thresholds(ht.bucket_count());
2992
+ // we purposefully don't swap the allocator, which may not be swap-able
2993
+ }
2994
+
2995
+ // It's always nice to be able to clear a table without deallocating it
2996
+ void clear()
2997
+ {
2998
+ if (!empty() || num_deleted != 0)
2999
+ {
3000
+ table.clear();
3001
+ table = Table(HT_DEFAULT_STARTING_BUCKETS);
3002
+ }
3003
+ settings.reset_thresholds(bucket_count());
3004
+ num_deleted = 0;
3005
+ }
3006
+
3007
+ // LOOKUP ROUTINES
3008
+ private:
3009
+
3010
+ enum pos_type { pt_empty = 0, pt_erased, pt_full };
3011
+ // -------------------------------------------------------------------
3012
+ class Position
3013
+ {
3014
+ public:
3015
+
3016
+ Position() : _t(pt_empty) {}
3017
+ Position(pos_type t, size_type idx) : _t(t), _idx(idx) {}
3018
+
3019
+ pos_type _t;
3020
+ size_type _idx;
3021
+ };
3022
+
3023
+ // Returns a Position describing where the key is or should go:
3024
+ // - '_t' is pt_full if the key is already present, pt_empty or pt_erased otherwise.
3025
+ // - '_idx' is the bucket where the key lives, or where it should be inserted.
3026
+ // Note: because of deletions where-to-insert is not trivial: it's the
3027
+ // first deleted bucket we see, as long as we don't find the key later
3028
+ // -------------------------------------------------------------------
3029
+ Position _find_position(const key_type &key) const
3030
+ {
3031
+ size_type num_probes = 0; // how many times we've probed
3032
+ const size_type bucket_count_minus_one = (const size_type)(bucket_count() - 1);
3033
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3034
+ Position pos;
3035
+
3036
+ while (1)
3037
+ {
3038
+ // probe until something happens
3039
+ // -----------------------------
3040
+ typename Table::GrpPos grp_pos(table, bucknum);
3041
+
3042
+ if (!grp_pos.test_strict())
3043
+ {
3044
+ // bucket is empty => key not present
3045
+ return pos._t ? pos : Position(pt_empty, bucknum);
3046
+ }
3047
+ else if (grp_pos.test())
3048
+ {
3049
+ reference ref(grp_pos.unsafe_get());
3050
+
3051
+ if (equals(key, get_key(ref)))
3052
+ return Position(pt_full, bucknum);
3053
+ }
3054
+ else if (pos._t == pt_empty)
3055
+ {
3056
+ // first erased position
3057
+ pos._t = pt_erased;
3058
+ pos._idx = bucknum;
3059
+ }
3060
+
3061
+ ++num_probes; // we're doing another probe
3062
+ bucknum = (size_type)((bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one);
3063
+ assert(num_probes < bucket_count()
3064
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3065
+ }
3066
+ }
3067
+
3068
+ public:
3069
+ // I hate to duplicate find() like that, but it is
3070
+ // significantly faster to not have the intermediate pair
3071
+ // ------------------------------------------------------------------
3072
+ iterator find(const key_type& key)
3073
+ {
3074
+ size_type num_probes = 0; // how many times we've probed
3075
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3076
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3077
+
3078
+ while (1) // probe until something happens
3079
+ {
3080
+ typename Table::GrpPos grp_pos(table, bucknum);
3081
+
3082
+ if (!grp_pos.test_strict())
3083
+ return end(); // bucket is empty
3084
+ if (grp_pos.test())
3085
+ {
3086
+ reference ref(grp_pos.unsafe_get());
3087
+
3088
+ if (equals(key, get_key(ref)))
3089
+ return grp_pos.get_iter(ref);
3090
+ }
3091
+ ++num_probes; // we're doing another probe
3092
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3093
+ assert(num_probes < bucket_count()
3094
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3095
+ }
3096
+ }
3097
+
3098
+ // Wish I could avoid the duplicate find() const and non-const.
3099
+ // ------------------------------------------------------------
3100
+ const_iterator find(const key_type& key) const
3101
+ {
3102
+ size_type num_probes = 0; // how many times we've probed
3103
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3104
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3105
+
3106
+ while (1) // probe until something happens
3107
+ {
3108
+ typename Table::GrpPos grp_pos(table, bucknum);
3109
+
3110
+ if (!grp_pos.test_strict())
3111
+ return end(); // bucket is empty
3112
+ else if (grp_pos.test())
3113
+ {
3114
+ reference ref(grp_pos.unsafe_get());
3115
+
3116
+ if (equals(key, get_key(ref)))
3117
+ return _mk_const_iterator(table.get_iter(bucknum, &ref));
3118
+ }
3119
+ ++num_probes; // we're doing another probe
3120
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3121
+ assert(num_probes < bucket_count()
3122
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3123
+ }
3124
+ }
3125
+
3126
+ // This is a tr1 method: the bucket a given key is in, or what bucket
3127
+ // it would be put in, if it were to be inserted. Shrug.
3128
+ // ------------------------------------------------------------------
3129
+ size_type bucket(const key_type& key) const
3130
+ {
3131
+ Position pos = _find_position(key);
3132
+ return pos._idx;
3133
+ }
3134
+
3135
+ // Counts how many elements have key key. For maps, it's either 0 or 1.
3136
+ // ---------------------------------------------------------------------
3137
+ size_type count(const key_type &key) const
3138
+ {
3139
+ Position pos = _find_position(key);
3140
+ return (size_type)(pos._t == pt_full ? 1 : 0);
3141
+ }
3142
+
3143
+ // Likewise, equal_range doesn't really make sense for us. Oh well.
3144
+ // -----------------------------------------------------------------
3145
+ std::pair<iterator,iterator> equal_range(const key_type& key)
3146
+ {
3147
+ iterator pos = find(key); // either an iterator or end
3148
+ if (pos == end())
3149
+ return std::pair<iterator,iterator>(pos, pos);
3150
+ else
3151
+ {
3152
+ const iterator startpos = pos++;
3153
+ return std::pair<iterator,iterator>(startpos, pos);
3154
+ }
3155
+ }
3156
+
3157
+ std::pair<const_iterator,const_iterator> equal_range(const key_type& key) const
3158
+ {
3159
+ const_iterator pos = find(key); // either an iterator or end
3160
+ if (pos == end())
3161
+ return std::pair<const_iterator,const_iterator>(pos, pos);
3162
+ else
3163
+ {
3164
+ const const_iterator startpos = pos++;
3165
+ return std::pair<const_iterator,const_iterator>(startpos, pos);
3166
+ }
3167
+ }
3168
+
3169
+
3170
+ // INSERTION ROUTINES
3171
+ private:
3172
+ // Private method used by insert_noresize and find_or_insert.
3173
+ template <class T>
3174
+ reference _insert_at(T& obj, size_type pos, bool erased)
3175
+ {
3176
+ if (size() >= max_size())
3177
+ {
3178
+ throw_exception(std::length_error("insert overflow"));
3179
+ }
3180
+ if (erased)
3181
+ {
3182
+ assert(num_deleted);
3183
+ --num_deleted;
3184
+ }
3185
+ return table.set(pos, obj);
3186
+ }
3187
+
3188
+ // If you know *this is big enough to hold obj, use this routine
3189
+ template <class T>
3190
+ std::pair<iterator, bool> _insert_noresize(T& obj)
3191
+ {
3192
+ Position pos = _find_position(get_key(obj));
3193
+ bool already_there = (pos._t == pt_full);
3194
+
3195
+ if (!already_there)
3196
+ {
3197
+ reference ref(_insert_at(obj, pos._idx, pos._t == pt_erased));
3198
+ return std::pair<iterator, bool>(_mk_iterator(table.get_iter(pos._idx, &ref)), true);
3199
+ }
3200
+ return std::pair<iterator,bool>(_mk_iterator(table.get_iter(pos._idx)), false);
3201
+ }
3202
+
3203
+ // Specializations of insert(it, it) depending on the power of the iterator:
3204
+ // (1) Iterator supports operator-, resize before inserting
3205
+ template <class ForwardIterator>
3206
+ void _insert(ForwardIterator f, ForwardIterator l, std::forward_iterator_tag /*unused*/)
3207
+ {
3208
+ int64_t dist = std::distance(f, l);
3209
+ if (dist < 0 || static_cast<size_t>(dist) >= (std::numeric_limits<size_type>::max)())
3210
+ throw_exception(std::length_error("insert-range overflow"));
3211
+
3212
+ _resize_delta(static_cast<size_type>(dist));
3213
+
3214
+ for (; dist > 0; --dist, ++f)
3215
+ _insert_noresize(*f);
3216
+ }
3217
+
3218
+ // (2) Arbitrary iterator, can't tell how much to resize
3219
+ template <class InputIterator>
3220
+ void _insert(InputIterator f, InputIterator l, std::input_iterator_tag /*unused*/)
3221
+ {
3222
+ for (; f != l; ++f)
3223
+ _insert(*f);
3224
+ }
3225
+
3226
+ public:
3227
+
3228
+ #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES)
3229
+ template <class... Args>
3230
+ std::pair<iterator, bool> emplace(Args&&... args)
3231
+ {
3232
+ _resize_delta(1);
3233
+ value_type obj(std::forward<Args>(args)...);
3234
+ return _insert_noresize(obj);
3235
+ }
3236
+ #endif
3237
+
3238
+ // This is the normal insert routine, used by the outside world
3239
+ std::pair<iterator, bool> insert(const_reference obj)
3240
+ {
3241
+ _resize_delta(1); // adding an object, grow if need be
3242
+ return _insert_noresize(obj);
3243
+ }
3244
+
3245
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
3246
+ template< class P >
3247
+ std::pair<iterator, bool> insert(P &&obj)
3248
+ {
3249
+ _resize_delta(1); // adding an object, grow if need be
3250
+ value_type val(std::forward<value_type>(obj));
3251
+ return _insert_noresize(val);
3252
+ }
3253
+ #endif
3254
+
3255
+ // When inserting a lot at a time, we specialize on the type of iterator
3256
+ template <class InputIterator>
3257
+ void insert(InputIterator f, InputIterator l)
3258
+ {
3259
+ // specializes on iterator type
3260
+ _insert(f, l,
3261
+ typename std::iterator_traits<InputIterator>::iterator_category());
3262
+ }
3263
+
3264
+ // DefaultValue is a functor that takes a key and returns a value_type
3265
+ // representing the default value to be inserted if none is found.
3266
+ template <class DefaultValue>
3267
+ value_type& find_or_insert(const key_type& key)
3268
+ {
3269
+ size_type num_probes = 0; // how many times we've probed
3270
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3271
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3272
+ DefaultValue default_value;
3273
+ size_type erased_pos = 0;
3274
+ bool erased = false;
3275
+
3276
+ while (1) // probe until something happens
3277
+ {
3278
+ typename Table::GrpPos grp_pos(table, bucknum);
3279
+
3280
+ if (!grp_pos.test_strict())
3281
+ {
3282
+ // not found
3283
+ if (_resize_delta(1))
3284
+ {
3285
+ // needed to rehash to make room
3286
+ // Since we resized, we can't use pos, so recalculate where to insert.
3287
+ value_type def(default_value(key));
3288
+ return *(_insert_noresize(def).first);
3289
+ }
3290
+ else
3291
+ {
3292
+ // no need to rehash, insert right here
3293
+ value_type def(default_value(key));
3294
+ return _insert_at(def, erased ? erased_pos : bucknum, erased);
3295
+ }
3296
+ }
3297
+ if (grp_pos.test())
3298
+ {
3299
+ reference ref(grp_pos.unsafe_get());
3300
+
3301
+ if (equals(key, get_key(ref)))
3302
+ return ref;
3303
+ }
3304
+ else if (!erased)
3305
+ {
3306
+ // first erased position
3307
+ erased_pos = bucknum;
3308
+ erased = true;
3309
+ }
3310
+
3311
+ ++num_probes; // we're doing another probe
3312
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3313
+ assert(num_probes < bucket_count()
3314
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3315
+ }
3316
+ }
3317
+
3318
+ size_type erase(const key_type& key)
3319
+ {
3320
+ size_type num_probes = 0; // how many times we've probed
3321
+ const size_type bucket_count_minus_one = bucket_count() - 1;
3322
+ size_type bucknum = hash(key) & bucket_count_minus_one;
3323
+
3324
+ while (1) // probe until something happens
3325
+ {
3326
+ typename Table::GrpPos grp_pos(table, bucknum);
3327
+
3328
+ if (!grp_pos.test_strict())
3329
+ return 0; // bucket is empty, we deleted nothing
3330
+ if (grp_pos.test())
3331
+ {
3332
+ reference ref(grp_pos.unsafe_get());
3333
+
3334
+ if (equals(key, get_key(ref)))
3335
+ {
3336
+ grp_pos.erase(table);
3337
+ ++num_deleted;
3338
+ settings.set_consider_shrink(true); // will think about shrink after next insert
3339
+ return 1; // because we deleted one thing
3340
+ }
3341
+ }
3342
+ ++num_probes; // we're doing another probe
3343
+ bucknum = (bucknum + JUMP_(key, num_probes)) & bucket_count_minus_one;
3344
+ assert(num_probes < bucket_count()
3345
+ && "Hashtable is full: an error in key_equal<> or hash<>");
3346
+ }
3347
+ }
3348
+
3349
+ const_iterator erase(const_iterator pos)
3350
+ {
3351
+ if (pos == cend())
3352
+ return cend(); // sanity check
3353
+
3354
+ const_iterator nextpos = table.erase(pos);
3355
+ ++num_deleted;
3356
+ settings.set_consider_shrink(true);
3357
+ return nextpos;
3358
+ }
3359
+
3360
+ const_iterator erase(const_iterator f, const_iterator l)
3361
+ {
3362
+ if (f == cend())
3363
+ return cend(); // sanity check
3364
+
3365
+ size_type num_before = table.num_nonempty();
3366
+ const_iterator nextpos = table.erase(f, l);
3367
+ num_deleted += num_before - table.num_nonempty();
3368
+ settings.set_consider_shrink(true);
3369
+ return nextpos;
3370
+ }
3371
+
3372
+ // Deleted key routines - just to keep google test framework happy
3373
+ // we don't actually use the deleted key
3374
+ // ---------------------------------------------------------------
3375
+ void set_deleted_key(const key_type&)
3376
+ {
3377
+ }
3378
+
3379
+ void clear_deleted_key()
3380
+ {
3381
+ }
3382
+
3383
+ bool operator==(const sparse_hashtable& ht) const
3384
+ {
3385
+ if (this == &ht)
3386
+ return true;
3387
+
3388
+ if (size() != ht.size())
3389
+ return false;
3390
+
3391
+ for (const_iterator it = begin(); it != end(); ++it)
3392
+ {
3393
+ const_iterator it2 = ht.find(get_key(*it));
3394
+ if ((it2 == ht.end()) || (*it != *it2))
3395
+ return false;
3396
+ }
3397
+
3398
+ return true;
3399
+ }
3400
+
3401
+ bool operator!=(const sparse_hashtable& ht) const
3402
+ {
3403
+ return !(*this == ht);
3404
+ }
3405
+
3406
+
3407
+ // I/O
3408
+ // We support reading and writing hashtables to disk. NOTE that
3409
+ // this only stores the hashtable metadata, not the stuff you've
3410
+ // actually put in the hashtable! Alas, since I don't know how to
3411
+ // write a hasher or key_equal, you have to make sure everything
3412
+ // but the table is the same. We compact before writing.
3413
+ //
3414
+ // The OUTPUT type needs to support a Write() operation. File and
3415
+ // OutputBuffer are appropriate types to pass in.
3416
+ //
3417
+ // The INPUT type needs to support a Read() operation. File and
3418
+ // InputBuffer are appropriate types to pass in.
3419
+ // -------------------------------------------------------------
3420
+ template <typename OUTPUT>
3421
+ bool write_metadata(OUTPUT *fp)
3422
+ {
3423
+ return table.write_metadata(fp);
3424
+ }
3425
+
3426
+ template <typename INPUT>
3427
+ bool read_metadata(INPUT *fp)
3428
+ {
3429
+ num_deleted = 0; // the table was compacted (no deleted entries) before writing
3430
+ const bool result = table.read_metadata(fp);
3431
+ settings.reset_thresholds(bucket_count());
3432
+ return result;
3433
+ }
3434
+
3435
+ // Only meaningful if value_type is a POD.
3436
+ template <typename OUTPUT>
3437
+ bool write_nopointer_data(OUTPUT *fp)
3438
+ {
3439
+ return table.write_nopointer_data(fp);
3440
+ }
3441
+
3442
+ // Only meaningful if value_type is a POD.
3443
+ template <typename INPUT>
3444
+ bool read_nopointer_data(INPUT *fp)
3445
+ {
3446
+ return table.read_nopointer_data(fp);
3447
+ }
3448
+
3449
+ // INPUT and OUTPUT must be either a FILE, *or* a C++ stream
3450
+ // (istream, ostream, etc) *or* a class providing
3451
+ // Read(void*, size_t) and Write(const void*, size_t)
3452
+ // (respectively), which writes a buffer into a stream
3453
+ // (which the INPUT/OUTPUT instance presumably owns).
3454
+
3455
+ typedef sparsehash_internal::pod_serializer<value_type> NopointerSerializer;
3456
+
3457
+ // ValueSerializer: a functor. operator()(OUTPUT*, const value_type&)
3458
+ template <typename ValueSerializer, typename OUTPUT>
3459
+ bool serialize(ValueSerializer serializer, OUTPUT *fp)
3460
+ {
3461
+ return table.serialize(serializer, fp);
3462
+ }
3463
+
3464
+ // ValueSerializer: a functor. operator()(INPUT*, value_type*)
3465
+ template <typename ValueSerializer, typename INPUT>
3466
+ bool unserialize(ValueSerializer serializer, INPUT *fp)
3467
+ {
3468
+ num_deleted = 0; // the table was compacted (no deleted entries) before writing
3469
+ const bool result = table.unserialize(serializer, fp);
3470
+ settings.reset_thresholds(bucket_count());
3471
+ return result;
3472
+ }
3473
+
3474
+ private:
3475
+
3476
+ // Package templated functors with the other types to eliminate memory
3477
+ // needed for storing these zero-size operators. Since ExtractKey and
3478
+ // hasher's operator() might have the same function signature, they
3479
+ // must be packaged in different classes.
3480
+ // -------------------------------------------------------------------------
3481
+ struct Settings :
3482
+ sparsehash_internal::sh_hashtable_settings<key_type, hasher,
3483
+ size_type, HT_MIN_BUCKETS>
3484
+ {
3485
+ explicit Settings(const hasher& hf)
3486
+ : sparsehash_internal::sh_hashtable_settings<key_type, hasher, size_type,
3487
+ HT_MIN_BUCKETS>
3488
+ (hf, HT_OCCUPANCY_PCT / 100.0f, HT_EMPTY_PCT / 100.0f) {}
3489
+ };
3490
+
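+ // Illustrative note (not part of the library): the packaging described above
+ // relies on the empty base class optimization. Storing an empty functor as a
+ // member costs space, while inheriting from it usually costs nothing:
+ //
+ //     struct EmptyFunctor {};
+ //     struct AsMember { EmptyFunctor f; int n; };  // typically sizeof > sizeof(int)
+ //     struct AsBase : EmptyFunctor { int n; };     // typically sizeof == sizeof(int)
+ //
+ // Settings above and KeyInfo below use the inheritance form, so empty hashers,
+ // extractors and comparators add no per-table overhead.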
3491
+ // KeyInfo packages the zero-size functors so they occupy no storage:
3492
+ // ExtractKey, SetKey and EqualKey.
3493
+ // ---------------------------------------------------------
3494
+ class KeyInfo : public ExtractKey, public SetKey, public EqualKey
3495
+ {
3496
+ public:
3497
+ KeyInfo(const ExtractKey& ek, const SetKey& sk, const EqualKey& eq)
3498
+ : ExtractKey(ek), SetKey(sk), EqualKey(eq)
3499
+ {
3500
+ }
3501
+
3502
+ // We want to return the exact same type as ExtractKey: Key or const Key&
3503
+ typename ExtractKey::result_type get_key(const_reference v) const
3504
+ {
3505
+ return ExtractKey::operator()(v);
3506
+ }
3507
+
3508
+ bool equals(const key_type& a, const key_type& b) const
3509
+ {
3510
+ return EqualKey::operator()(a, b);
3511
+ }
3512
+ };
3513
+
3514
+ // Utility functions to access the templated operators
3515
+ size_t hash(const key_type& v) const
3516
+ {
3517
+ return settings.hash(v);
3518
+ }
3519
+
3520
+ bool equals(const key_type& a, const key_type& b) const
3521
+ {
3522
+ return key_info.equals(a, b);
3523
+ }
3524
+
3525
+ typename ExtractKey::result_type get_key(const_reference v) const
3526
+ {
3527
+ return key_info.get_key(v);
3528
+ }
3529
+
3530
+ private:
3531
+ // Actual data
3532
+ // -----------
3533
+ Settings settings;
3534
+ KeyInfo key_info;
3535
+ size_type num_deleted;
3536
+ Table table; // holds num_buckets and num_elements too
3537
+ };
3538
+
3539
+ #undef JUMP_
3540
+
3541
+ // -----------------------------------------------------------------------------
3542
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
3543
+ const typename sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::size_type
3544
+ sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::ILLEGAL_BUCKET;
3545
+
3546
+ // How full we let the table get before we resize. Knuth says .8 is
3547
+ // good -- higher causes us to probe too much, though saves memory
3548
+ // -----------------------------------------------------------------------------
3549
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
3550
+ const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT = 50;
3551
+
3552
+ // How empty we let the table get before we resize lower.
3553
+ // It should be less than OCCUPANCY_PCT / 2 or we thrash resizing
3554
+ // -----------------------------------------------------------------------------
3555
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
3556
+ const int sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_EMPTY_PCT
3557
+ = static_cast<int>(0.4 *
3558
+ sparse_hashtable<V,K,HF,ExK,SetK,EqK,A>::HT_OCCUPANCY_PCT);
3559
+
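+ // Worked example of these defaults (illustrative, not part of the library):
+ // with HT_OCCUPANCY_PCT = 50 and HT_EMPTY_PCT = static_cast<int>(0.4 * 50) = 20,
+ // a table with 1024 buckets grows once it holds more than about
+ // 1024 * 0.50 = 512 elements and becomes a shrink candidate once it drops below
+ // about 1024 * 0.20 = 204 elements (the exact thresholds are computed by the
+ // Settings class from these percentages).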
3560
+
3561
+ // ----------------------------------------------------------------------
3562
+ // S P A R S E _ H A S H _ M A P
3563
+ // ----------------------------------------------------------------------
3564
+ template <class Key, class T,
3565
+ class HashFcn = spp_hash<Key>,
3566
+ class EqualKey = std::equal_to<Key>,
3567
+ class Alloc = SPP_DEFAULT_ALLOCATOR<std::pair<const Key, T> > >
3568
+ class sparse_hash_map
3569
+ {
3570
+ public:
3571
+ typedef typename std::pair<const Key, T> value_type;
3572
+
3573
+ private:
3574
+ // Apparently select1st is not stl-standard, so we define our own
3575
+ struct SelectKey
3576
+ {
3577
+ typedef const Key& result_type;
3578
+
3579
+ inline const Key& operator()(const value_type& p) const
3580
+ {
3581
+ return p.first;
3582
+ }
3583
+ };
3584
+
3585
+ struct SetKey
3586
+ {
3587
+ inline void operator()(value_type* value, const Key& new_key) const
3588
+ {
3589
+ *const_cast<Key*>(&value->first) = new_key;
3590
+ }
3591
+ };
3592
+
3593
+ // For operator[].
3594
+ struct DefaultValue
3595
+ {
3596
+ inline value_type operator()(const Key& key) const
3597
+ {
3598
+ return std::make_pair(key, T());
3599
+ }
3600
+ };
3601
+
3602
+ // The actual data
3603
+ typedef sparse_hashtable<value_type, Key, HashFcn, SelectKey,
3604
+ SetKey, EqualKey, Alloc> ht;
3605
+
3606
+ public:
3607
+ typedef typename ht::key_type key_type;
3608
+ typedef T data_type;
3609
+ typedef T mapped_type;
3610
+ typedef typename ht::hasher hasher;
3611
+ typedef typename ht::key_equal key_equal;
3612
+ typedef Alloc allocator_type;
3613
+
3614
+ typedef typename ht::size_type size_type;
3615
+ typedef typename ht::difference_type difference_type;
3616
+ typedef typename ht::pointer pointer;
3617
+ typedef typename ht::const_pointer const_pointer;
3618
+ typedef typename ht::reference reference;
3619
+ typedef typename ht::const_reference const_reference;
3620
+
3621
+ typedef typename ht::iterator iterator;
3622
+ typedef typename ht::const_iterator const_iterator;
3623
+ typedef typename ht::local_iterator local_iterator;
3624
+ typedef typename ht::const_local_iterator const_local_iterator;
3625
+
3626
+ // Iterator functions
3627
+ iterator begin() { return rep.begin(); }
3628
+ iterator end() { return rep.end(); }
3629
+ const_iterator begin() const { return rep.cbegin(); }
3630
+ const_iterator end() const { return rep.cend(); }
3631
+ const_iterator cbegin() const { return rep.cbegin(); }
3632
+ const_iterator cend() const { return rep.cend(); }
3633
+
3634
+ // These come from tr1's unordered_map. For us, a bucket has 0 or 1 elements.
3635
+ local_iterator begin(size_type i) { return rep.begin(i); }
3636
+ local_iterator end(size_type i) { return rep.end(i); }
3637
+ const_local_iterator begin(size_type i) const { return rep.begin(i); }
3638
+ const_local_iterator end(size_type i) const { return rep.end(i); }
3639
+ const_local_iterator cbegin(size_type i) const { return rep.cbegin(i); }
3640
+ const_local_iterator cend(size_type i) const { return rep.cend(i); }
3641
+
3642
+ // Accessor functions
3643
+ // ------------------
3644
+ allocator_type get_allocator() const { return rep.get_allocator(); }
3645
+ hasher hash_funct() const { return rep.hash_funct(); }
3646
+ hasher hash_function() const { return hash_funct(); }
3647
+ key_equal key_eq() const { return rep.key_eq(); }
3648
+
3649
+
3650
+ // Constructors
3651
+ // ------------
3652
+ explicit sparse_hash_map(size_type n = 0,
3653
+ const hasher& hf = hasher(),
3654
+ const key_equal& eql = key_equal(),
3655
+ const allocator_type& alloc = allocator_type())
3656
+ : rep(n, hf, eql, SelectKey(), SetKey(), alloc)
3657
+ {
3658
+ }
3659
+
3660
+ explicit sparse_hash_map(const allocator_type& alloc) :
3661
+ rep(0, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3662
+ {
3663
+ }
3664
+
3665
+ sparse_hash_map(size_type n, const allocator_type& alloc) :
3666
+ rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3667
+ {
3668
+ }
3669
+
3670
+ sparse_hash_map(size_type n, const hasher& hf, const allocator_type& alloc) :
3671
+ rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc)
3672
+ {
3673
+ }
3674
+
3675
+ template <class InputIterator>
3676
+ sparse_hash_map(InputIterator f, InputIterator l,
3677
+ size_type n = 0,
3678
+ const hasher& hf = hasher(),
3679
+ const key_equal& eql = key_equal(),
3680
+ const allocator_type& alloc = allocator_type())
3681
+ : rep(n, hf, eql, SelectKey(), SetKey(), alloc)
3682
+ {
3683
+ rep.insert(f, l);
3684
+ }
3685
+
3686
+ template <class InputIterator>
3687
+ sparse_hash_map(InputIterator f, InputIterator l,
3688
+ size_type n, const allocator_type& alloc)
3689
+ : rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3690
+ {
3691
+ rep.insert(f, l);
3692
+ }
3693
+
3694
+ template <class InputIterator>
3695
+ sparse_hash_map(InputIterator f, InputIterator l,
3696
+ size_type n, const hasher& hf, const allocator_type& alloc)
3697
+ : rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc)
3698
+ {
3699
+ rep.insert(f, l);
3700
+ }
3701
+
3702
+ sparse_hash_map(const sparse_hash_map &o) :
3703
+ rep(o.rep)
3704
+ {}
3705
+
3706
+ sparse_hash_map(const sparse_hash_map &o,
3707
+ const allocator_type& alloc) :
3708
+ rep(o.rep, alloc)
3709
+ {}
3710
+
3711
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
3712
+ sparse_hash_map(sparse_hash_map &&o) :
3713
+ rep(std::move(o.rep))
3714
+ {}
3715
+
3716
+ sparse_hash_map(sparse_hash_map &&o,
3717
+ const allocator_type& alloc) :
3718
+ rep(std::move(o.rep), alloc)
3719
+ {}
3720
+ #endif
3721
+
3722
+ #if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST)
3723
+ sparse_hash_map(std::initializer_list<value_type> init,
3724
+ size_type n = 0,
3725
+ const hasher& hf = hasher(),
3726
+ const key_equal& eql = key_equal(),
3727
+ const allocator_type& alloc = allocator_type())
3728
+ : rep(n, hf, eql, SelectKey(), SetKey(), alloc)
3729
+ {
3730
+ rep.insert(init.begin(), init.end());
3731
+ }
3732
+
3733
+ sparse_hash_map(std::initializer_list<value_type> init,
3734
+ size_type n, const allocator_type& alloc) :
3735
+ rep(n, hasher(), key_equal(), SelectKey(), SetKey(), alloc)
3736
+ {
3737
+ rep.insert(init.begin(), init.end());
3738
+ }
3739
+
3740
+ sparse_hash_map(std::initializer_list<value_type> init,
3741
+ size_type n, const hasher& hf, const allocator_type& alloc) :
3742
+ rep(n, hf, key_equal(), SelectKey(), SetKey(), alloc)
3743
+ {
3744
+ rep.insert(init.begin(), init.end());
3745
+ }
3746
+
3747
+ sparse_hash_map& operator=(std::initializer_list<value_type> init)
3748
+ {
3749
+ rep.clear();
3750
+ rep.insert(init.begin(), init.end());
3751
+ return *this;
3752
+ }
3753
+
3754
+ void insert(std::initializer_list<value_type> init)
3755
+ {
3756
+ rep.insert(init.begin(), init.end());
3757
+ }
3758
+ #endif
3759
+
3760
+ sparse_hash_map& operator=(const sparse_hash_map &o)
3761
+ {
3762
+ rep = o.rep;
3763
+ return *this;
3764
+ }
3765
+
3766
+ void clear() { rep.clear(); }
3767
+ void swap(sparse_hash_map& hs) { rep.swap(hs.rep); }
3768
+
3769
+ // Functions concerning size
3770
+ // -------------------------
3771
+ size_type size() const { return rep.size(); }
3772
+ size_type max_size() const { return rep.max_size(); }
3773
+ bool empty() const { return rep.empty(); }
3774
+ size_type bucket_count() const { return rep.bucket_count(); }
3775
+ size_type max_bucket_count() const { return rep.max_bucket_count(); }
3776
+
3777
+ size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
3778
+ size_type bucket(const key_type& key) const { return rep.bucket(key); }
3779
+ float load_factor() const { return size() * 1.0f / bucket_count(); }
3780
+
3781
+ float max_load_factor() const { return rep.get_enlarge_factor(); }
3782
+ void max_load_factor(float grow) { rep.set_enlarge_factor(grow); }
3783
+
3784
+ float min_load_factor() const { return rep.get_shrink_factor(); }
3785
+ void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); }
3786
+
3787
+ void set_resizing_parameters(float shrink, float grow)
3788
+ {
3789
+ rep.set_resizing_parameters(shrink, grow);
3790
+ }
3791
+
3792
+ void resize(size_type cnt) { rep.resize(cnt); }
3793
+ void rehash(size_type cnt) { resize(cnt); } // c++11 name
3794
+ void reserve(size_type cnt) { resize(cnt); } // c++11
3795
+
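+ // Illustrative sketch (not part of the library): the C++11-style names above
+ // forward to the resizing machinery of the underlying hashtable.
+ //
+ //     spp_::sparse_hash_map<std::string, int> m;
+ //     m.reserve(10000);          // same as resize(10000): room for ~10000 elements
+ //     m.max_load_factor(0.7f);   // grow once more than 70% full
+ //     m.min_load_factor(0.0f);   // never shrink automatically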
3796
+ // Lookup
3797
+ // ------
3798
+ iterator find(const key_type& key) { return rep.find(key); }
3799
+ const_iterator find(const key_type& key) const { return rep.find(key); }
3800
+ bool contains(const key_type& key) const { return rep.find(key) != rep.end(); }
3801
+
3802
+ mapped_type& operator[](const key_type& key)
3803
+ {
3804
+ return rep.template find_or_insert<DefaultValue>(key).second;
3805
+ }
3806
+
3807
+ size_type count(const key_type& key) const { return rep.count(key); }
3808
+
3809
+ std::pair<iterator, iterator>
3810
+ equal_range(const key_type& key) { return rep.equal_range(key); }
3811
+
3812
+ std::pair<const_iterator, const_iterator>
3813
+ equal_range(const key_type& key) const { return rep.equal_range(key); }
3814
+
3815
+ mapped_type& at(const key_type& key)
3816
+ {
3817
+ iterator it = rep.find(key);
3818
+ if (it == rep.end())
3819
+ throw_exception(std::out_of_range("at: key not present"));
3820
+ return it->second;
3821
+ }
3822
+
3823
+ const mapped_type& at(const key_type& key) const
3824
+ {
3825
+ const_iterator it = rep.find(key);
3826
+ if (it == rep.cend())
3827
+ throw_exception(std::out_of_range("at: key not present"));
3828
+ return it->second;
3829
+ }
3830
+
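+ // Illustrative sketch (not part of the library): operator[] default-constructs
+ // a missing value via DefaultValue, while at() throws on a missing key.
+ //
+ //     spp_::sparse_hash_map<std::string, int> counts;
+ //     counts["apple"] += 1;       // inserts {"apple", 0}, then increments it
+ //     int n = counts.at("apple"); // n == 1
+ //     // counts.at("pear");       // would throw std::out_of_range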
3831
+ #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES)
3832
+ template <class... Args>
3833
+ std::pair<iterator, bool> emplace(Args&&... args)
3834
+ {
3835
+ return rep.emplace(std::forward<Args>(args)...);
3836
+ }
3837
+
3838
+ template <class... Args>
3839
+ iterator emplace_hint(const_iterator , Args&&... args)
3840
+ {
3841
+ return rep.emplace(std::forward<Args>(args)...).first;
3842
+ }
3843
+ #endif
3844
+
3845
+ // Insert
3846
+ // ------
3847
+ std::pair<iterator, bool>
3848
+ insert(const value_type& obj) { return rep.insert(obj); }
3849
+
3850
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
3851
+ template< class P >
3852
+ std::pair<iterator, bool> insert(P&& obj) { return rep.insert(std::forward<P>(obj)); }
3853
+ #endif
3854
+
3855
+ template <class InputIterator>
3856
+ void insert(InputIterator f, InputIterator l) { rep.insert(f, l); }
3857
+
3858
+ void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
3859
+
3860
+ iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; }
3861
+ iterator insert(const_iterator /*unused*/, const value_type& obj) { return insert(obj).first; }
3862
+
3863
+ // Deleted key routines - just to keep google test framework happy
3864
+ // we don't actually use the deleted key
3865
+ // ---------------------------------------------------------------
3866
+ void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
3867
+ void clear_deleted_key() { rep.clear_deleted_key(); }
3868
+ key_type deleted_key() const { return rep.deleted_key(); }
3869
+
3870
+ // Erase
3871
+ // -----
3872
+ size_type erase(const key_type& key) { return rep.erase(key); }
3873
+ iterator erase(iterator it) { return rep.erase(it); }
3874
+ iterator erase(iterator f, iterator l) { return rep.erase(f, l); }
3875
+ iterator erase(const_iterator it) { return rep.erase(it); }
3876
+ iterator erase(const_iterator f, const_iterator l){ return rep.erase(f, l); }
3877
+
3878
+ // Comparison
3879
+ // ----------
3880
+ bool operator==(const sparse_hash_map& hs) const { return rep == hs.rep; }
3881
+ bool operator!=(const sparse_hash_map& hs) const { return rep != hs.rep; }
3882
+
3883
+
3884
+ // I/O -- this is an add-on for writing metainformation to disk
3885
+ //
3886
+ // For maximum flexibility, this does not assume a particular
3887
+ // file type (though it will probably be a FILE *). We just pass
3888
+ // the fp through to rep.
3889
+
3890
+ // If your keys and values are simple enough, you can pass this
3891
+ // serializer to serialize()/unserialize(). "Simple enough" means
3892
+ // value_type is a POD type that contains no pointers. Note,
3893
+ // however, we don't try to normalize endianness.
3894
+ // ---------------------------------------------------------------
3895
+ typedef typename ht::NopointerSerializer NopointerSerializer;
3896
+
3897
+ // serializer: a class providing operator()(OUTPUT*, const value_type&)
3898
+ // (writing value_type to OUTPUT). You can specify a
3899
+ // NopointerSerializer object if appropriate (see above).
3900
+ // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
3901
+ // pointer to a class providing size_t Write(const void*, size_t),
3902
+ // which writes a buffer into a stream (which fp presumably
3903
+ // owns) and returns the number of bytes successfully written.
3904
+ // Note basic_ostream<not_char> is not currently supported.
3905
+ // ---------------------------------------------------------------
3906
+ template <typename ValueSerializer, typename OUTPUT>
3907
+ bool serialize(ValueSerializer serializer, OUTPUT* fp)
3908
+ {
3909
+ return rep.serialize(serializer, fp);
3910
+ }
3911
+
3912
+ // serializer: a functor providing operator()(INPUT*, value_type*)
3913
+ // (reading from INPUT and into value_type). You can specify a
3914
+ // NopointerSerializer object if appropriate (see above).
3915
+ // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
3916
+ // pointer to a class providing size_t Read(void*, size_t),
3917
+ // which reads into a buffer from a stream (which fp presumably
3918
+ // owns) and returns the number of bytes successfully read.
3919
+ // Note basic_istream<not_char> is not currently supported.
3920
+ // NOTE: Since value_type is std::pair<const Key, T>, ValueSerializer
3921
+ // may need to do a const cast in order to fill in the key.
3922
+ // NOTE: if Key or T are not POD types, the serializer MUST use
3923
+ // placement-new to initialize their values, rather than a normal
3924
+ // equals-assignment or similar. (The value_type* passed into the
3925
+ // serializer points to garbage memory.)
3926
+ // ---------------------------------------------------------------
3927
+ template <typename ValueSerializer, typename INPUT>
3928
+ bool unserialize(ValueSerializer serializer, INPUT* fp)
3929
+ {
3930
+ return rep.unserialize(serializer, fp);
3931
+ }
3932
+
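+ // Illustrative sketch (not part of the library, error checking omitted):
+ // saving and restoring a map of POD key/value pairs with the built-in
+ // NopointerSerializer and a FILE* ("map.bin" is a hypothetical file name).
+ //
+ //     spp_::sparse_hash_map<int, int> m;
+ //     m[1] = 10; m[2] = 20;
+ //     FILE* out = fopen("map.bin", "wb");
+ //     m.serialize(spp_::sparse_hash_map<int, int>::NopointerSerializer(), out);
+ //     fclose(out);
+ //
+ //     spp_::sparse_hash_map<int, int> copy;
+ //     FILE* in = fopen("map.bin", "rb");
+ //     copy.unserialize(spp_::sparse_hash_map<int, int>::NopointerSerializer(), in);
+ //     fclose(in);
+ //
+ // Keys or values that are not plain PODs need a user-written ValueSerializer
+ // as described above.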
3933
+ // The four methods below are DEPRECATED.
3934
+ // Use serialize() and unserialize() for new code.
3935
+ // -----------------------------------------------
3936
+ template <typename OUTPUT>
3937
+ bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
3938
+
3939
+ template <typename INPUT>
3940
+ bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
3941
+
3942
+ template <typename OUTPUT>
3943
+ bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
3944
+
3945
+ template <typename INPUT>
3946
+ bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
3947
+
3948
+
3949
+ private:
3950
+ // The actual data
3951
+ // ---------------
3952
+ ht rep;
3953
+ };
3954
+
3955
+ // ----------------------------------------------------------------------
3956
+ // S P A R S E _ H A S H _ S E T
3957
+ // ----------------------------------------------------------------------
3958
+
3959
+ template <class Value,
3960
+ class HashFcn = spp_hash<Value>,
3961
+ class EqualKey = std::equal_to<Value>,
3962
+ class Alloc = SPP_DEFAULT_ALLOCATOR<Value> >
3963
+ class sparse_hash_set
3964
+ {
3965
+ private:
3966
+ // Apparently identity is not stl-standard, so we define our own
3967
+ struct Identity
3968
+ {
3969
+ typedef const Value& result_type;
3970
+ inline const Value& operator()(const Value& v) const { return v; }
3971
+ };
3972
+
3973
+ struct SetKey
3974
+ {
3975
+ inline void operator()(Value* value, const Value& new_key) const
3976
+ {
3977
+ *value = new_key;
3978
+ }
3979
+ };
3980
+
3981
+ typedef sparse_hashtable<Value, Value, HashFcn, Identity, SetKey,
3982
+ EqualKey, Alloc> ht;
3983
+
3984
+ public:
3985
+ typedef typename ht::key_type key_type;
3986
+ typedef typename ht::value_type value_type;
3987
+ typedef typename ht::hasher hasher;
3988
+ typedef typename ht::key_equal key_equal;
3989
+ typedef Alloc allocator_type;
3990
+
3991
+ typedef typename ht::size_type size_type;
3992
+ typedef typename ht::difference_type difference_type;
3993
+ typedef typename ht::const_pointer pointer;
3994
+ typedef typename ht::const_pointer const_pointer;
3995
+ typedef typename ht::const_reference reference;
3996
+ typedef typename ht::const_reference const_reference;
3997
+
3998
+ typedef typename ht::const_iterator iterator;
3999
+ typedef typename ht::const_iterator const_iterator;
4000
+ typedef typename ht::const_local_iterator local_iterator;
4001
+ typedef typename ht::const_local_iterator const_local_iterator;
4002
+
4003
+
4004
+ // Iterator functions -- recall all iterators are const
4005
+ iterator begin() const { return rep.begin(); }
4006
+ iterator end() const { return rep.end(); }
4007
+ const_iterator cbegin() const { return rep.cbegin(); }
4008
+ const_iterator cend() const { return rep.cend(); }
4009
+
4010
+ // These come from tr1's unordered_set. For us, a bucket has 0 or 1 elements.
4011
+ local_iterator begin(size_type i) const { return rep.begin(i); }
4012
+ local_iterator end(size_type i) const { return rep.end(i); }
4013
+ local_iterator cbegin(size_type i) const { return rep.cbegin(i); }
4014
+ local_iterator cend(size_type i) const { return rep.cend(i); }
4015
+
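+ // Illustrative note (not part of the library): every element of a set is its
+ // own key, so all iterators above are const and elements cannot be modified in
+ // place. To change an element, erase it and insert the new value:
+ //
+ //     spp_::sparse_hash_set<int> s;
+ //     s.insert(1);
+ //     spp_::sparse_hash_set<int>::iterator it = s.find(1);
+ //     // *it = 2;   // does not compile: iterator is a const_iterator
+ //     s.erase(it);
+ //     s.insert(2);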
4016
+
4017
+ // Accessor functions
4018
+ // ------------------
4019
+ allocator_type get_allocator() const { return rep.get_allocator(); }
4020
+ hasher hash_funct() const { return rep.hash_funct(); }
4021
+ hasher hash_function() const { return hash_funct(); } // tr1 name
4022
+ key_equal key_eq() const { return rep.key_eq(); }
4023
+
4024
+
4025
+ // Constructors
4026
+ // ------------
4027
+ explicit sparse_hash_set(size_type n = 0,
4028
+ const hasher& hf = hasher(),
4029
+ const key_equal& eql = key_equal(),
4030
+ const allocator_type& alloc = allocator_type()) :
4031
+ rep(n, hf, eql, Identity(), SetKey(), alloc)
4032
+ {
4033
+ }
4034
+
4035
+ explicit sparse_hash_set(const allocator_type& alloc) :
4036
+ rep(0, hasher(), key_equal(), Identity(), SetKey(), alloc)
4037
+ {
4038
+ }
4039
+
4040
+ sparse_hash_set(size_type n, const allocator_type& alloc) :
4041
+ rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc)
4042
+ {
4043
+ }
4044
+
4045
+ sparse_hash_set(size_type n, const hasher& hf,
4046
+ const allocator_type& alloc) :
4047
+ rep(n, hf, key_equal(), Identity(), SetKey(), alloc)
4048
+ {
4049
+ }
4050
+
4051
+ template <class InputIterator>
4052
+ sparse_hash_set(InputIterator f, InputIterator l,
4053
+ size_type n = 0,
4054
+ const hasher& hf = hasher(),
4055
+ const key_equal& eql = key_equal(),
4056
+ const allocator_type& alloc = allocator_type())
4057
+ : rep(n, hf, eql, Identity(), SetKey(), alloc)
4058
+ {
4059
+ rep.insert(f, l);
4060
+ }
4061
+
4062
+ template <class InputIterator>
4063
+ sparse_hash_set(InputIterator f, InputIterator l,
4064
+ size_type n, const allocator_type& alloc)
4065
+ : rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc)
4066
+ {
4067
+ rep.insert(f, l);
4068
+ }
4069
+
4070
+ template <class InputIterator>
4071
+ sparse_hash_set(InputIterator f, InputIterator l,
4072
+ size_type n, const hasher& hf, const allocator_type& alloc)
4073
+ : rep(n, hf, key_equal(), Identity(), SetKey(), alloc)
4074
+ {
4075
+ rep.insert(f, l);
4076
+ }
4077
+
4078
+ sparse_hash_set(const sparse_hash_set &o) :
4079
+ rep(o.rep)
4080
+ {}
4081
+
4082
+ sparse_hash_set(const sparse_hash_set &o,
4083
+ const allocator_type& alloc) :
4084
+ rep(o.rep, alloc)
4085
+ {}
4086
+
4087
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
4088
+ sparse_hash_set(sparse_hash_set &&o) :
4089
+ rep(std::move(o.rep))
4090
+ {}
4091
+
4092
+ sparse_hash_set(sparse_hash_set &&o,
4093
+ const allocator_type& alloc) :
4094
+ rep(std::move(o.rep), alloc)
4095
+ {}
4096
+ #endif
4097
+
4098
+ #if !defined(SPP_NO_CXX11_HDR_INITIALIZER_LIST)
4099
+ sparse_hash_set(std::initializer_list<value_type> init,
4100
+ size_type n = 0,
4101
+ const hasher& hf = hasher(),
4102
+ const key_equal& eql = key_equal(),
4103
+ const allocator_type& alloc = allocator_type()) :
4104
+ rep(n, hf, eql, Identity(), SetKey(), alloc)
4105
+ {
4106
+ rep.insert(init.begin(), init.end());
4107
+ }
4108
+
4109
+ sparse_hash_set(std::initializer_list<value_type> init,
4110
+ size_type n, const allocator_type& alloc) :
4111
+ rep(n, hasher(), key_equal(), Identity(), SetKey(), alloc)
4112
+ {
4113
+ rep.insert(init.begin(), init.end());
4114
+ }
4115
+
4116
+ sparse_hash_set(std::initializer_list<value_type> init,
4117
+ size_type n, const hasher& hf,
4118
+ const allocator_type& alloc) :
4119
+ rep(n, hf, key_equal(), Identity(), SetKey(), alloc)
4120
+ {
4121
+ rep.insert(init.begin(), init.end());
4122
+ }
4123
+
4124
+ sparse_hash_set& operator=(std::initializer_list<value_type> init)
4125
+ {
4126
+ rep.clear();
4127
+ rep.insert(init.begin(), init.end());
4128
+ return *this;
4129
+ }
4130
+
4131
+ void insert(std::initializer_list<value_type> init)
4132
+ {
4133
+ rep.insert(init.begin(), init.end());
4134
+ }
4135
+
4136
+ #endif
4137
+
4138
+ sparse_hash_set& operator=(const sparse_hash_set &o)
4139
+ {
4140
+ rep = o.rep;
4141
+ return *this;
4142
+ }
4143
+
4144
+ void clear() { rep.clear(); }
4145
+ void swap(sparse_hash_set& hs) { rep.swap(hs.rep); }
4146
+
4147
+
4148
+ // Functions concerning size
4149
+ // -------------------------
4150
+ size_type size() const { return rep.size(); }
4151
+ size_type max_size() const { return rep.max_size(); }
4152
+ bool empty() const { return rep.empty(); }
4153
+ size_type bucket_count() const { return rep.bucket_count(); }
4154
+ size_type max_bucket_count() const { return rep.max_bucket_count(); }
4155
+
4156
+ size_type bucket_size(size_type i) const { return rep.bucket_size(i); }
4157
+ size_type bucket(const key_type& key) const { return rep.bucket(key); }
4158
+
4159
+ float load_factor() const { return size() * 1.0f / bucket_count(); }
4160
+
4161
+ float max_load_factor() const { return rep.get_enlarge_factor(); }
4162
+ void max_load_factor(float grow) { rep.set_enlarge_factor(grow); }
4163
+
4164
+ float min_load_factor() const { return rep.get_shrink_factor(); }
4165
+ void min_load_factor(float shrink){ rep.set_shrink_factor(shrink); }
4166
+
4167
+ void set_resizing_parameters(float shrink, float grow)
4168
+ {
4169
+ rep.set_resizing_parameters(shrink, grow);
4170
+ }
4171
+
4172
+ void resize(size_type cnt) { rep.resize(cnt); }
4173
+ void rehash(size_type cnt) { resize(cnt); } // c++11 name
4174
+ void reserve(size_type cnt) { resize(cnt); } // c++11
4175
+
4176
+ // Lookup
4177
+ // ------
4178
+ iterator find(const key_type& key) const { return rep.find(key); }
4179
+ bool contains(const key_type& key) const { return rep.find(key) != rep.end(); }
4180
+
4181
+ size_type count(const key_type& key) const { return rep.count(key); }
4182
+
4183
+ std::pair<iterator, iterator>
4184
+ equal_range(const key_type& key) const { return rep.equal_range(key); }
4185
+
4186
+ #if !defined(SPP_NO_CXX11_VARIADIC_TEMPLATES)
4187
+ template <class... Args>
4188
+ std::pair<iterator, bool> emplace(Args&&... args)
4189
+ {
4190
+ return rep.emplace(std::forward<Args>(args)...);
4191
+ }
4192
+
4193
+ template <class... Args>
4194
+ iterator emplace_hint(const_iterator , Args&&... args)
4195
+ {
4196
+ return rep.emplace(std::forward<Args>(args)...).first;
4197
+ }
4198
+ #endif
4199
+
4200
+ // Insert
4201
+ // ------
4202
+ std::pair<iterator, bool> insert(const value_type& obj)
4203
+ {
4204
+ std::pair<typename ht::iterator, bool> p = rep.insert(obj);
4205
+ return std::pair<iterator, bool>(p.first, p.second); // const to non-const
4206
+ }
4207
+
4208
+ #if !defined(SPP_NO_CXX11_RVALUE_REFERENCES)
4209
+ template<class P>
4210
+ std::pair<iterator, bool> insert(P&& obj) { return rep.insert(std::forward<P>(obj)); }
4211
+ #endif
4212
+
4213
+ template <class InputIterator>
4214
+ void insert(InputIterator f, InputIterator l) { rep.insert(f, l); }
4215
+
4216
+ void insert(const_iterator f, const_iterator l) { rep.insert(f, l); }
4217
+
4218
+ iterator insert(iterator /*unused*/, const value_type& obj) { return insert(obj).first; }
4219
+
4220
+ // Deleted key - do nothing - just to keep google test framework happy
4221
+ // -------------------------------------------------------------------
4222
+ void set_deleted_key(const key_type& key) { rep.set_deleted_key(key); }
4223
+ void clear_deleted_key() { rep.clear_deleted_key(); }
4224
+ key_type deleted_key() const { return rep.deleted_key(); }
4225
+
4226
+ // Erase
4227
+ // -----
4228
+ size_type erase(const key_type& key) { return rep.erase(key); }
4229
+ iterator erase(iterator it) { return rep.erase(it); }
4230
+ iterator erase(iterator f, iterator l) { return rep.erase(f, l); }
4231
+
4232
+ // Comparison
4233
+ // ----------
4234
+ bool operator==(const sparse_hash_set& hs) const { return rep == hs.rep; }
4235
+ bool operator!=(const sparse_hash_set& hs) const { return rep != hs.rep; }
4236
+
4237
+
4238
+ // I/O -- this is an add-on for writing metainformation to disk
4239
+ //
4240
+ // For maximum flexibility, this does not assume a particular
4241
+ // file type (though it will probably be a FILE *). We just pass
4242
+ // the fp through to rep.
4243
+
4244
+ // If your keys and values are simple enough, you can pass this
4245
+ // serializer to serialize()/unserialize(). "Simple enough" means
4246
+ // value_type is a POD type that contains no pointers. Note,
4247
+ // however, we don't try to normalize endianness.
4248
+ // ---------------------------------------------------------------
4249
+ typedef typename ht::NopointerSerializer NopointerSerializer;
4250
+
4251
+ // serializer: a class providing operator()(OUTPUT*, const value_type&)
4252
+ // (writing value_type to OUTPUT). You can specify a
4253
+ // NopointerSerializer object if appropriate (see above).
4254
+ // fp: either a FILE*, OR an ostream*/subclass_of_ostream*, OR a
4255
+ // pointer to a class providing size_t Write(const void*, size_t),
4256
+ // which writes a buffer into a stream (which fp presumably
4257
+ // owns) and returns the number of bytes successfully written.
4258
+ // Note basic_ostream<not_char> is not currently supported.
4259
+ // ---------------------------------------------------------------
4260
+ template <typename ValueSerializer, typename OUTPUT>
4261
+ bool serialize(ValueSerializer serializer, OUTPUT* fp)
4262
+ {
4263
+ return rep.serialize(serializer, fp);
4264
+ }
4265
+
4266
+ // serializer: a functor providing operator()(INPUT*, value_type*)
4267
+ // (reading from INPUT and into value_type). You can specify a
4268
+ // NopointerSerializer object if appropriate (see above).
4269
+ // fp: either a FILE*, OR an istream*/subclass_of_istream*, OR a
4270
+ // pointer to a class providing size_t Read(void*, size_t),
4271
+ // which reads into a buffer from a stream (which fp presumably
4272
+ // owns) and returns the number of bytes successfully read.
4273
+ // Note basic_istream<not_char> is not currently supported.
4274
+ // NOTE: Since value_type is const Key, ValueSerializer
4275
+ // may need to do a const cast in order to fill in the key.
4276
+ // NOTE: if Key is not a POD type, the serializer MUST use
4277
+ // placement-new to initialize its value, rather than a normal
4278
+ // equals-assignment or similar. (The value_type* passed into
4279
+ // the serializer points to garbage memory.)
4280
+ // ---------------------------------------------------------------
4281
+ template <typename ValueSerializer, typename INPUT>
4282
+ bool unserialize(ValueSerializer serializer, INPUT* fp)
4283
+ {
4284
+ return rep.unserialize(serializer, fp);
4285
+ }
4286
+
4287
+ // The four methods below are DEPRECATED.
4288
+ // Use serialize() and unserialize() for new code.
4289
+ // -----------------------------------------------
4290
+ template <typename OUTPUT>
4291
+ bool write_metadata(OUTPUT *fp) { return rep.write_metadata(fp); }
4292
+
4293
+ template <typename INPUT>
4294
+ bool read_metadata(INPUT *fp) { return rep.read_metadata(fp); }
4295
+
4296
+ template <typename OUTPUT>
4297
+ bool write_nopointer_data(OUTPUT *fp) { return rep.write_nopointer_data(fp); }
4298
+
4299
+ template <typename INPUT>
4300
+ bool read_nopointer_data(INPUT *fp) { return rep.read_nopointer_data(fp); }
4301
+
4302
+ private:
4303
+ // The actual data
4304
+ // ---------------
4305
+ ht rep;
4306
+ };
4307
+
4308
+ } // spp_ namespace
4309
+
4310
+
4311
+ // We need a global swap for all our classes as well
4312
+ // -------------------------------------------------
4313
+
4314
+ template <class T, class Alloc>
4315
+ inline void swap(spp_::sparsegroup<T,Alloc> &x, spp_::sparsegroup<T,Alloc> &y)
4316
+ {
4317
+ x.swap(y);
4318
+ }
4319
+
4320
+ template <class T, class Alloc>
4321
+ inline void swap(spp_::sparsetable<T,Alloc> &x, spp_::sparsetable<T,Alloc> &y)
4322
+ {
4323
+ x.swap(y);
4324
+ }
4325
+
4326
+ template <class V, class K, class HF, class ExK, class SetK, class EqK, class A>
4327
+ inline void swap(spp_::sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &x,
4328
+ spp_::sparse_hashtable<V,K,HF,ExK,SetK,EqK,A> &y)
4329
+ {
4330
+ x.swap(y);
4331
+ }
4332
+
4333
+ template <class Key, class T, class HashFcn, class EqualKey, class Alloc>
4334
+ inline void swap(spp_::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm1,
4335
+ spp_::sparse_hash_map<Key, T, HashFcn, EqualKey, Alloc>& hm2)
4336
+ {
4337
+ hm1.swap(hm2);
4338
+ }
4339
+
4340
+ template <class Val, class HashFcn, class EqualKey, class Alloc>
4341
+ inline void swap(spp_::sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
4342
+ spp_::sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2)
4343
+ {
4344
+ hs1.swap(hs2);
4345
+ }
4346
+
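+ // Illustrative sketch (not part of the library): these global overloads let an
+ // unqualified swap(a, b) use the containers' O(1) member swap instead of a
+ // copy-based generic swap:
+ //
+ //     spp_::sparse_hash_map<int, int> a, b;
+ //     a[1] = 1;
+ //     swap(a, b);   // equivalent to a.swap(b); no element copies
+ //     // b now holds {1 -> 1} and a is empty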
4347
+ #endif // sparsepp_h_guard_