datasketches 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/datasketches/version.rb +1 -1
  4. data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
  5. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  6. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  7. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  8. data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
  9. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  10. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  11. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  12. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  13. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  14. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
  16. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
  17. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  18. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  19. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  20. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  21. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  22. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  23. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
  25. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  26. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  28. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  29. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  30. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  31. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  32. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  33. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  34. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  35. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  36. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  37. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  38. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  39. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  40. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  41. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  42. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  43. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  44. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  45. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  47. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  48. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  49. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  50. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  51. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  52. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  53. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  54. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  55. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  56. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  57. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  58. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  59. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  60. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  61. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  62. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  63. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  64. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  65. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  66. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  67. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
  69. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  70. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  71. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
  73. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  74. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  75. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
  76. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  77. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  78. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  79. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  80. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
  84. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  85. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
  86. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
  87. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  88. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  89. data/vendor/datasketches-cpp/setup.py +1 -1
  90. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  91. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  92. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  93. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  94. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  95. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  96. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
  97. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
  98. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
  99. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  100. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
  101. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
  103. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  104. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  105. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  106. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
  107. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  108. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  109. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
  110. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  111. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  112. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  113. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  114. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  115. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  116. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
  117. metadata +8 -3
@@ -39,15 +39,15 @@ allocator_(allocator),
39
39
  lg_cur_size_(lg_cur_size),
40
40
  lg_max_size_(lg_max_size),
41
41
  num_active_(0),
42
- keys_(allocator_.allocate(1 << lg_cur_size)),
42
+ keys_(allocator_.allocate(1ULL << lg_cur_size)),
43
43
  values_(nullptr),
44
44
  states_(nullptr)
45
45
  {
46
46
  AllocV av(allocator_);
47
- values_ = av.allocate(1 << lg_cur_size);
47
+ values_ = av.allocate(1ULL << lg_cur_size);
48
48
  AllocU16 au16(allocator_);
49
- states_ = au16.allocate(1 << lg_cur_size);
50
- std::fill(states_, states_ + (1 << lg_cur_size), 0);
49
+ states_ = au16.allocate(1ULL << lg_cur_size);
50
+ std::fill(states_, states_ + (1ULL << lg_cur_size), static_cast<uint16_t>(0));
51
51
  }
52
52
 
53
53
  template<typename K, typename V, typename H, typename E, typename A>
@@ -56,14 +56,14 @@ allocator_(other.allocator_),
56
56
  lg_cur_size_(other.lg_cur_size_),
57
57
  lg_max_size_(other.lg_max_size_),
58
58
  num_active_(other.num_active_),
59
- keys_(allocator_.allocate(1 << lg_cur_size_)),
59
+ keys_(allocator_.allocate(1ULL << lg_cur_size_)),
60
60
  values_(nullptr),
61
61
  states_(nullptr)
62
62
  {
63
63
  AllocV av(allocator_);
64
- values_ = av.allocate(1 << lg_cur_size_);
64
+ values_ = av.allocate(1ULL << lg_cur_size_);
65
65
  AllocU16 au16(allocator_);
66
- states_ = au16.allocate(1 << lg_cur_size_);
66
+ states_ = au16.allocate(1ULL << lg_cur_size_);
67
67
  const uint32_t size = 1 << lg_cur_size_;
68
68
  if (num_active_ > 0) {
69
69
  auto num = num_active_;
@@ -177,7 +177,7 @@ uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
177
177
 
178
178
  template<typename K, typename V, typename H, typename E, typename A>
179
179
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
180
- return (1 << lg_cur_size_) * LOAD_FACTOR;
180
+ return static_cast<uint32_t>((1 << lg_cur_size_) * LOAD_FACTOR);
181
181
  }
182
182
 
183
183
  template<typename K, typename V, typename H, typename E, typename A>
@@ -246,7 +246,7 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
246
246
  // if none are found, the status is changed
247
247
  states_[delete_index] = 0; // mark as empty
248
248
  keys_[delete_index].~K();
249
- uint32_t drift = 1;
249
+ uint16_t drift = 1;
250
250
  const uint32_t mask = (1 << lg_cur_size_) - 1;
251
251
  uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
252
252
  // advance until we find a free location replacing locations as needed
@@ -322,7 +322,7 @@ void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
322
322
  values_ = av.allocate(new_size);
323
323
  AllocU16 au16(allocator_);
324
324
  states_ = au16.allocate(new_size);
325
- std::fill(states_, states_ + new_size, 0);
325
+ std::fill(states_, states_ + new_size, static_cast<uint16_t>(0));
326
326
  num_active_ = 0;
327
327
  lg_cur_size_ = lg_new_size;
328
328
  for (uint32_t i = 0; i < old_size; i++) {
@@ -39,8 +39,8 @@ TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
39
39
  TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
40
40
  reverse_purge_hash_map<int> map(3, 4, std::allocator<int>());
41
41
  for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
42
- int sum = 0;
43
- for (auto &it: map) sum += it.second;
42
+ uint64_t sum = 0;
43
+ for (auto it: map) sum += it.second;
44
44
  REQUIRE(sum == 11);
45
45
  }
46
46
 
@@ -26,15 +26,15 @@
26
26
  namespace datasketches {
27
27
 
28
28
  template<typename A>
29
- AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
29
+ AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
30
30
  lgConfigK(lgConfigK),
31
31
  lgAuxArrInts(lgAuxArrInts),
32
32
  auxCount(0),
33
- entries(1 << lgAuxArrInts, 0, allocator)
33
+ entries(1ULL << lgAuxArrInts, 0, allocator)
34
34
  {}
35
35
 
36
36
  template<typename A>
37
- AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
37
+ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
38
38
  return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
39
39
  }
40
40
 
@@ -45,42 +45,42 @@ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
45
45
 
46
46
  template<typename A>
47
47
  AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
48
- int lgConfigK,
49
- int auxCount, int lgAuxArrInts,
48
+ uint8_t lgConfigK,
49
+ uint32_t auxCount, uint8_t lgAuxArrInts,
50
50
  bool srcCompact, const A& allocator) {
51
- int lgArrInts = lgAuxArrInts;
51
+ uint8_t lgArrInts = lgAuxArrInts;
52
52
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
53
53
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
54
54
  } else { // updatable
55
55
  lgArrInts = lgAuxArrInts;
56
56
  }
57
57
 
58
- int configKmask = (1 << lgConfigK) - 1;
58
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
59
59
 
60
60
  AuxHashMap<A>* auxHashMap;
61
- const int* auxPtr = static_cast<const int*>(bytes);
61
+ const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
62
62
  if (srcCompact) {
63
63
  if (len < auxCount * sizeof(int)) {
64
64
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
65
65
  }
66
66
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
67
- for (int i = 0; i < auxCount; ++i) {
68
- int pair = auxPtr[i];
69
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
- int value = HllUtil<A>::getValue(pair);
67
+ for (uint32_t i = 0; i < auxCount; ++i) {
68
+ const uint32_t pair = auxPtr[i];
69
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
+ const uint8_t value = HllUtil<A>::getValue(pair);
71
71
  auxHashMap->mustAdd(slotNo, value);
72
72
  }
73
73
  } else { // updatable
74
- int itemsToRead = 1 << lgAuxArrInts;
75
- if (len < itemsToRead * sizeof(int)) {
74
+ uint32_t itemsToRead = 1 << lgAuxArrInts;
75
+ if (len < itemsToRead * sizeof(uint32_t)) {
76
76
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
77
77
  }
78
78
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
79
- for (int i = 0; i < itemsToRead; ++i) {
80
- int pair = auxPtr[i];
81
- if (pair == HllUtil<A>::EMPTY) { continue; }
82
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
- int value = HllUtil<A>::getValue(pair);
79
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
80
+ const uint32_t pair = auxPtr[i];
81
+ if (pair == hll_constants::EMPTY) { continue; }
82
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
+ const uint8_t value = HllUtil<A>::getValue(pair);
84
84
  auxHashMap->mustAdd(slotNo, value);
85
85
  }
86
86
  }
@@ -94,10 +94,10 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
94
94
  }
95
95
 
96
96
  template<typename A>
97
- AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
98
- const int auxCount, const int lgAuxArrInts,
99
- const bool srcCompact, const A& allocator) {
100
- int lgArrInts = lgAuxArrInts;
97
+ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
98
+ uint32_t auxCount, uint8_t lgAuxArrInts,
99
+ bool srcCompact, const A& allocator) {
100
+ uint8_t lgArrInts = lgAuxArrInts;
101
101
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
102
102
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
103
103
  } else { // updatable
@@ -108,24 +108,22 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
108
108
  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
109
109
  aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
110
110
 
111
- int configKmask = (1 << lgConfigK) - 1;
111
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
112
112
 
113
113
  if (srcCompact) {
114
- int pair;
115
- for (int i = 0; i < auxCount; ++i) {
116
- is.read((char*)&pair, sizeof(pair));
117
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
118
- int value = HllUtil<A>::getValue(pair);
114
+ for (uint32_t i = 0; i < auxCount; ++i) {
115
+ const auto pair = read<int>(is);
116
+ uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
117
+ uint8_t value = HllUtil<A>::getValue(pair);
119
118
  auxHashMap->mustAdd(slotNo, value);
120
119
  }
121
120
  } else { // updatable
122
- int itemsToRead = 1 << lgAuxArrInts;
123
- int pair;
124
- for (int i = 0; i < itemsToRead; ++i) {
125
- is.read((char*)&pair, sizeof(pair));
126
- if (pair == HllUtil<A>::EMPTY) { continue; }
127
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
128
- int value = HllUtil<A>::getValue(pair);
121
+ const uint32_t itemsToRead = 1 << lgAuxArrInts;
122
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
123
+ const auto pair = read<int>(is);
124
+ if (pair == hll_constants::EMPTY) { continue; }
125
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
126
+ const uint8_t value = HllUtil<A>::getValue(pair);
129
127
  auxHashMap->mustAdd(slotNo, value);
130
128
  }
131
129
  }
@@ -153,34 +151,34 @@ AuxHashMap<A>* AuxHashMap<A>::copy() const {
153
151
  }
154
152
 
155
153
  template<typename A>
156
- int AuxHashMap<A>::getAuxCount() const {
154
+ uint32_t AuxHashMap<A>::getAuxCount() const {
157
155
  return auxCount;
158
156
  }
159
157
 
160
158
  template<typename A>
161
- int* AuxHashMap<A>::getAuxIntArr(){
159
+ uint32_t* AuxHashMap<A>::getAuxIntArr(){
162
160
  return entries.data();
163
161
  }
164
162
 
165
163
  template<typename A>
166
- int AuxHashMap<A>::getLgAuxArrInts() const {
164
+ uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
167
165
  return lgAuxArrInts;
168
166
  }
169
167
 
170
168
  template<typename A>
171
- int AuxHashMap<A>::getCompactSizeBytes() const {
169
+ uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
172
170
  return auxCount << 2;
173
171
  }
174
172
 
175
173
  template<typename A>
176
- int AuxHashMap<A>::getUpdatableSizeBytes() const {
174
+ uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
177
175
  return 4 << lgAuxArrInts;
178
176
  }
179
177
 
180
178
  template<typename A>
181
- void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
182
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
183
- const int entry_pair = HllUtil<A>::pair(slotNo, value);
179
+ void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
180
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
181
+ const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
184
182
  if (index >= 0) {
185
183
  throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
186
184
  + std::to_string(slotNo) + ", Value: " + std::to_string(value));
@@ -193,8 +191,8 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
193
191
  }
194
192
 
195
193
  template<typename A>
196
- int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
197
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
194
+ uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
195
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
198
196
  if (index >= 0) {
199
197
  return HllUtil<A>::getValue(entries[index]);
200
198
  }
@@ -203,8 +201,8 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
203
201
  }
204
202
 
205
203
  template<typename A>
206
- void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
207
- const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
204
+ void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
205
+ const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
208
206
  if (idx >= 0) {
209
207
  entries[idx] = HllUtil<A>::pair(slotNo, value);
210
208
  return;
@@ -216,7 +214,7 @@ void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
216
214
 
217
215
  template<typename A>
218
216
  void AuxHashMap<A>::checkGrow() {
219
- if ((HllUtil<A>::RESIZE_DENOM * auxCount) > (HllUtil<A>::RESIZE_NUMER * (1 << lgAuxArrInts))) {
217
+ if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
220
218
  growAuxSpace();
221
219
  }
222
220
  }
@@ -227,10 +225,10 @@ void AuxHashMap<A>::growAuxSpace() {
227
225
  const int newArrLen = 1 << ++lgAuxArrInts;
228
226
  vector_int entries_new(newArrLen, 0, entries.get_allocator());
229
227
  for (size_t i = 0; i < entries.size(); ++i) {
230
- const int fetched = entries[i];
231
- if (fetched != HllUtil<A>::EMPTY) {
228
+ const uint32_t fetched = entries[i];
229
+ if (fetched != hll_constants::EMPTY) {
232
230
  // find empty in new array
233
- const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
231
+ const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
234
232
  entries_new[~idx] = fetched;
235
233
  }
236
234
  }
@@ -243,21 +241,20 @@ void AuxHashMap<A>::growAuxSpace() {
243
241
  //Continues searching.
244
242
  //If the probe comes back to original index, throws an exception.
245
243
  template<typename A>
246
- int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK,
247
- const int slotNo) {
248
- const int auxArrMask = (1 << lgAuxArrInts) - 1;
249
- const int configKmask = (1 << lgConfigK) - 1;
250
- int probe = slotNo & auxArrMask;
251
- const int loopIndex = probe;
244
+ int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
245
+ const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
246
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
247
+ uint32_t probe = slotNo & auxArrMask;
248
+ const uint32_t loopIndex = probe;
252
249
  do {
253
- const int arrVal = auxArr[probe];
254
- if (arrVal == HllUtil<A>::EMPTY) { //Compares on entire entry
250
+ const uint32_t arrVal = auxArr[probe];
251
+ if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
255
252
  return ~probe; //empty
256
253
  }
257
254
  else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
258
255
  return probe; //found given slotNo, return probe = index into aux array
259
256
  }
260
- const int stride = (slotNo >> lgAuxArrInts) | 1;
257
+ const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
261
258
  probe = (probe + stride) & auxArrMask;
262
259
  } while (probe != loopIndex);
263
260
  throw std::runtime_error("Key not found and no empty slots!");
@@ -265,12 +262,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
265
262
 
266
263
  template<typename A>
267
264
  coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
268
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
265
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
269
266
  }
270
267
 
271
268
  template<typename A>
272
269
  coupon_iterator<A> AuxHashMap<A>::end() const {
273
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
270
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
274
271
  }
275
272
 
276
273
  }
@@ -31,49 +31,49 @@ namespace datasketches {
31
31
  template<typename A>
32
32
  class AuxHashMap final {
33
33
  public:
34
- AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
35
- static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
34
+ AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
35
+ static AuxHashMap* newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
36
36
  static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
37
37
 
38
38
  static AuxHashMap* deserialize(const void* bytes, size_t len,
39
- int lgConfigK,
40
- int auxCount, int lgAuxArrInts,
39
+ uint8_t lgConfigK,
40
+ uint32_t auxCount, uint8_t lgAuxArrInts,
41
41
  bool srcCompact, const A& allocator);
42
- static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
43
- int auxCount, int lgAuxArrInts,
42
+ static AuxHashMap* deserialize(std::istream& is, uint8_t lgConfigK,
43
+ uint32_t auxCount, uint8_t lgAuxArrInts,
44
44
  bool srcCompact, const A& allocator);
45
45
  virtual ~AuxHashMap() = default;
46
46
  static std::function<void(AuxHashMap<A>*)> make_deleter();
47
47
 
48
48
  AuxHashMap* copy() const;
49
- int getUpdatableSizeBytes() const;
50
- int getCompactSizeBytes() const;
49
+ uint32_t getUpdatableSizeBytes() const;
50
+ uint32_t getCompactSizeBytes() const;
51
51
 
52
- int getAuxCount() const;
53
- int* getAuxIntArr();
54
- int getLgAuxArrInts() const;
52
+ uint32_t getAuxCount() const;
53
+ uint32_t* getAuxIntArr();
54
+ uint8_t getLgAuxArrInts() const;
55
55
 
56
56
  coupon_iterator<A> begin(bool all = false) const;
57
57
  coupon_iterator<A> end() const;
58
58
 
59
- void mustAdd(int slotNo, int value);
60
- int mustFindValueFor(int slotNo) const;
61
- void mustReplace(int slotNo, int value);
59
+ void mustAdd(uint32_t slotNo, uint8_t value);
60
+ uint8_t mustFindValueFor(uint32_t slotNo) const;
61
+ void mustReplace(uint32_t slotNo, uint8_t value);
62
62
 
63
63
  private:
64
64
  typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
65
65
 
66
- using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
66
+ using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
67
67
 
68
68
  // static so it can be used when resizing
69
- static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
69
+ static int32_t find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo);
70
70
 
71
71
  void checkGrow();
72
72
  void growAuxSpace();
73
73
 
74
- const int lgConfigK;
75
- int lgAuxArrInts;
76
- int auxCount;
74
+ const uint8_t lgConfigK;
75
+ uint8_t lgAuxArrInts;
76
+ uint32_t auxCount;
77
77
  vector_int entries;
78
78
  };
79
79
 
@@ -27,30 +27,30 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
- static const int numXArrValues = 257;
30
+ static const uint32_t numXArrValues = 257;
31
31
 
32
32
  /**
33
33
  * 18 Values, index 0 is LgK = 4, index 17 is LgK = 21.
34
34
  */
35
- static const int yStrides[] =
35
+ static const uint32_t yStrides[] =
36
36
  {1, 2, 3, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240, 20480, 40960, 81920};
37
37
 
38
38
  template<typename A>
39
- int CompositeInterpolationXTable<A>::get_y_stride(const int logK) {
40
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
41
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
42
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
39
+ uint32_t CompositeInterpolationXTable<A>::get_y_stride(uint8_t logK) {
40
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
41
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
42
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
43
43
  + std::to_string(logK));
44
44
  }
45
- return yStrides[logK - HllUtil<A>::MIN_LOG_K];
45
+ return yStrides[logK - hll_constants::MIN_LOG_K];
46
46
  }
47
47
 
48
48
  template<typename A>
49
- int CompositeInterpolationXTable<A>::get_x_arr_length() {
49
+ uint32_t CompositeInterpolationXTable<A>::get_x_arr_length() {
50
50
  return numXArrValues;
51
51
  }
52
52
 
53
- static const double xArr[18][numXArrValues] = {
53
+ static const double xArray[18][numXArrValues] = {
54
54
  {
55
55
  10.767999803534, 11.237701481774, 11.722738717438, 12.223246391222,
56
56
  12.739366773787, 13.271184824495, 13.818759686650, 14.382159835785,
@@ -797,15 +797,15 @@ static const double xArr[18][numXArrValues] = {
797
797
  };
798
798
 
799
799
  template<typename A>
800
- const double* CompositeInterpolationXTable<A>::get_x_arr(const int logK) {
801
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
802
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
803
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
800
+ const double* CompositeInterpolationXTable<A>::get_x_arr(uint8_t logK) {
801
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
802
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
803
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
804
804
  + std::to_string(logK));
805
805
  }
806
- return xArr[logK - HllUtil<A>::MIN_LOG_K];
806
+ return xArray[logK - hll_constants::MIN_LOG_K];
807
807
  }
808
808
 
809
809
  }
810
810
 
811
- #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
811
+ #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_