datasketches 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/datasketches/version.rb +1 -1
  4. data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
  5. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  6. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  7. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  8. data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
  9. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  10. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  11. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  12. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  13. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  14. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
  16. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
  17. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  18. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  19. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  20. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  21. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  22. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  23. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
  25. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  26. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  28. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  29. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  30. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  31. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  32. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  33. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  34. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  35. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  36. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  37. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  38. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  39. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  40. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  41. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  42. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  43. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  44. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  45. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  47. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  48. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  49. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  50. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  51. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  52. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  53. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  54. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  55. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  56. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  57. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  58. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  59. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  60. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  61. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  62. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  63. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  64. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  65. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  66. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  67. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
  69. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  70. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  71. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
  73. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  74. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  75. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
  76. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  77. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  78. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  79. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  80. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
  84. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  85. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
  86. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
  87. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  88. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  89. data/vendor/datasketches-cpp/setup.py +1 -1
  90. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  91. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  92. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  93. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  94. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  95. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  96. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
  97. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
  98. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
  99. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  100. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
  101. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
  103. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  104. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  105. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  106. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
  107. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  108. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  109. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
  110. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  111. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  112. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  113. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  114. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  115. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  116. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
  117. metadata +8 -3
@@ -39,15 +39,15 @@ allocator_(allocator),
39
39
  lg_cur_size_(lg_cur_size),
40
40
  lg_max_size_(lg_max_size),
41
41
  num_active_(0),
42
- keys_(allocator_.allocate(1 << lg_cur_size)),
42
+ keys_(allocator_.allocate(1ULL << lg_cur_size)),
43
43
  values_(nullptr),
44
44
  states_(nullptr)
45
45
  {
46
46
  AllocV av(allocator_);
47
- values_ = av.allocate(1 << lg_cur_size);
47
+ values_ = av.allocate(1ULL << lg_cur_size);
48
48
  AllocU16 au16(allocator_);
49
- states_ = au16.allocate(1 << lg_cur_size);
50
- std::fill(states_, states_ + (1 << lg_cur_size), 0);
49
+ states_ = au16.allocate(1ULL << lg_cur_size);
50
+ std::fill(states_, states_ + (1ULL << lg_cur_size), static_cast<uint16_t>(0));
51
51
  }
52
52
 
53
53
  template<typename K, typename V, typename H, typename E, typename A>
@@ -56,14 +56,14 @@ allocator_(other.allocator_),
56
56
  lg_cur_size_(other.lg_cur_size_),
57
57
  lg_max_size_(other.lg_max_size_),
58
58
  num_active_(other.num_active_),
59
- keys_(allocator_.allocate(1 << lg_cur_size_)),
59
+ keys_(allocator_.allocate(1ULL << lg_cur_size_)),
60
60
  values_(nullptr),
61
61
  states_(nullptr)
62
62
  {
63
63
  AllocV av(allocator_);
64
- values_ = av.allocate(1 << lg_cur_size_);
64
+ values_ = av.allocate(1ULL << lg_cur_size_);
65
65
  AllocU16 au16(allocator_);
66
- states_ = au16.allocate(1 << lg_cur_size_);
66
+ states_ = au16.allocate(1ULL << lg_cur_size_);
67
67
  const uint32_t size = 1 << lg_cur_size_;
68
68
  if (num_active_ > 0) {
69
69
  auto num = num_active_;
@@ -177,7 +177,7 @@ uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
177
177
 
178
178
  template<typename K, typename V, typename H, typename E, typename A>
179
179
  uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
180
- return (1 << lg_cur_size_) * LOAD_FACTOR;
180
+ return static_cast<uint32_t>((1 << lg_cur_size_) * LOAD_FACTOR);
181
181
  }
182
182
 
183
183
  template<typename K, typename V, typename H, typename E, typename A>
@@ -246,7 +246,7 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
246
246
  // if none are found, the status is changed
247
247
  states_[delete_index] = 0; // mark as empty
248
248
  keys_[delete_index].~K();
249
- uint32_t drift = 1;
249
+ uint16_t drift = 1;
250
250
  const uint32_t mask = (1 << lg_cur_size_) - 1;
251
251
  uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
252
252
  // advance until we find a free location replacing locations as needed
@@ -322,7 +322,7 @@ void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
322
322
  values_ = av.allocate(new_size);
323
323
  AllocU16 au16(allocator_);
324
324
  states_ = au16.allocate(new_size);
325
- std::fill(states_, states_ + new_size, 0);
325
+ std::fill(states_, states_ + new_size, static_cast<uint16_t>(0));
326
326
  num_active_ = 0;
327
327
  lg_cur_size_ = lg_new_size;
328
328
  for (uint32_t i = 0; i < old_size; i++) {
@@ -39,8 +39,8 @@ TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
39
39
  TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
40
40
  reverse_purge_hash_map<int> map(3, 4, std::allocator<int>());
41
41
  for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
42
- int sum = 0;
43
- for (auto &it: map) sum += it.second;
42
+ uint64_t sum = 0;
43
+ for (auto it: map) sum += it.second;
44
44
  REQUIRE(sum == 11);
45
45
  }
46
46
 
@@ -26,15 +26,15 @@
26
26
  namespace datasketches {
27
27
 
28
28
  template<typename A>
29
- AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
29
+ AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
30
30
  lgConfigK(lgConfigK),
31
31
  lgAuxArrInts(lgAuxArrInts),
32
32
  auxCount(0),
33
- entries(1 << lgAuxArrInts, 0, allocator)
33
+ entries(1ULL << lgAuxArrInts, 0, allocator)
34
34
  {}
35
35
 
36
36
  template<typename A>
37
- AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
37
+ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
38
38
  return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
39
39
  }
40
40
 
@@ -45,42 +45,42 @@ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
45
45
 
46
46
  template<typename A>
47
47
  AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
48
- int lgConfigK,
49
- int auxCount, int lgAuxArrInts,
48
+ uint8_t lgConfigK,
49
+ uint32_t auxCount, uint8_t lgAuxArrInts,
50
50
  bool srcCompact, const A& allocator) {
51
- int lgArrInts = lgAuxArrInts;
51
+ uint8_t lgArrInts = lgAuxArrInts;
52
52
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
53
53
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
54
54
  } else { // updatable
55
55
  lgArrInts = lgAuxArrInts;
56
56
  }
57
57
 
58
- int configKmask = (1 << lgConfigK) - 1;
58
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
59
59
 
60
60
  AuxHashMap<A>* auxHashMap;
61
- const int* auxPtr = static_cast<const int*>(bytes);
61
+ const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
62
62
  if (srcCompact) {
63
63
  if (len < auxCount * sizeof(int)) {
64
64
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
65
65
  }
66
66
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
67
- for (int i = 0; i < auxCount; ++i) {
68
- int pair = auxPtr[i];
69
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
- int value = HllUtil<A>::getValue(pair);
67
+ for (uint32_t i = 0; i < auxCount; ++i) {
68
+ const uint32_t pair = auxPtr[i];
69
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
+ const uint8_t value = HllUtil<A>::getValue(pair);
71
71
  auxHashMap->mustAdd(slotNo, value);
72
72
  }
73
73
  } else { // updatable
74
- int itemsToRead = 1 << lgAuxArrInts;
75
- if (len < itemsToRead * sizeof(int)) {
74
+ uint32_t itemsToRead = 1 << lgAuxArrInts;
75
+ if (len < itemsToRead * sizeof(uint32_t)) {
76
76
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
77
77
  }
78
78
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
79
- for (int i = 0; i < itemsToRead; ++i) {
80
- int pair = auxPtr[i];
81
- if (pair == HllUtil<A>::EMPTY) { continue; }
82
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
- int value = HllUtil<A>::getValue(pair);
79
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
80
+ const uint32_t pair = auxPtr[i];
81
+ if (pair == hll_constants::EMPTY) { continue; }
82
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
+ const uint8_t value = HllUtil<A>::getValue(pair);
84
84
  auxHashMap->mustAdd(slotNo, value);
85
85
  }
86
86
  }
@@ -94,10 +94,10 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
94
94
  }
95
95
 
96
96
  template<typename A>
97
- AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
98
- const int auxCount, const int lgAuxArrInts,
99
- const bool srcCompact, const A& allocator) {
100
- int lgArrInts = lgAuxArrInts;
97
+ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
98
+ uint32_t auxCount, uint8_t lgAuxArrInts,
99
+ bool srcCompact, const A& allocator) {
100
+ uint8_t lgArrInts = lgAuxArrInts;
101
101
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
102
102
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
103
103
  } else { // updatable
@@ -108,24 +108,22 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
108
108
  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
109
109
  aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
110
110
 
111
- int configKmask = (1 << lgConfigK) - 1;
111
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
112
112
 
113
113
  if (srcCompact) {
114
- int pair;
115
- for (int i = 0; i < auxCount; ++i) {
116
- is.read((char*)&pair, sizeof(pair));
117
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
118
- int value = HllUtil<A>::getValue(pair);
114
+ for (uint32_t i = 0; i < auxCount; ++i) {
115
+ const auto pair = read<int>(is);
116
+ uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
117
+ uint8_t value = HllUtil<A>::getValue(pair);
119
118
  auxHashMap->mustAdd(slotNo, value);
120
119
  }
121
120
  } else { // updatable
122
- int itemsToRead = 1 << lgAuxArrInts;
123
- int pair;
124
- for (int i = 0; i < itemsToRead; ++i) {
125
- is.read((char*)&pair, sizeof(pair));
126
- if (pair == HllUtil<A>::EMPTY) { continue; }
127
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
128
- int value = HllUtil<A>::getValue(pair);
121
+ const uint32_t itemsToRead = 1 << lgAuxArrInts;
122
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
123
+ const auto pair = read<int>(is);
124
+ if (pair == hll_constants::EMPTY) { continue; }
125
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
126
+ const uint8_t value = HllUtil<A>::getValue(pair);
129
127
  auxHashMap->mustAdd(slotNo, value);
130
128
  }
131
129
  }
@@ -153,34 +151,34 @@ AuxHashMap<A>* AuxHashMap<A>::copy() const {
153
151
  }
154
152
 
155
153
  template<typename A>
156
- int AuxHashMap<A>::getAuxCount() const {
154
+ uint32_t AuxHashMap<A>::getAuxCount() const {
157
155
  return auxCount;
158
156
  }
159
157
 
160
158
  template<typename A>
161
- int* AuxHashMap<A>::getAuxIntArr(){
159
+ uint32_t* AuxHashMap<A>::getAuxIntArr(){
162
160
  return entries.data();
163
161
  }
164
162
 
165
163
  template<typename A>
166
- int AuxHashMap<A>::getLgAuxArrInts() const {
164
+ uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
167
165
  return lgAuxArrInts;
168
166
  }
169
167
 
170
168
  template<typename A>
171
- int AuxHashMap<A>::getCompactSizeBytes() const {
169
+ uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
172
170
  return auxCount << 2;
173
171
  }
174
172
 
175
173
  template<typename A>
176
- int AuxHashMap<A>::getUpdatableSizeBytes() const {
174
+ uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
177
175
  return 4 << lgAuxArrInts;
178
176
  }
179
177
 
180
178
  template<typename A>
181
- void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
182
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
183
- const int entry_pair = HllUtil<A>::pair(slotNo, value);
179
+ void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
180
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
181
+ const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
184
182
  if (index >= 0) {
185
183
  throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
186
184
  + std::to_string(slotNo) + ", Value: " + std::to_string(value));
@@ -193,8 +191,8 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
193
191
  }
194
192
 
195
193
  template<typename A>
196
- int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
197
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
194
+ uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
195
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
198
196
  if (index >= 0) {
199
197
  return HllUtil<A>::getValue(entries[index]);
200
198
  }
@@ -203,8 +201,8 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
203
201
  }
204
202
 
205
203
  template<typename A>
206
- void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
207
- const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
204
+ void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
205
+ const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
208
206
  if (idx >= 0) {
209
207
  entries[idx] = HllUtil<A>::pair(slotNo, value);
210
208
  return;
@@ -216,7 +214,7 @@ void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
216
214
 
217
215
  template<typename A>
218
216
  void AuxHashMap<A>::checkGrow() {
219
- if ((HllUtil<A>::RESIZE_DENOM * auxCount) > (HllUtil<A>::RESIZE_NUMER * (1 << lgAuxArrInts))) {
217
+ if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
220
218
  growAuxSpace();
221
219
  }
222
220
  }
@@ -227,10 +225,10 @@ void AuxHashMap<A>::growAuxSpace() {
227
225
  const int newArrLen = 1 << ++lgAuxArrInts;
228
226
  vector_int entries_new(newArrLen, 0, entries.get_allocator());
229
227
  for (size_t i = 0; i < entries.size(); ++i) {
230
- const int fetched = entries[i];
231
- if (fetched != HllUtil<A>::EMPTY) {
228
+ const uint32_t fetched = entries[i];
229
+ if (fetched != hll_constants::EMPTY) {
232
230
  // find empty in new array
233
- const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
231
+ const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
234
232
  entries_new[~idx] = fetched;
235
233
  }
236
234
  }
@@ -243,21 +241,20 @@ void AuxHashMap<A>::growAuxSpace() {
243
241
  //Continues searching.
244
242
  //If the probe comes back to original index, throws an exception.
245
243
  template<typename A>
246
- int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK,
247
- const int slotNo) {
248
- const int auxArrMask = (1 << lgAuxArrInts) - 1;
249
- const int configKmask = (1 << lgConfigK) - 1;
250
- int probe = slotNo & auxArrMask;
251
- const int loopIndex = probe;
244
+ int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
245
+ const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
246
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
247
+ uint32_t probe = slotNo & auxArrMask;
248
+ const uint32_t loopIndex = probe;
252
249
  do {
253
- const int arrVal = auxArr[probe];
254
- if (arrVal == HllUtil<A>::EMPTY) { //Compares on entire entry
250
+ const uint32_t arrVal = auxArr[probe];
251
+ if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
255
252
  return ~probe; //empty
256
253
  }
257
254
  else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
258
255
  return probe; //found given slotNo, return probe = index into aux array
259
256
  }
260
- const int stride = (slotNo >> lgAuxArrInts) | 1;
257
+ const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
261
258
  probe = (probe + stride) & auxArrMask;
262
259
  } while (probe != loopIndex);
263
260
  throw std::runtime_error("Key not found and no empty slots!");
@@ -265,12 +262,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
265
262
 
266
263
  template<typename A>
267
264
  coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
268
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
265
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
269
266
  }
270
267
 
271
268
  template<typename A>
272
269
  coupon_iterator<A> AuxHashMap<A>::end() const {
273
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
270
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
274
271
  }
275
272
 
276
273
  }
@@ -31,49 +31,49 @@ namespace datasketches {
31
31
  template<typename A>
32
32
  class AuxHashMap final {
33
33
  public:
34
- AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
35
- static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
34
+ AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
35
+ static AuxHashMap* newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
36
36
  static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
37
37
 
38
38
  static AuxHashMap* deserialize(const void* bytes, size_t len,
39
- int lgConfigK,
40
- int auxCount, int lgAuxArrInts,
39
+ uint8_t lgConfigK,
40
+ uint32_t auxCount, uint8_t lgAuxArrInts,
41
41
  bool srcCompact, const A& allocator);
42
- static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
43
- int auxCount, int lgAuxArrInts,
42
+ static AuxHashMap* deserialize(std::istream& is, uint8_t lgConfigK,
43
+ uint32_t auxCount, uint8_t lgAuxArrInts,
44
44
  bool srcCompact, const A& allocator);
45
45
  virtual ~AuxHashMap() = default;
46
46
  static std::function<void(AuxHashMap<A>*)> make_deleter();
47
47
 
48
48
  AuxHashMap* copy() const;
49
- int getUpdatableSizeBytes() const;
50
- int getCompactSizeBytes() const;
49
+ uint32_t getUpdatableSizeBytes() const;
50
+ uint32_t getCompactSizeBytes() const;
51
51
 
52
- int getAuxCount() const;
53
- int* getAuxIntArr();
54
- int getLgAuxArrInts() const;
52
+ uint32_t getAuxCount() const;
53
+ uint32_t* getAuxIntArr();
54
+ uint8_t getLgAuxArrInts() const;
55
55
 
56
56
  coupon_iterator<A> begin(bool all = false) const;
57
57
  coupon_iterator<A> end() const;
58
58
 
59
- void mustAdd(int slotNo, int value);
60
- int mustFindValueFor(int slotNo) const;
61
- void mustReplace(int slotNo, int value);
59
+ void mustAdd(uint32_t slotNo, uint8_t value);
60
+ uint8_t mustFindValueFor(uint32_t slotNo) const;
61
+ void mustReplace(uint32_t slotNo, uint8_t value);
62
62
 
63
63
  private:
64
64
  typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
65
65
 
66
- using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
66
+ using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
67
67
 
68
68
  // static so it can be used when resizing
69
- static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
69
+ static int32_t find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo);
70
70
 
71
71
  void checkGrow();
72
72
  void growAuxSpace();
73
73
 
74
- const int lgConfigK;
75
- int lgAuxArrInts;
76
- int auxCount;
74
+ const uint8_t lgConfigK;
75
+ uint8_t lgAuxArrInts;
76
+ uint32_t auxCount;
77
77
  vector_int entries;
78
78
  };
79
79
 
@@ -27,30 +27,30 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
- static const int numXArrValues = 257;
30
+ static const uint32_t numXArrValues = 257;
31
31
 
32
32
  /**
33
33
  * 18 Values, index 0 is LgK = 4, index 17 is LgK = 21.
34
34
  */
35
- static const int yStrides[] =
35
+ static const uint32_t yStrides[] =
36
36
  {1, 2, 3, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240, 20480, 40960, 81920};
37
37
 
38
38
  template<typename A>
39
- int CompositeInterpolationXTable<A>::get_y_stride(const int logK) {
40
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
41
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
42
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
39
+ uint32_t CompositeInterpolationXTable<A>::get_y_stride(uint8_t logK) {
40
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
41
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
42
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
43
43
  + std::to_string(logK));
44
44
  }
45
- return yStrides[logK - HllUtil<A>::MIN_LOG_K];
45
+ return yStrides[logK - hll_constants::MIN_LOG_K];
46
46
  }
47
47
 
48
48
  template<typename A>
49
- int CompositeInterpolationXTable<A>::get_x_arr_length() {
49
+ uint32_t CompositeInterpolationXTable<A>::get_x_arr_length() {
50
50
  return numXArrValues;
51
51
  }
52
52
 
53
- static const double xArr[18][numXArrValues] = {
53
+ static const double xArray[18][numXArrValues] = {
54
54
  {
55
55
  10.767999803534, 11.237701481774, 11.722738717438, 12.223246391222,
56
56
  12.739366773787, 13.271184824495, 13.818759686650, 14.382159835785,
@@ -797,15 +797,15 @@ static const double xArr[18][numXArrValues] = {
797
797
  };
798
798
 
799
799
  template<typename A>
800
- const double* CompositeInterpolationXTable<A>::get_x_arr(const int logK) {
801
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
802
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
803
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
800
+ const double* CompositeInterpolationXTable<A>::get_x_arr(uint8_t logK) {
801
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
802
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
803
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
804
804
  + std::to_string(logK));
805
805
  }
806
- return xArr[logK - HllUtil<A>::MIN_LOG_K];
806
+ return xArray[logK - hll_constants::MIN_LOG_K];
807
807
  }
808
808
 
809
809
  }
810
810
 
811
- #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
811
+ #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_