datasketches 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -26,15 +26,15 @@
26
26
  namespace datasketches {
27
27
 
28
28
  template<typename A>
29
- AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
29
+ AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
30
30
  lgConfigK(lgConfigK),
31
31
  lgAuxArrInts(lgAuxArrInts),
32
32
  auxCount(0),
33
- entries(1 << lgAuxArrInts, 0, allocator)
33
+ entries(1ULL << lgAuxArrInts, 0, allocator)
34
34
  {}
35
35
 
36
36
  template<typename A>
37
- AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
37
+ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
38
38
  return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
39
39
  }
40
40
 
@@ -45,42 +45,42 @@ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
45
45
 
46
46
  template<typename A>
47
47
  AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
48
- int lgConfigK,
49
- int auxCount, int lgAuxArrInts,
48
+ uint8_t lgConfigK,
49
+ uint32_t auxCount, uint8_t lgAuxArrInts,
50
50
  bool srcCompact, const A& allocator) {
51
- int lgArrInts = lgAuxArrInts;
51
+ uint8_t lgArrInts = lgAuxArrInts;
52
52
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
53
53
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
54
54
  } else { // updatable
55
55
  lgArrInts = lgAuxArrInts;
56
56
  }
57
57
 
58
- int configKmask = (1 << lgConfigK) - 1;
58
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
59
59
 
60
60
  AuxHashMap<A>* auxHashMap;
61
- const int* auxPtr = static_cast<const int*>(bytes);
61
+ const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
62
62
  if (srcCompact) {
63
63
  if (len < auxCount * sizeof(int)) {
64
64
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
65
65
  }
66
66
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
67
- for (int i = 0; i < auxCount; ++i) {
68
- int pair = auxPtr[i];
69
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
- int value = HllUtil<A>::getValue(pair);
67
+ for (uint32_t i = 0; i < auxCount; ++i) {
68
+ const uint32_t pair = auxPtr[i];
69
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
+ const uint8_t value = HllUtil<A>::getValue(pair);
71
71
  auxHashMap->mustAdd(slotNo, value);
72
72
  }
73
73
  } else { // updatable
74
- int itemsToRead = 1 << lgAuxArrInts;
75
- if (len < itemsToRead * sizeof(int)) {
74
+ uint32_t itemsToRead = 1 << lgAuxArrInts;
75
+ if (len < itemsToRead * sizeof(uint32_t)) {
76
76
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
77
77
  }
78
78
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
79
- for (int i = 0; i < itemsToRead; ++i) {
80
- int pair = auxPtr[i];
81
- if (pair == HllUtil<A>::EMPTY) { continue; }
82
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
- int value = HllUtil<A>::getValue(pair);
79
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
80
+ const uint32_t pair = auxPtr[i];
81
+ if (pair == hll_constants::EMPTY) { continue; }
82
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
+ const uint8_t value = HllUtil<A>::getValue(pair);
84
84
  auxHashMap->mustAdd(slotNo, value);
85
85
  }
86
86
  }
@@ -94,10 +94,10 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
94
94
  }
95
95
 
96
96
  template<typename A>
97
- AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
98
- const int auxCount, const int lgAuxArrInts,
99
- const bool srcCompact, const A& allocator) {
100
- int lgArrInts = lgAuxArrInts;
97
+ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
98
+ uint32_t auxCount, uint8_t lgAuxArrInts,
99
+ bool srcCompact, const A& allocator) {
100
+ uint8_t lgArrInts = lgAuxArrInts;
101
101
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
102
102
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
103
103
  } else { // updatable
@@ -108,24 +108,22 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
108
108
  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
109
109
  aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
110
110
 
111
- int configKmask = (1 << lgConfigK) - 1;
111
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
112
112
 
113
113
  if (srcCompact) {
114
- int pair;
115
- for (int i = 0; i < auxCount; ++i) {
116
- is.read((char*)&pair, sizeof(pair));
117
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
118
- int value = HllUtil<A>::getValue(pair);
114
+ for (uint32_t i = 0; i < auxCount; ++i) {
115
+ const auto pair = read<int>(is);
116
+ uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
117
+ uint8_t value = HllUtil<A>::getValue(pair);
119
118
  auxHashMap->mustAdd(slotNo, value);
120
119
  }
121
120
  } else { // updatable
122
- int itemsToRead = 1 << lgAuxArrInts;
123
- int pair;
124
- for (int i = 0; i < itemsToRead; ++i) {
125
- is.read((char*)&pair, sizeof(pair));
126
- if (pair == HllUtil<A>::EMPTY) { continue; }
127
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
128
- int value = HllUtil<A>::getValue(pair);
121
+ const uint32_t itemsToRead = 1 << lgAuxArrInts;
122
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
123
+ const auto pair = read<int>(is);
124
+ if (pair == hll_constants::EMPTY) { continue; }
125
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
126
+ const uint8_t value = HllUtil<A>::getValue(pair);
129
127
  auxHashMap->mustAdd(slotNo, value);
130
128
  }
131
129
  }
@@ -153,34 +151,34 @@ AuxHashMap<A>* AuxHashMap<A>::copy() const {
153
151
  }
154
152
 
155
153
  template<typename A>
156
- int AuxHashMap<A>::getAuxCount() const {
154
+ uint32_t AuxHashMap<A>::getAuxCount() const {
157
155
  return auxCount;
158
156
  }
159
157
 
160
158
  template<typename A>
161
- int* AuxHashMap<A>::getAuxIntArr(){
159
+ uint32_t* AuxHashMap<A>::getAuxIntArr(){
162
160
  return entries.data();
163
161
  }
164
162
 
165
163
  template<typename A>
166
- int AuxHashMap<A>::getLgAuxArrInts() const {
164
+ uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
167
165
  return lgAuxArrInts;
168
166
  }
169
167
 
170
168
  template<typename A>
171
- int AuxHashMap<A>::getCompactSizeBytes() const {
169
+ uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
172
170
  return auxCount << 2;
173
171
  }
174
172
 
175
173
  template<typename A>
176
- int AuxHashMap<A>::getUpdatableSizeBytes() const {
174
+ uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
177
175
  return 4 << lgAuxArrInts;
178
176
  }
179
177
 
180
178
  template<typename A>
181
- void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
182
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
183
- const int entry_pair = HllUtil<A>::pair(slotNo, value);
179
+ void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
180
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
181
+ const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
184
182
  if (index >= 0) {
185
183
  throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
186
184
  + std::to_string(slotNo) + ", Value: " + std::to_string(value));
@@ -193,8 +191,8 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
193
191
  }
194
192
 
195
193
  template<typename A>
196
- int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
197
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
194
+ uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
195
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
198
196
  if (index >= 0) {
199
197
  return HllUtil<A>::getValue(entries[index]);
200
198
  }
@@ -203,8 +201,8 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
203
201
  }
204
202
 
205
203
  template<typename A>
206
- void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
207
- const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
204
+ void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
205
+ const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
208
206
  if (idx >= 0) {
209
207
  entries[idx] = HllUtil<A>::pair(slotNo, value);
210
208
  return;
@@ -216,7 +214,7 @@ void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
216
214
 
217
215
  template<typename A>
218
216
  void AuxHashMap<A>::checkGrow() {
219
- if ((HllUtil<A>::RESIZE_DENOM * auxCount) > (HllUtil<A>::RESIZE_NUMER * (1 << lgAuxArrInts))) {
217
+ if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
220
218
  growAuxSpace();
221
219
  }
222
220
  }
@@ -227,10 +225,10 @@ void AuxHashMap<A>::growAuxSpace() {
227
225
  const int newArrLen = 1 << ++lgAuxArrInts;
228
226
  vector_int entries_new(newArrLen, 0, entries.get_allocator());
229
227
  for (size_t i = 0; i < entries.size(); ++i) {
230
- const int fetched = entries[i];
231
- if (fetched != HllUtil<A>::EMPTY) {
228
+ const uint32_t fetched = entries[i];
229
+ if (fetched != hll_constants::EMPTY) {
232
230
  // find empty in new array
233
- const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
231
+ const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
234
232
  entries_new[~idx] = fetched;
235
233
  }
236
234
  }
@@ -243,21 +241,20 @@ void AuxHashMap<A>::growAuxSpace() {
243
241
  //Continues searching.
244
242
  //If the probe comes back to original index, throws an exception.
245
243
  template<typename A>
246
- int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK,
247
- const int slotNo) {
248
- const int auxArrMask = (1 << lgAuxArrInts) - 1;
249
- const int configKmask = (1 << lgConfigK) - 1;
250
- int probe = slotNo & auxArrMask;
251
- const int loopIndex = probe;
244
+ int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
245
+ const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
246
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
247
+ uint32_t probe = slotNo & auxArrMask;
248
+ const uint32_t loopIndex = probe;
252
249
  do {
253
- const int arrVal = auxArr[probe];
254
- if (arrVal == HllUtil<A>::EMPTY) { //Compares on entire entry
250
+ const uint32_t arrVal = auxArr[probe];
251
+ if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
255
252
  return ~probe; //empty
256
253
  }
257
254
  else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
258
255
  return probe; //found given slotNo, return probe = index into aux array
259
256
  }
260
- const int stride = (slotNo >> lgAuxArrInts) | 1;
257
+ const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
261
258
  probe = (probe + stride) & auxArrMask;
262
259
  } while (probe != loopIndex);
263
260
  throw std::runtime_error("Key not found and no empty slots!");
@@ -265,12 +262,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
265
262
 
266
263
  template<typename A>
267
264
  coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
268
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
265
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
269
266
  }
270
267
 
271
268
  template<typename A>
272
269
  coupon_iterator<A> AuxHashMap<A>::end() const {
273
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
270
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
274
271
  }
275
272
 
276
273
  }
@@ -31,49 +31,49 @@ namespace datasketches {
31
31
  template<typename A>
32
32
  class AuxHashMap final {
33
33
  public:
34
- AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
35
- static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
34
+ AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
35
+ static AuxHashMap* newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
36
36
  static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
37
37
 
38
38
  static AuxHashMap* deserialize(const void* bytes, size_t len,
39
- int lgConfigK,
40
- int auxCount, int lgAuxArrInts,
39
+ uint8_t lgConfigK,
40
+ uint32_t auxCount, uint8_t lgAuxArrInts,
41
41
  bool srcCompact, const A& allocator);
42
- static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
43
- int auxCount, int lgAuxArrInts,
42
+ static AuxHashMap* deserialize(std::istream& is, uint8_t lgConfigK,
43
+ uint32_t auxCount, uint8_t lgAuxArrInts,
44
44
  bool srcCompact, const A& allocator);
45
45
  virtual ~AuxHashMap() = default;
46
46
  static std::function<void(AuxHashMap<A>*)> make_deleter();
47
47
 
48
48
  AuxHashMap* copy() const;
49
- int getUpdatableSizeBytes() const;
50
- int getCompactSizeBytes() const;
49
+ uint32_t getUpdatableSizeBytes() const;
50
+ uint32_t getCompactSizeBytes() const;
51
51
 
52
- int getAuxCount() const;
53
- int* getAuxIntArr();
54
- int getLgAuxArrInts() const;
52
+ uint32_t getAuxCount() const;
53
+ uint32_t* getAuxIntArr();
54
+ uint8_t getLgAuxArrInts() const;
55
55
 
56
56
  coupon_iterator<A> begin(bool all = false) const;
57
57
  coupon_iterator<A> end() const;
58
58
 
59
- void mustAdd(int slotNo, int value);
60
- int mustFindValueFor(int slotNo) const;
61
- void mustReplace(int slotNo, int value);
59
+ void mustAdd(uint32_t slotNo, uint8_t value);
60
+ uint8_t mustFindValueFor(uint32_t slotNo) const;
61
+ void mustReplace(uint32_t slotNo, uint8_t value);
62
62
 
63
63
  private:
64
64
  typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
65
65
 
66
- using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
66
+ using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
67
67
 
68
68
  // static so it can be used when resizing
69
- static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
69
+ static int32_t find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo);
70
70
 
71
71
  void checkGrow();
72
72
  void growAuxSpace();
73
73
 
74
- const int lgConfigK;
75
- int lgAuxArrInts;
76
- int auxCount;
74
+ const uint8_t lgConfigK;
75
+ uint8_t lgAuxArrInts;
76
+ uint32_t auxCount;
77
77
  vector_int entries;
78
78
  };
79
79
 
@@ -27,30 +27,30 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
- static const int numXArrValues = 257;
30
+ static const uint32_t numXArrValues = 257;
31
31
 
32
32
  /**
33
33
  * 18 Values, index 0 is LgK = 4, index 17 is LgK = 21.
34
34
  */
35
- static const int yStrides[] =
35
+ static const uint32_t yStrides[] =
36
36
  {1, 2, 3, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240, 20480, 40960, 81920};
37
37
 
38
38
  template<typename A>
39
- int CompositeInterpolationXTable<A>::get_y_stride(const int logK) {
40
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
41
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
42
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
39
+ uint32_t CompositeInterpolationXTable<A>::get_y_stride(uint8_t logK) {
40
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
41
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
42
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
43
43
  + std::to_string(logK));
44
44
  }
45
- return yStrides[logK - HllUtil<A>::MIN_LOG_K];
45
+ return yStrides[logK - hll_constants::MIN_LOG_K];
46
46
  }
47
47
 
48
48
  template<typename A>
49
- int CompositeInterpolationXTable<A>::get_x_arr_length() {
49
+ uint32_t CompositeInterpolationXTable<A>::get_x_arr_length() {
50
50
  return numXArrValues;
51
51
  }
52
52
 
53
- static const double xArr[18][numXArrValues] = {
53
+ static const double xArray[18][numXArrValues] = {
54
54
  {
55
55
  10.767999803534, 11.237701481774, 11.722738717438, 12.223246391222,
56
56
  12.739366773787, 13.271184824495, 13.818759686650, 14.382159835785,
@@ -797,15 +797,15 @@ static const double xArr[18][numXArrValues] = {
797
797
  };
798
798
 
799
799
  template<typename A>
800
- const double* CompositeInterpolationXTable<A>::get_x_arr(const int logK) {
801
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
802
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
803
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
800
+ const double* CompositeInterpolationXTable<A>::get_x_arr(uint8_t logK) {
801
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
802
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
803
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
804
804
  + std::to_string(logK));
805
805
  }
806
- return xArr[logK - HllUtil<A>::MIN_LOG_K];
806
+ return xArray[logK - hll_constants::MIN_LOG_K];
807
807
  }
808
808
 
809
809
  }
810
810
 
811
- #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
811
+ #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
@@ -27,10 +27,10 @@ namespace datasketches {
27
27
  template<typename A = std::allocator<uint8_t>>
28
28
  class CompositeInterpolationXTable {
29
29
  public:
30
- static int get_y_stride(int logK);
30
+ static uint32_t get_y_stride(uint8_t logK);
31
31
 
32
- static const double* get_x_arr(int logK);
33
- static int get_x_arr_length();
32
+ static const double* get_x_arr(uint8_t logK);
33
+ static uint32_t get_x_arr_length();
34
34
  };
35
35
 
36
36
  }