datasketches 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -26,15 +26,15 @@
26
26
  namespace datasketches {
27
27
 
28
28
  template<typename A>
29
- AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator):
29
+ AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
30
30
  lgConfigK(lgConfigK),
31
31
  lgAuxArrInts(lgAuxArrInts),
32
32
  auxCount(0),
33
- entries(1 << lgAuxArrInts, 0, allocator)
33
+ entries(1ULL << lgAuxArrInts, 0, allocator)
34
34
  {}
35
35
 
36
36
  template<typename A>
37
- AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator) {
37
+ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
38
38
  return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
39
39
  }
40
40
 
@@ -45,42 +45,42 @@ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
45
45
 
46
46
  template<typename A>
47
47
  AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
48
- int lgConfigK,
49
- int auxCount, int lgAuxArrInts,
48
+ uint8_t lgConfigK,
49
+ uint32_t auxCount, uint8_t lgAuxArrInts,
50
50
  bool srcCompact, const A& allocator) {
51
- int lgArrInts = lgAuxArrInts;
51
+ uint8_t lgArrInts = lgAuxArrInts;
52
52
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
53
53
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
54
54
  } else { // updatable
55
55
  lgArrInts = lgAuxArrInts;
56
56
  }
57
57
 
58
- int configKmask = (1 << lgConfigK) - 1;
58
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
59
59
 
60
60
  AuxHashMap<A>* auxHashMap;
61
- const int* auxPtr = static_cast<const int*>(bytes);
61
+ const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
62
62
  if (srcCompact) {
63
63
  if (len < auxCount * sizeof(int)) {
64
64
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
65
65
  }
66
66
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
67
- for (int i = 0; i < auxCount; ++i) {
68
- int pair = auxPtr[i];
69
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
- int value = HllUtil<A>::getValue(pair);
67
+ for (uint32_t i = 0; i < auxCount; ++i) {
68
+ const uint32_t pair = auxPtr[i];
69
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
+ const uint8_t value = HllUtil<A>::getValue(pair);
71
71
  auxHashMap->mustAdd(slotNo, value);
72
72
  }
73
73
  } else { // updatable
74
- int itemsToRead = 1 << lgAuxArrInts;
75
- if (len < itemsToRead * sizeof(int)) {
74
+ uint32_t itemsToRead = 1 << lgAuxArrInts;
75
+ if (len < itemsToRead * sizeof(uint32_t)) {
76
76
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
77
77
  }
78
78
  auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
79
- for (int i = 0; i < itemsToRead; ++i) {
80
- int pair = auxPtr[i];
81
- if (pair == HllUtil<A>::EMPTY) { continue; }
82
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
- int value = HllUtil<A>::getValue(pair);
79
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
80
+ const uint32_t pair = auxPtr[i];
81
+ if (pair == hll_constants::EMPTY) { continue; }
82
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
+ const uint8_t value = HllUtil<A>::getValue(pair);
84
84
  auxHashMap->mustAdd(slotNo, value);
85
85
  }
86
86
  }
@@ -94,10 +94,10 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
94
94
  }
95
95
 
96
96
  template<typename A>
97
- AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
98
- const int auxCount, const int lgAuxArrInts,
99
- const bool srcCompact, const A& allocator) {
100
- int lgArrInts = lgAuxArrInts;
97
+ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
98
+ uint32_t auxCount, uint8_t lgAuxArrInts,
99
+ bool srcCompact, const A& allocator) {
100
+ uint8_t lgArrInts = lgAuxArrInts;
101
101
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
102
102
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
103
103
  } else { // updatable
@@ -108,24 +108,22 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
108
108
  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
109
109
  aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
110
110
 
111
- int configKmask = (1 << lgConfigK) - 1;
111
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
112
112
 
113
113
  if (srcCompact) {
114
- int pair;
115
- for (int i = 0; i < auxCount; ++i) {
116
- is.read((char*)&pair, sizeof(pair));
117
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
118
- int value = HllUtil<A>::getValue(pair);
114
+ for (uint32_t i = 0; i < auxCount; ++i) {
115
+ const auto pair = read<int>(is);
116
+ uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
117
+ uint8_t value = HllUtil<A>::getValue(pair);
119
118
  auxHashMap->mustAdd(slotNo, value);
120
119
  }
121
120
  } else { // updatable
122
- int itemsToRead = 1 << lgAuxArrInts;
123
- int pair;
124
- for (int i = 0; i < itemsToRead; ++i) {
125
- is.read((char*)&pair, sizeof(pair));
126
- if (pair == HllUtil<A>::EMPTY) { continue; }
127
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
128
- int value = HllUtil<A>::getValue(pair);
121
+ const uint32_t itemsToRead = 1 << lgAuxArrInts;
122
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
123
+ const auto pair = read<int>(is);
124
+ if (pair == hll_constants::EMPTY) { continue; }
125
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
126
+ const uint8_t value = HllUtil<A>::getValue(pair);
129
127
  auxHashMap->mustAdd(slotNo, value);
130
128
  }
131
129
  }
@@ -153,34 +151,34 @@ AuxHashMap<A>* AuxHashMap<A>::copy() const {
153
151
  }
154
152
 
155
153
  template<typename A>
156
- int AuxHashMap<A>::getAuxCount() const {
154
+ uint32_t AuxHashMap<A>::getAuxCount() const {
157
155
  return auxCount;
158
156
  }
159
157
 
160
158
  template<typename A>
161
- int* AuxHashMap<A>::getAuxIntArr(){
159
+ uint32_t* AuxHashMap<A>::getAuxIntArr(){
162
160
  return entries.data();
163
161
  }
164
162
 
165
163
  template<typename A>
166
- int AuxHashMap<A>::getLgAuxArrInts() const {
164
+ uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
167
165
  return lgAuxArrInts;
168
166
  }
169
167
 
170
168
  template<typename A>
171
- int AuxHashMap<A>::getCompactSizeBytes() const {
169
+ uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
172
170
  return auxCount << 2;
173
171
  }
174
172
 
175
173
  template<typename A>
176
- int AuxHashMap<A>::getUpdatableSizeBytes() const {
174
+ uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
177
175
  return 4 << lgAuxArrInts;
178
176
  }
179
177
 
180
178
  template<typename A>
181
- void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
182
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
183
- const int entry_pair = HllUtil<A>::pair(slotNo, value);
179
+ void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
180
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
181
+ const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
184
182
  if (index >= 0) {
185
183
  throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
186
184
  + std::to_string(slotNo) + ", Value: " + std::to_string(value));
@@ -193,8 +191,8 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
193
191
  }
194
192
 
195
193
  template<typename A>
196
- int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
197
- const int index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
194
+ uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
195
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
198
196
  if (index >= 0) {
199
197
  return HllUtil<A>::getValue(entries[index]);
200
198
  }
@@ -203,8 +201,8 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
203
201
  }
204
202
 
205
203
  template<typename A>
206
- void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
207
- const int idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
204
+ void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
205
+ const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
208
206
  if (idx >= 0) {
209
207
  entries[idx] = HllUtil<A>::pair(slotNo, value);
210
208
  return;
@@ -216,7 +214,7 @@ void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
216
214
 
217
215
  template<typename A>
218
216
  void AuxHashMap<A>::checkGrow() {
219
- if ((HllUtil<A>::RESIZE_DENOM * auxCount) > (HllUtil<A>::RESIZE_NUMER * (1 << lgAuxArrInts))) {
217
+ if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
220
218
  growAuxSpace();
221
219
  }
222
220
  }
@@ -227,10 +225,10 @@ void AuxHashMap<A>::growAuxSpace() {
227
225
  const int newArrLen = 1 << ++lgAuxArrInts;
228
226
  vector_int entries_new(newArrLen, 0, entries.get_allocator());
229
227
  for (size_t i = 0; i < entries.size(); ++i) {
230
- const int fetched = entries[i];
231
- if (fetched != HllUtil<A>::EMPTY) {
228
+ const uint32_t fetched = entries[i];
229
+ if (fetched != hll_constants::EMPTY) {
232
230
  // find empty in new array
233
- const int idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
231
+ const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
234
232
  entries_new[~idx] = fetched;
235
233
  }
236
234
  }
@@ -243,21 +241,20 @@ void AuxHashMap<A>::growAuxSpace() {
243
241
  //Continues searching.
244
242
  //If the probe comes back to original index, throws an exception.
245
243
  template<typename A>
246
- int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK,
247
- const int slotNo) {
248
- const int auxArrMask = (1 << lgAuxArrInts) - 1;
249
- const int configKmask = (1 << lgConfigK) - 1;
250
- int probe = slotNo & auxArrMask;
251
- const int loopIndex = probe;
244
+ int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
245
+ const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
246
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
247
+ uint32_t probe = slotNo & auxArrMask;
248
+ const uint32_t loopIndex = probe;
252
249
  do {
253
- const int arrVal = auxArr[probe];
254
- if (arrVal == HllUtil<A>::EMPTY) { //Compares on entire entry
250
+ const uint32_t arrVal = auxArr[probe];
251
+ if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
255
252
  return ~probe; //empty
256
253
  }
257
254
  else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
258
255
  return probe; //found given slotNo, return probe = index into aux array
259
256
  }
260
- const int stride = (slotNo >> lgAuxArrInts) | 1;
257
+ const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
261
258
  probe = (probe + stride) & auxArrMask;
262
259
  } while (probe != loopIndex);
263
260
  throw std::runtime_error("Key not found and no empty slots!");
@@ -265,12 +262,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
265
262
 
266
263
  template<typename A>
267
264
  coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
268
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 0, all);
265
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
269
266
  }
270
267
 
271
268
  template<typename A>
272
269
  coupon_iterator<A> AuxHashMap<A>::end() const {
273
- return coupon_iterator<A>(entries.data(), 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
270
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
274
271
  }
275
272
 
276
273
  }
@@ -31,49 +31,49 @@ namespace datasketches {
31
31
  template<typename A>
32
32
  class AuxHashMap final {
33
33
  public:
34
- AuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
35
- static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK, const A& allocator);
34
+ AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
35
+ static AuxHashMap* newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
36
36
  static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
37
37
 
38
38
  static AuxHashMap* deserialize(const void* bytes, size_t len,
39
- int lgConfigK,
40
- int auxCount, int lgAuxArrInts,
39
+ uint8_t lgConfigK,
40
+ uint32_t auxCount, uint8_t lgAuxArrInts,
41
41
  bool srcCompact, const A& allocator);
42
- static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
43
- int auxCount, int lgAuxArrInts,
42
+ static AuxHashMap* deserialize(std::istream& is, uint8_t lgConfigK,
43
+ uint32_t auxCount, uint8_t lgAuxArrInts,
44
44
  bool srcCompact, const A& allocator);
45
45
  virtual ~AuxHashMap() = default;
46
46
  static std::function<void(AuxHashMap<A>*)> make_deleter();
47
47
 
48
48
  AuxHashMap* copy() const;
49
- int getUpdatableSizeBytes() const;
50
- int getCompactSizeBytes() const;
49
+ uint32_t getUpdatableSizeBytes() const;
50
+ uint32_t getCompactSizeBytes() const;
51
51
 
52
- int getAuxCount() const;
53
- int* getAuxIntArr();
54
- int getLgAuxArrInts() const;
52
+ uint32_t getAuxCount() const;
53
+ uint32_t* getAuxIntArr();
54
+ uint8_t getLgAuxArrInts() const;
55
55
 
56
56
  coupon_iterator<A> begin(bool all = false) const;
57
57
  coupon_iterator<A> end() const;
58
58
 
59
- void mustAdd(int slotNo, int value);
60
- int mustFindValueFor(int slotNo) const;
61
- void mustReplace(int slotNo, int value);
59
+ void mustAdd(uint32_t slotNo, uint8_t value);
60
+ uint8_t mustFindValueFor(uint32_t slotNo) const;
61
+ void mustReplace(uint32_t slotNo, uint8_t value);
62
62
 
63
63
  private:
64
64
  typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
65
65
 
66
- using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
66
+ using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
67
67
 
68
68
  // static so it can be used when resizing
69
- static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
69
+ static int32_t find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo);
70
70
 
71
71
  void checkGrow();
72
72
  void growAuxSpace();
73
73
 
74
- const int lgConfigK;
75
- int lgAuxArrInts;
76
- int auxCount;
74
+ const uint8_t lgConfigK;
75
+ uint8_t lgAuxArrInts;
76
+ uint32_t auxCount;
77
77
  vector_int entries;
78
78
  };
79
79
 
@@ -27,30 +27,30 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
- static const int numXArrValues = 257;
30
+ static const uint32_t numXArrValues = 257;
31
31
 
32
32
  /**
33
33
  * 18 Values, index 0 is LgK = 4, index 17 is LgK = 21.
34
34
  */
35
- static const int yStrides[] =
35
+ static const uint32_t yStrides[] =
36
36
  {1, 2, 3, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240, 20480, 40960, 81920};
37
37
 
38
38
  template<typename A>
39
- int CompositeInterpolationXTable<A>::get_y_stride(const int logK) {
40
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
41
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
42
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
39
+ uint32_t CompositeInterpolationXTable<A>::get_y_stride(uint8_t logK) {
40
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
41
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
42
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
43
43
  + std::to_string(logK));
44
44
  }
45
- return yStrides[logK - HllUtil<A>::MIN_LOG_K];
45
+ return yStrides[logK - hll_constants::MIN_LOG_K];
46
46
  }
47
47
 
48
48
  template<typename A>
49
- int CompositeInterpolationXTable<A>::get_x_arr_length() {
49
+ uint32_t CompositeInterpolationXTable<A>::get_x_arr_length() {
50
50
  return numXArrValues;
51
51
  }
52
52
 
53
- static const double xArr[18][numXArrValues] = {
53
+ static const double xArray[18][numXArrValues] = {
54
54
  {
55
55
  10.767999803534, 11.237701481774, 11.722738717438, 12.223246391222,
56
56
  12.739366773787, 13.271184824495, 13.818759686650, 14.382159835785,
@@ -797,15 +797,15 @@ static const double xArr[18][numXArrValues] = {
797
797
  };
798
798
 
799
799
  template<typename A>
800
- const double* CompositeInterpolationXTable<A>::get_x_arr(const int logK) {
801
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
802
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
803
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
800
+ const double* CompositeInterpolationXTable<A>::get_x_arr(uint8_t logK) {
801
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
802
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
803
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
804
804
  + std::to_string(logK));
805
805
  }
806
- return xArr[logK - HllUtil<A>::MIN_LOG_K];
806
+ return xArray[logK - hll_constants::MIN_LOG_K];
807
807
  }
808
808
 
809
809
  }
810
810
 
811
- #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
811
+ #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
@@ -27,10 +27,10 @@ namespace datasketches {
27
27
  template<typename A = std::allocator<uint8_t>>
28
28
  class CompositeInterpolationXTable {
29
29
  public:
30
- static int get_y_stride(int logK);
30
+ static uint32_t get_y_stride(uint8_t logK);
31
31
 
32
- static const double* get_x_arr(int logK);
33
- static int get_x_arr_length();
32
+ static const double* get_x_arr(uint8_t logK);
33
+ static uint32_t get_x_arr_length();
34
34
  };
35
35
 
36
36
  }