datasketches 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -26,75 +26,61 @@
26
26
  namespace datasketches {
27
27
 
28
28
  template<typename A>
29
- AuxHashMap<A>::AuxHashMap(int lgAuxArrInts, int lgConfigK)
30
- : lgConfigK(lgConfigK),
31
- lgAuxArrInts(lgAuxArrInts),
32
- auxCount(0) {
33
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
34
- const int numItems = 1 << lgAuxArrInts;
35
- auxIntArr = intAlloc().allocate(numItems);
36
- std::fill(auxIntArr, auxIntArr + numItems, 0);
37
- }
29
+ AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
30
+ lgConfigK(lgConfigK),
31
+ lgAuxArrInts(lgAuxArrInts),
32
+ auxCount(0),
33
+ entries(1ULL << lgAuxArrInts, 0, allocator)
34
+ {}
38
35
 
39
36
  template<typename A>
40
- AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(int lgAuxArrInts, int lgConfigK) {
41
- return new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK);
42
- }
43
-
44
- template<typename A>
45
- AuxHashMap<A>::AuxHashMap(const AuxHashMap& that)
46
- : lgConfigK(that.lgConfigK),
47
- lgAuxArrInts(that.lgAuxArrInts),
48
- auxCount(that.auxCount) {
49
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
50
- const int numItems = 1 << lgAuxArrInts;
51
- auxIntArr = intAlloc().allocate(numItems);
52
- std::copy(that.auxIntArr, that.auxIntArr + numItems, auxIntArr);
37
+ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
38
+ return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
53
39
  }
54
40
 
55
41
  template<typename A>
56
42
  AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
57
- return new (ahmAlloc().allocate(1)) AuxHashMap<A>(that);
43
+ return new (ahmAlloc(that.entries.get_allocator()).allocate(1)) AuxHashMap<A>(that);
58
44
  }
59
45
 
60
46
  template<typename A>
61
47
  AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
62
- int lgConfigK,
63
- int auxCount, int lgAuxArrInts,
64
- bool srcCompact) {
65
- int lgArrInts = lgAuxArrInts;
48
+ uint8_t lgConfigK,
49
+ uint32_t auxCount, uint8_t lgAuxArrInts,
50
+ bool srcCompact, const A& allocator) {
51
+ uint8_t lgArrInts = lgAuxArrInts;
66
52
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
67
53
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
68
54
  } else { // updatable
69
55
  lgArrInts = lgAuxArrInts;
70
56
  }
71
57
 
72
- int configKmask = (1 << lgConfigK) - 1;
58
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
73
59
 
74
60
  AuxHashMap<A>* auxHashMap;
75
- const int* auxPtr = static_cast<const int*>(bytes);
61
+ const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
76
62
  if (srcCompact) {
77
63
  if (len < auxCount * sizeof(int)) {
78
64
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
79
65
  }
80
- auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
81
- for (int i = 0; i < auxCount; ++i) {
82
- int pair = auxPtr[i];
83
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
84
- int value = HllUtil<A>::getValue(pair);
66
+ auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
67
+ for (uint32_t i = 0; i < auxCount; ++i) {
68
+ const uint32_t pair = auxPtr[i];
69
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
70
+ const uint8_t value = HllUtil<A>::getValue(pair);
85
71
  auxHashMap->mustAdd(slotNo, value);
86
72
  }
87
73
  } else { // updatable
88
- int itemsToRead = 1 << lgAuxArrInts;
89
- if (len < itemsToRead * sizeof(int)) {
74
+ uint32_t itemsToRead = 1 << lgAuxArrInts;
75
+ if (len < itemsToRead * sizeof(uint32_t)) {
90
76
  throw std::out_of_range("Input array too small to hold AuxHashMap image");
91
77
  }
92
- auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
93
- for (int i = 0; i < itemsToRead; ++i) {
94
- int pair = auxPtr[i];
95
- if (pair == HllUtil<A>::EMPTY) { continue; }
96
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
97
- int value = HllUtil<A>::getValue(pair);
78
+ auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
79
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
80
+ const uint32_t pair = auxPtr[i];
81
+ if (pair == hll_constants::EMPTY) { continue; }
82
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
83
+ const uint8_t value = HllUtil<A>::getValue(pair);
98
84
  auxHashMap->mustAdd(slotNo, value);
99
85
  }
100
86
  }
@@ -108,38 +94,36 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
108
94
  }
109
95
 
110
96
  template<typename A>
111
- AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
112
- const int auxCount, const int lgAuxArrInts,
113
- const bool srcCompact) {
114
- int lgArrInts = lgAuxArrInts;
97
+ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
98
+ uint32_t auxCount, uint8_t lgAuxArrInts,
99
+ bool srcCompact, const A& allocator) {
100
+ uint8_t lgArrInts = lgAuxArrInts;
115
101
  if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
116
102
  lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
117
103
  } else { // updatable
118
104
  lgArrInts = lgAuxArrInts;
119
105
  }
120
106
 
121
- AuxHashMap<A>* auxHashMap = new (ahmAlloc().allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK);
107
+ AuxHashMap<A>* auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
122
108
  typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
123
109
  aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
124
110
 
125
- int configKmask = (1 << lgConfigK) - 1;
111
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
126
112
 
127
113
  if (srcCompact) {
128
- int pair;
129
- for (int i = 0; i < auxCount; ++i) {
130
- is.read((char*)&pair, sizeof(pair));
131
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
132
- int value = HllUtil<A>::getValue(pair);
114
+ for (uint32_t i = 0; i < auxCount; ++i) {
115
+ const auto pair = read<int>(is);
116
+ uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
117
+ uint8_t value = HllUtil<A>::getValue(pair);
133
118
  auxHashMap->mustAdd(slotNo, value);
134
119
  }
135
120
  } else { // updatable
136
- int itemsToRead = 1 << lgAuxArrInts;
137
- int pair;
138
- for (int i = 0; i < itemsToRead; ++i) {
139
- is.read((char*)&pair, sizeof(pair));
140
- if (pair == HllUtil<A>::EMPTY) { continue; }
141
- int slotNo = HllUtil<A>::getLow26(pair) & configKmask;
142
- int value = HllUtil<A>::getValue(pair);
121
+ const uint32_t itemsToRead = 1 << lgAuxArrInts;
122
+ for (uint32_t i = 0; i < itemsToRead; ++i) {
123
+ const auto pair = read<int>(is);
124
+ if (pair == hll_constants::EMPTY) { continue; }
125
+ const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
126
+ const uint8_t value = HllUtil<A>::getValue(pair);
143
127
  auxHashMap->mustAdd(slotNo, value);
144
128
  }
145
129
  }
@@ -152,81 +136,75 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
152
136
  return aux_ptr.release();
153
137
  }
154
138
 
155
- template<typename A>
156
- AuxHashMap<A>::~AuxHashMap<A>() {
157
- // should be no way to have an object without a valid array
158
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
159
- intAlloc().deallocate(auxIntArr, 1 << lgAuxArrInts);
160
- }
161
-
162
139
  template<typename A>
163
140
  std::function<void(AuxHashMap<A>*)> AuxHashMap<A>::make_deleter() {
164
141
  return [](AuxHashMap<A>* ptr) {
142
+ ahmAlloc alloc(ptr->entries.get_allocator());
165
143
  ptr->~AuxHashMap();
166
- ahmAlloc().deallocate(ptr, 1);
144
+ alloc.deallocate(ptr, 1);
167
145
  };
168
146
  }
169
147
 
170
148
  template<typename A>
171
149
  AuxHashMap<A>* AuxHashMap<A>::copy() const {
172
- return new (ahmAlloc().allocate(1)) AuxHashMap<A>(*this);
150
+ return new (ahmAlloc(entries.get_allocator()).allocate(1)) AuxHashMap<A>(*this);
173
151
  }
174
152
 
175
153
  template<typename A>
176
- int AuxHashMap<A>::getAuxCount() const {
154
+ uint32_t AuxHashMap<A>::getAuxCount() const {
177
155
  return auxCount;
178
156
  }
179
157
 
180
158
  template<typename A>
181
- int* AuxHashMap<A>::getAuxIntArr(){
182
- return auxIntArr;
159
+ uint32_t* AuxHashMap<A>::getAuxIntArr(){
160
+ return entries.data();
183
161
  }
184
162
 
185
163
  template<typename A>
186
- int AuxHashMap<A>::getLgAuxArrInts() const {
164
+ uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
187
165
  return lgAuxArrInts;
188
166
  }
189
167
 
190
168
  template<typename A>
191
- int AuxHashMap<A>::getCompactSizeBytes() const {
169
+ uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
192
170
  return auxCount << 2;
193
171
  }
194
172
 
195
173
  template<typename A>
196
- int AuxHashMap<A>::getUpdatableSizeBytes() const {
174
+ uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
197
175
  return 4 << lgAuxArrInts;
198
176
  }
199
177
 
200
178
  template<typename A>
201
- void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
202
- const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
203
- const int entry_pair = HllUtil<A>::pair(slotNo, value);
179
+ void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
180
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
181
+ const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
204
182
  if (index >= 0) {
205
183
  throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
206
184
  + std::to_string(slotNo) + ", Value: " + std::to_string(value));
207
185
  }
208
186
 
209
187
  // found empty entry
210
- auxIntArr[~index] = entry_pair;
188
+ entries[~index] = entry_pair;
211
189
  ++auxCount;
212
190
  checkGrow();
213
191
  }
214
192
 
215
193
  template<typename A>
216
- int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
217
- const int index = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
194
+ uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
195
+ const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
218
196
  if (index >= 0) {
219
- return HllUtil<A>::getValue(auxIntArr[index]);
197
+ return HllUtil<A>::getValue(entries[index]);
220
198
  }
221
199
 
222
200
  throw std::invalid_argument("slotNo not found: " + std::to_string(slotNo));
223
201
  }
224
202
 
225
203
  template<typename A>
226
- void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
227
- const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, slotNo);
204
+ void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
205
+ const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
228
206
  if (idx >= 0) {
229
- auxIntArr[idx] = HllUtil<A>::pair(slotNo, value);
207
+ entries[idx] = HllUtil<A>::pair(slotNo, value);
230
208
  return;
231
209
  }
232
210
 
@@ -236,30 +214,25 @@ void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
236
214
 
237
215
  template<typename A>
238
216
  void AuxHashMap<A>::checkGrow() {
239
- if ((HllUtil<A>::RESIZE_DENOM * auxCount) > (HllUtil<A>::RESIZE_NUMER * (1 << lgAuxArrInts))) {
217
+ if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
240
218
  growAuxSpace();
241
219
  }
242
220
  }
243
221
 
244
222
  template<typename A>
245
223
  void AuxHashMap<A>::growAuxSpace() {
246
- int* oldArray = auxIntArr;
247
- const int oldArrLen = 1 << lgAuxArrInts;
248
224
  const int configKmask = (1 << lgConfigK) - 1;
249
225
  const int newArrLen = 1 << ++lgAuxArrInts;
250
- typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
251
- auxIntArr = intAlloc().allocate(newArrLen);
252
- std::fill(auxIntArr, auxIntArr + newArrLen, 0);
253
- for (int i = 0; i < oldArrLen; ++i) {
254
- const int fetched = oldArray[i];
255
- if (fetched != HllUtil<A>::EMPTY) {
226
+ vector_int entries_new(newArrLen, 0, entries.get_allocator());
227
+ for (size_t i = 0; i < entries.size(); ++i) {
228
+ const uint32_t fetched = entries[i];
229
+ if (fetched != hll_constants::EMPTY) {
256
230
  // find empty in new array
257
- const int idx = find(auxIntArr, lgAuxArrInts, lgConfigK, fetched & configKmask);
258
- auxIntArr[~idx] = fetched;
231
+ const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
232
+ entries_new[~idx] = fetched;
259
233
  }
260
234
  }
261
-
262
- intAlloc().deallocate(oldArray, oldArrLen);
235
+ entries = std::move(entries_new);
263
236
  }
264
237
 
265
238
  //Searches the Aux arr hash table for an empty or a matching slotNo depending on the context.
@@ -268,21 +241,20 @@ void AuxHashMap<A>::growAuxSpace() {
268
241
  //Continues searching.
269
242
  //If the probe comes back to original index, throws an exception.
270
243
  template<typename A>
271
- int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgConfigK,
272
- const int slotNo) {
273
- const int auxArrMask = (1 << lgAuxArrInts) - 1;
274
- const int configKmask = (1 << lgConfigK) - 1;
275
- int probe = slotNo & auxArrMask;
276
- const int loopIndex = probe;
244
+ int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
245
+ const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
246
+ const uint32_t configKmask = (1 << lgConfigK) - 1;
247
+ uint32_t probe = slotNo & auxArrMask;
248
+ const uint32_t loopIndex = probe;
277
249
  do {
278
- const int arrVal = auxArr[probe];
279
- if (arrVal == HllUtil<A>::EMPTY) { //Compares on entire entry
250
+ const uint32_t arrVal = auxArr[probe];
251
+ if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
280
252
  return ~probe; //empty
281
253
  }
282
254
  else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
283
255
  return probe; //found given slotNo, return probe = index into aux array
284
256
  }
285
- const int stride = (slotNo >> lgAuxArrInts) | 1;
257
+ const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
286
258
  probe = (probe + stride) & auxArrMask;
287
259
  } while (probe != loopIndex);
288
260
  throw std::runtime_error("Key not found and no empty slots!");
@@ -290,12 +262,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
290
262
 
291
263
  template<typename A>
292
264
  coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
293
- return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 0, all);
265
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
294
266
  }
295
267
 
296
268
  template<typename A>
297
269
  coupon_iterator<A> AuxHashMap<A>::end() const {
298
- return coupon_iterator<A>(auxIntArr, 1 << lgAuxArrInts, 1 << lgAuxArrInts, false);
270
+ return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
299
271
  }
300
272
 
301
273
  }
@@ -28,52 +28,53 @@
28
28
 
29
29
  namespace datasketches {
30
30
 
31
- template<typename A = std::allocator<char>>
31
+ template<typename A>
32
32
  class AuxHashMap final {
33
33
  public:
34
- explicit AuxHashMap(int lgAuxArrInts, int lgConfigK);
35
- explicit AuxHashMap(const AuxHashMap<A>& that);
36
- static AuxHashMap* newAuxHashMap(int lgAuxArrInts, int lgConfigK);
34
+ AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
35
+ static AuxHashMap* newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
37
36
  static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
38
37
 
39
38
  static AuxHashMap* deserialize(const void* bytes, size_t len,
40
- int lgConfigK,
41
- int auxCount, int lgAuxArrInts,
42
- bool srcCompact);
43
- static AuxHashMap* deserialize(std::istream& is, int lgConfigK,
44
- int auxCount, int lgAuxArrInts,
45
- bool srcCompact);
46
- virtual ~AuxHashMap();
39
+ uint8_t lgConfigK,
40
+ uint32_t auxCount, uint8_t lgAuxArrInts,
41
+ bool srcCompact, const A& allocator);
42
+ static AuxHashMap* deserialize(std::istream& is, uint8_t lgConfigK,
43
+ uint32_t auxCount, uint8_t lgAuxArrInts,
44
+ bool srcCompact, const A& allocator);
45
+ virtual ~AuxHashMap() = default;
47
46
  static std::function<void(AuxHashMap<A>*)> make_deleter();
48
47
 
49
48
  AuxHashMap* copy() const;
50
- int getUpdatableSizeBytes() const;
51
- int getCompactSizeBytes() const;
49
+ uint32_t getUpdatableSizeBytes() const;
50
+ uint32_t getCompactSizeBytes() const;
52
51
 
53
- int getAuxCount() const;
54
- int* getAuxIntArr();
55
- int getLgAuxArrInts() const;
52
+ uint32_t getAuxCount() const;
53
+ uint32_t* getAuxIntArr();
54
+ uint8_t getLgAuxArrInts() const;
56
55
 
57
56
  coupon_iterator<A> begin(bool all = false) const;
58
57
  coupon_iterator<A> end() const;
59
58
 
60
- void mustAdd(int slotNo, int value);
61
- int mustFindValueFor(int slotNo) const;
62
- void mustReplace(int slotNo, int value);
59
+ void mustAdd(uint32_t slotNo, uint8_t value);
60
+ uint8_t mustFindValueFor(uint32_t slotNo) const;
61
+ void mustReplace(uint32_t slotNo, uint8_t value);
63
62
 
64
63
  private:
65
64
  typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
66
65
 
66
+ using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
67
+
67
68
  // static so it can be used when resizing
68
- static int find(const int* auxArr, int lgAuxArrInts, int lgConfigK, int slotNo);
69
+ static int32_t find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo);
69
70
 
70
71
  void checkGrow();
71
72
  void growAuxSpace();
72
73
 
73
- const int lgConfigK;
74
- int lgAuxArrInts;
75
- int auxCount;
76
- int* auxIntArr;
74
+ const uint8_t lgConfigK;
75
+ uint8_t lgAuxArrInts;
76
+ uint32_t auxCount;
77
+ vector_int entries;
77
78
  };
78
79
 
79
80
  }
@@ -27,30 +27,30 @@
27
27
 
28
28
  namespace datasketches {
29
29
 
30
- static const int numXArrValues = 257;
30
+ static const uint32_t numXArrValues = 257;
31
31
 
32
32
  /**
33
33
  * 18 Values, index 0 is LgK = 4, index 17 is LgK = 21.
34
34
  */
35
- static const int yStrides[] =
35
+ static const uint32_t yStrides[] =
36
36
  {1, 2, 3, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240, 20480, 40960, 81920};
37
37
 
38
38
  template<typename A>
39
- int CompositeInterpolationXTable<A>::get_y_stride(const int logK) {
40
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
41
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
42
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
39
+ uint32_t CompositeInterpolationXTable<A>::get_y_stride(uint8_t logK) {
40
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
41
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
42
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
43
43
  + std::to_string(logK));
44
44
  }
45
- return yStrides[logK - HllUtil<A>::MIN_LOG_K];
45
+ return yStrides[logK - hll_constants::MIN_LOG_K];
46
46
  }
47
47
 
48
48
  template<typename A>
49
- int CompositeInterpolationXTable<A>::get_x_arr_length() {
49
+ uint32_t CompositeInterpolationXTable<A>::get_x_arr_length() {
50
50
  return numXArrValues;
51
51
  }
52
52
 
53
- static const double xArr[18][numXArrValues] = {
53
+ static const double xArray[18][numXArrValues] = {
54
54
  {
55
55
  10.767999803534, 11.237701481774, 11.722738717438, 12.223246391222,
56
56
  12.739366773787, 13.271184824495, 13.818759686650, 14.382159835785,
@@ -797,15 +797,15 @@ static const double xArr[18][numXArrValues] = {
797
797
  };
798
798
 
799
799
  template<typename A>
800
- const double* CompositeInterpolationXTable<A>::get_x_arr(const int logK) {
801
- if (logK < HllUtil<A>::MIN_LOG_K || logK > HllUtil<A>::MAX_LOG_K) {
802
- throw std::invalid_argument("logK must be in range [" + std::to_string(HllUtil<A>::MIN_LOG_K)
803
- + ", " + std::to_string(HllUtil<A>::MAX_LOG_K) + "]. Found: "
800
+ const double* CompositeInterpolationXTable<A>::get_x_arr(uint8_t logK) {
801
+ if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
802
+ throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
803
+ + ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
804
804
  + std::to_string(logK));
805
805
  }
806
- return xArr[logK - HllUtil<A>::MIN_LOG_K];
806
+ return xArray[logK - hll_constants::MIN_LOG_K];
807
807
  }
808
808
 
809
809
  }
810
810
 
811
- #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
811
+ #endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
@@ -24,17 +24,17 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- template<typename A = std::allocator<char>>
27
+ template<typename A = std::allocator<uint8_t>>
28
28
  class CompositeInterpolationXTable {
29
29
  public:
30
- static int get_y_stride(int logK);
30
+ static uint32_t get_y_stride(uint8_t logK);
31
31
 
32
- static const double* get_x_arr(int logK);
33
- static int get_x_arr_length();
32
+ static const double* get_x_arr(uint8_t logK);
33
+ static uint32_t get_x_arr_length();
34
34
  };
35
35
 
36
36
  }
37
37
 
38
38
  #include "CompositeInterpolationXTable-internal.hpp"
39
39
 
40
- #endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */
40
+ #endif /* _COMPOSITEINTERPOLATIONXTABLE_HPP_ */