datasketches 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -56,9 +56,9 @@ void init_hll(py::module &m) {
56
56
  .export_values();
57
57
 
58
58
  py::class_<hll_sketch>(m, "hll_sketch")
59
- .def(py::init<int>(), py::arg("lg_k"))
60
- .def(py::init<int, target_hll_type>(), py::arg("lg_k"), py::arg("tgt_type"))
61
- .def(py::init<int, target_hll_type, bool>(), py::arg("lg_k"), py::arg("tgt_type"), py::arg("start_max_size")=false)
59
+ .def(py::init<uint8_t>(), py::arg("lg_k"))
60
+ .def(py::init<uint8_t, target_hll_type>(), py::arg("lg_k"), py::arg("tgt_type"))
61
+ .def(py::init<uint8_t, target_hll_type, bool>(), py::arg("lg_k"), py::arg("tgt_type"), py::arg("start_max_size")=false)
62
62
  .def_static("deserialize", &dspy::hll_sketch_deserialize,
63
63
  "Reads a bytes object and returns the corresponding hll_sketch")
64
64
  .def("serialize_compact", &dspy::hll_sketch_serialize_compact,
@@ -104,7 +104,7 @@ void init_hll(py::module &m) {
104
104
  ;
105
105
 
106
106
  py::class_<hll_union>(m, "hll_union")
107
- .def(py::init<int>(), py::arg("lg_max_k"))
107
+ .def(py::init<uint8_t>(), py::arg("lg_max_k"))
108
108
  .def_property_readonly("lg_config_k", &hll_union::get_lg_config_k, "Configured lg_k value for the union")
109
109
  .def_property_readonly("tgt_type", &hll_union::get_target_type, "Returns the HLL type (4, 6, or 8) when in estimation mode")
110
110
  .def("get_estimate", &hll_union::get_estimate,
@@ -116,7 +116,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
116
116
  using namespace datasketches;
117
117
 
118
118
  py::class_<kll_sketch<T>>(m, name)
119
- .def(py::init<uint16_t>(), py::arg("k")=kll_sketch<T>::DEFAULT_K)
119
+ .def(py::init<uint16_t>(), py::arg("k")=kll_constants::DEFAULT_K)
120
120
  .def(py::init<const kll_sketch<T>&>())
121
121
  .def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"),
122
122
  "Updates the sketch with the given value")
@@ -64,8 +64,8 @@ compact_theta_sketch compact_theta_sketch_deserialize(py::bytes skBytes, uint64_
64
64
  return compact_theta_sketch::deserialize(skStr.c_str(), skStr.length(), seed);
65
65
  }
66
66
 
67
- py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b) {
68
- return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b));
67
+ py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b, uint64_t seed) {
68
+ return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b, seed));
69
69
  }
70
70
 
71
71
  }
@@ -103,7 +103,7 @@ void init_theta(py::module &m) {
103
103
 
104
104
  py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
105
105
  .def(py::init(&dspy::update_theta_sketch_factory),
106
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
106
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
107
107
  .def(py::init<const update_theta_sketch&>())
108
108
  .def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"),
109
109
  "Updates the sketch with the given integral value")
@@ -127,7 +127,7 @@ void init_theta(py::module &m) {
127
127
 
128
128
  py::class_<theta_union>(m, "theta_union")
129
129
  .def(py::init(&dspy::theta_union_factory),
130
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
130
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
131
131
  .def("update", &theta_union::update<const theta_sketch&>, py::arg("sketch"),
132
132
  "Updates the union with the given sketch")
133
133
  .def("get_result", &theta_union::get_result, py::arg("ordered")=true,
@@ -153,18 +153,18 @@ void init_theta(py::module &m) {
153
153
 
154
154
  py::class_<theta_jaccard_similarity>(m, "theta_jaccard_similarity")
155
155
  .def_static("jaccard", &dspy::theta_jaccard_sim_computation,
156
- py::arg("sketch_a"), py::arg("sketch_b"),
156
+ py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
157
157
  "Returns a list with {lower_bound, estimate, upper_bound} of the Jaccard similarity between sketches")
158
158
  .def_static("exactly_equal", &theta_jaccard_similarity::exactly_equal<const theta_sketch&, const theta_sketch&>,
159
- py::arg("sketch_a"), py::arg("sketch_b"),
159
+ py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
160
160
  "Returns True if sketch_a and sketch_b are equivalent, otherwise False")
161
161
  .def_static("similarity_test", &theta_jaccard_similarity::similarity_test<const theta_sketch&, const theta_sketch&>,
162
- py::arg("actual"), py::arg("expected"), py::arg("threshold"),
162
+ py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
163
163
  "Tests similarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
164
164
  "index J_{LB} of the actual and expected sketches. If J_{LB} >= threshold, then the sketches are considered "
165
165
  "to be similar sith a confidence of 97.7% and returns True, otherwise False.")
166
166
  .def_static("dissimilarity_test", &theta_jaccard_similarity::dissimilarity_test<const theta_sketch&, const theta_sketch&>,
167
- py::arg("actual"), py::arg("expected"), py::arg("threshold"),
167
+ py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
168
168
  "Tests dissimilarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
169
169
  "index J_{UB} of the actual and expected sketches. If J_{UB} <= threshold, then the sketches are considered "
170
170
  "to be dissimilar sith a confidence of 97.7% and returns True, otherwise False.")
@@ -29,14 +29,20 @@ namespace py = pybind11;
29
29
 
30
30
  namespace datasketches {
31
31
 
32
+ namespace vector_of_kll_constants {
33
+ static const uint32_t DEFAULT_K = kll_constants::DEFAULT_K;
34
+ static const uint32_t DEFAULT_D = 1;
35
+ }
36
+
32
37
  // Wrapper class for Numpy compatibility
33
38
  template <typename T, typename C = std::less<T>, typename S = serde<T>>
34
39
  class vector_of_kll_sketches {
35
40
  public:
36
- static const uint32_t DEFAULT_K = kll_sketch<T, C, S>::DEFAULT_K;
37
- static const uint32_t DEFAULT_D = 1;
41
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
42
+ static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
43
+ static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;
38
44
 
39
- explicit vector_of_kll_sketches(uint32_t k = DEFAULT_K, uint32_t d = DEFAULT_D);
45
+ explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
40
46
  vector_of_kll_sketches(const vector_of_kll_sketches& other);
41
47
  vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
42
48
  vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
@@ -432,8 +438,8 @@ void bind_vector_of_kll_sketches(py::module &m, const char* name) {
432
438
  using namespace datasketches;
433
439
 
434
440
  py::class_<vector_of_kll_sketches<T>>(m, name)
435
- .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_sketches<T>::DEFAULT_K,
436
- py::arg("d")=vector_of_kll_sketches<T>::DEFAULT_D)
441
+ .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_constants::DEFAULT_K,
442
+ py::arg("d")=vector_of_kll_constants::DEFAULT_D)
437
443
  .def(py::init<const vector_of_kll_sketches<T>&>())
438
444
  // allow user to retrieve k or d, in case it's instantiated w/ defaults
439
445
  .def("get_k", &vector_of_kll_sketches<T>::get_k,
@@ -32,7 +32,7 @@ namespace python {
32
32
  template<typename T>
33
33
  py::list vo_sketch_get_samples(const var_opt_sketch<T>& sk) {
34
34
  py::list list;
35
- for (auto& item : sk) {
35
+ for (auto item : sk) {
36
36
  py::tuple t = py::make_tuple(item.first, item.second);
37
37
  list.append(t);
38
38
  }
@@ -57,7 +57,7 @@ std::string vo_sketch_to_string(const var_opt_sketch<T>& sk, bool print_items) {
57
57
  ss << sk.to_string();
58
58
  ss << "### VarOpt Sketch Items" << std::endl;
59
59
  int i = 0;
60
- for (auto& item : sk) {
60
+ for (auto item : sk) {
61
61
  // item.second is always a double
62
62
  // item.first is an arbitrary py::object, so get the value by
63
63
  // using internal str() method then casting to C++ std::string
@@ -58,7 +58,7 @@ class HllTest(unittest.TestCase):
58
58
  self.assertEqual(len(sk_bytes), result.get_compact_serialization_bytes())
59
59
  new_hll = hll_sketch.deserialize(sk_bytes)
60
60
 
61
- # the sketch can self-report its configuation and status
61
+ # the sketch can self-report its configuration and status
62
62
  self.assertEqual(new_hll.lg_config_k, k)
63
63
  self.assertEqual(new_hll.tgt_type, tgt_hll_type.HLL_4)
64
64
  self.assertFalse(new_hll.is_empty())
@@ -30,10 +30,10 @@ class KllTest(unittest.TestCase):
30
30
  kll.update(0.0)
31
31
 
32
32
  # 0 should be near the median
33
- self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.025)
33
+ self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.035)
34
34
 
35
35
  # the median should be near 0
36
- self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.025)
36
+ self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.035)
37
37
 
38
38
  # we also track the min/max independently from the rest of the data
39
39
  # which lets us know the full observed data range
@@ -30,10 +30,10 @@ class reqTest(unittest.TestCase):
30
30
  req.update(0.0)
31
31
 
32
32
  # 0 should be near the median
33
- self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.03)
33
+ self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.045)
34
34
 
35
35
  # the median should be near 0
36
- self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.03)
36
+ self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.045)
37
37
 
38
38
  # we also track the min/max independently from the rest of the data
39
39
  # which lets us know the full observed data range
@@ -39,9 +39,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
39
39
  kll.update(dat)
40
40
 
41
41
  # 0 should be near the median
42
- np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.025)
42
+ np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
43
43
  # the median should be near 0
44
- np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.025)
44
+ np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
45
45
  # we also track the min/max independently from the rest of the data
46
46
  # which lets us know the full observed data range
47
47
  np.testing.assert_allclose(kll.get_min_values(), smin)
@@ -118,9 +118,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
118
118
  kll.update(dat)
119
119
 
120
120
  # 0 should be near the median
121
- np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.025)
121
+ np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
122
122
  # the median should be near 0
123
- np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.025)
123
+ np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
124
124
  # we also track the min/max independently from the rest of the data
125
125
  # which lets us know the full observed data range
126
126
  np.testing.assert_allclose(kll.get_min_values(), smin)
@@ -46,7 +46,7 @@ class VoTest(unittest.TestCase):
46
46
  self.assertEqual(len(items), k)
47
47
 
48
48
  # we can also apply a predicate to the sketch to get an estimate
49
- # (with optimially minimal variance) of the subset sum of items
49
+ # (with optimally minimal variance) of the subset sum of items
50
50
  # matching that predicate among the entire population
51
51
 
52
52
  # we'll use a lambda here, but any function operating on a single
@@ -89,11 +89,11 @@ class VoTest(unittest.TestCase):
89
89
  # the union and a sketch.
90
90
  print(union)
91
91
 
92
- # if we want to print the list of itmes, there must be a
92
+ # if we want to print the list of items, there must be a
93
93
  # __str__() method for each item (which need not be the same
94
94
  # type; they're all generic python objects when used from
95
95
  # python), otherwise you may trigger an exception.
96
- # to_string() is provided as a convenince to avoid direct
96
+ # to_string() is provided as a convenience to avoid direct
97
97
  # calls to __str__() with parameters.
98
98
  print(result.to_string(True))
99
99
 
@@ -32,29 +32,16 @@ target_include_directories(req
32
32
  target_link_libraries(req INTERFACE common)
33
33
  target_compile_features(req INTERFACE cxx_std_11)
34
34
 
35
- set(req_HEADERS "")
36
- list(APPEND req_HEADERS "include/req_common.hpp")
37
- list(APPEND req_HEADERS "include/req_sketch.hpp")
38
- list(APPEND req_HEADERS "include/req_sketch_impl.hpp")
39
- list(APPEND req_HEADERS "include/req_compactor.hpp")
40
- list(APPEND req_HEADERS "include/req_compactor_impl.hpp")
41
- list(APPEND req_HEADERS "include/req_quantile_calculator.hpp")
42
- list(APPEND req_HEADERS "include/req_quantile_calculator_impl.hpp")
43
-
44
35
  install(TARGETS req
45
36
  EXPORT ${PROJECT_NAME}
46
37
  )
47
38
 
48
- install(FILES ${req_HEADERS}
39
+ install(FILES
40
+ include/req_common.hpp
41
+ include/req_sketch.hpp
42
+ include/req_sketch_impl.hpp
43
+ include/req_compactor.hpp
44
+ include/req_compactor_impl.hpp
45
+ include/req_quantile_calculator.hpp
46
+ include/req_quantile_calculator_impl.hpp
49
47
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
50
-
51
- target_sources(req
52
- INTERFACE
53
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_common.hpp
54
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch.hpp
55
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch_impl.hpp
56
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor.hpp
57
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor_impl.hpp
58
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator.hpp
59
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator_impl.hpp
60
- )
@@ -29,7 +29,8 @@
29
29
  namespace datasketches {
30
30
 
31
31
  // TODO: have a common random bit with KLL
32
- static std::independent_bits_engine<std::mt19937, 1, unsigned> req_random_bit(std::chrono::system_clock::now().time_since_epoch().count());
32
+ static std::independent_bits_engine<std::mt19937, 1, unsigned>
33
+ req_random_bit(static_cast<unsigned>(std::chrono::system_clock::now().time_since_epoch().count()));
33
34
 
34
35
  namespace req_constants {
35
36
  static const uint16_t MIN_K = 4;
@@ -110,8 +110,8 @@ private:
110
110
 
111
111
  bool ensure_enough_sections();
112
112
  std::pair<uint32_t, uint32_t> compute_compaction_range(uint32_t secs_to_compact) const;
113
- void grow(size_t new_capacity);
114
- void ensure_space(size_t num);
113
+ void grow(uint32_t new_capacity);
114
+ void ensure_space(uint32_t num);
115
115
 
116
116
  static uint32_t nearest_even(float value);
117
117
 
@@ -123,10 +123,10 @@ private:
123
123
  req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Allocator& allocator);
124
124
 
125
125
  template<typename S>
126
- static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, size_t num);
126
+ static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, uint32_t num);
127
127
 
128
128
  template<typename S>
129
- static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, size_t num);
129
+ static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
130
130
 
131
131
  };
132
132
 
@@ -38,7 +38,7 @@ lg_weight_(lg_weight),
38
38
  hra_(hra),
39
39
  coin_(false),
40
40
  sorted_(sorted),
41
- section_size_raw_(section_size),
41
+ section_size_raw_(static_cast<float>(section_size)),
42
42
  section_size_(section_size),
43
43
  num_sections_(req_constants::INIT_NUM_SECTIONS),
44
44
  state_(0),
@@ -72,9 +72,9 @@ items_(nullptr)
72
72
  {
73
73
  if (other.items_ != nullptr) {
74
74
  items_ = allocator_.allocate(capacity_);
75
- const size_t from = hra_ ? capacity_ - num_items_ : 0;
76
- const size_t to = hra_ ? capacity_ : num_items_;
77
- for (size_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
75
+ const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
76
+ const uint32_t to = hra_ ? capacity_ : num_items_;
77
+ for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
78
78
  }
79
79
  }
80
80
 
@@ -165,16 +165,16 @@ template<typename T, typename C, typename A>
165
165
  template<typename FwdT>
166
166
  void req_compactor<T, C, A>::append(FwdT&& item) {
167
167
  if (num_items_ == capacity_) grow(capacity_ + get_nom_capacity());
168
- const size_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
168
+ const uint32_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
169
169
  new (items_ + i) T(std::forward<FwdT>(item));
170
170
  ++num_items_;
171
171
  if (num_items_ > 1) sorted_ = false;
172
172
  }
173
173
 
174
174
  template<typename T, typename C, typename A>
175
- void req_compactor<T, C, A>::grow(size_t new_capacity) {
175
+ void req_compactor<T, C, A>::grow(uint32_t new_capacity) {
176
176
  T* new_items = allocator_.allocate(new_capacity);
177
- size_t new_i = hra_ ? new_capacity - num_items_ : 0;
177
+ uint32_t new_i = hra_ ? new_capacity - num_items_ : 0;
178
178
  for (auto it = begin(); it != end(); ++it, ++new_i) {
179
179
  new (new_items + new_i) T(std::move(*it));
180
180
  (*it).~T();
@@ -185,7 +185,7 @@ void req_compactor<T, C, A>::grow(size_t new_capacity) {
185
185
  }
186
186
 
187
187
  template<typename T, typename C, typename A>
188
- void req_compactor<T, C, A>::ensure_space(size_t num) {
188
+ void req_compactor<T, C, A>::ensure_space(uint32_t num) {
189
189
  if (num_items_ + num > capacity_) grow(num_items_ + num + get_nom_capacity());
190
190
  }
191
191
 
@@ -218,13 +218,13 @@ void req_compactor<T, C, A>::merge(FwdC&& other) {
218
218
  while (ensure_enough_sections()) {}
219
219
  ensure_space(other.get_num_items());
220
220
  sort();
221
- auto middle = hra_ ? begin() : end();
221
+ auto offset = hra_ ? capacity_ - num_items_ : num_items_;
222
222
  auto from = hra_ ? begin() - other.get_num_items() : end();
223
223
  auto to = from + other.get_num_items();
224
224
  auto other_it = other.begin();
225
225
  for (auto it = from; it != to; ++it, ++other_it) new (it) T(conditional_forward<FwdC>(*other_it));
226
226
  if (!other.sorted_) std::sort(from, to, C());
227
- if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), middle, hra_ ? end() : to, C());
227
+ if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), items_ + offset, hra_ ? end() : to, C());
228
228
  num_items_ += other.get_num_items();
229
229
  }
230
230
 
@@ -240,7 +240,7 @@ template<typename T, typename C, typename A>
240
240
  std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& next) {
241
241
  const uint32_t starting_nom_capacity = get_nom_capacity();
242
242
  // choose a part of the buffer to compact
243
- const uint32_t secs_to_compact = std::min(static_cast<uint32_t>(count_trailing_zeros_in_u32(~state_) + 1), static_cast<uint32_t>(num_sections_));
243
+ const uint32_t secs_to_compact = std::min<uint32_t>(count_trailing_zeros_in_u64(~state_) + 1, num_sections_);
244
244
  auto compaction_range = compute_compaction_range(secs_to_compact);
245
245
  if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
246
246
 
@@ -267,9 +267,9 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& nex
267
267
 
268
268
  template<typename T, typename C, typename A>
269
269
  bool req_compactor<T, C, A>::ensure_enough_sections() {
270
- const float ssr = section_size_raw_ / sqrt(2);
270
+ const float ssr = section_size_raw_ / sqrtf(2);
271
271
  const uint32_t ne = nearest_even(ssr);
272
- if (state_ >= static_cast<uint64_t>(1 << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
272
+ if (state_ >= static_cast<uint64_t>(1ULL << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
273
273
  section_size_raw_ = ssr;
274
274
  section_size_ = ne;
275
275
  num_sections_ <<= 1;
@@ -284,8 +284,8 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compute_compaction_range(u
284
284
  uint32_t non_compact = get_nom_capacity() / 2 + (num_sections_ - secs_to_compact) * section_size_;
285
285
  // make compacted region even
286
286
  if (((num_items_ - non_compact) & 1) == 1) ++non_compact;
287
- const size_t low = hra_ ? 0 : non_compact;
288
- const size_t high = hra_ ? num_items_ - non_compact : num_items_;
287
+ const uint32_t low = hra_ ? 0 : non_compact;
288
+ const uint32_t high = hra_ ? num_items_ - non_compact : num_items_;
289
289
  return std::pair<uint32_t, uint32_t>(low, high);
290
290
  }
291
291
 
@@ -309,19 +309,6 @@ void req_compactor<T, C, A>::promote_evens_or_odds(InIter from, InIter to, bool
309
309
  }
310
310
  }
311
311
 
312
- // helpers for integral types
313
- template<typename T>
314
- static inline T read(std::istream& is) {
315
- T value;
316
- is.read(reinterpret_cast<char*>(&value), sizeof(T));
317
- return value;
318
- }
319
-
320
- template<typename T>
321
- static inline void write(std::ostream& os, T value) {
322
- os.write(reinterpret_cast<const char*>(&value), sizeof(T));
323
- }
324
-
325
312
  // implementation for fixed-size arithmetic types (integral and floating point)
326
313
  template<typename T, typename C, typename A>
327
314
  template<typename S, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
@@ -394,7 +381,7 @@ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, con
394
381
 
395
382
  template<typename T, typename C, typename A>
396
383
  template<typename S>
397
- auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, size_t num)
384
+ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, uint32_t num)
398
385
  -> std::unique_ptr<T, items_deleter> {
399
386
  A alloc(allocator);
400
387
  std::unique_ptr<T, items_deleter> items(alloc.allocate(num), items_deleter(allocator, false, num));
@@ -402,7 +389,7 @@ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde,
402
389
  // serde did not throw, enable destructors
403
390
  items.get_deleter().set_destroy(true);
404
391
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
405
- return std::move(items);
392
+ return items;
406
393
  }
407
394
 
408
395
  template<typename T, typename C, typename A>
@@ -443,7 +430,7 @@ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(co
443
430
 
444
431
  template<typename T, typename C, typename A>
445
432
  template<typename S>
446
- auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, size_t num)
433
+ auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, uint32_t num)
447
434
  -> std::pair<std::unique_ptr<T, items_deleter>, size_t> {
448
435
  const char* ptr = static_cast<const char*>(bytes);
449
436
  const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -478,22 +465,22 @@ items_(items.release())
478
465
  template<typename T, typename C, typename A>
479
466
  class req_compactor<T, C, A>::items_deleter {
480
467
  public:
481
- items_deleter(const A& allocator, bool destroy, uint32_t num): allocator(allocator), destroy(destroy), num(num) {}
468
+ items_deleter(const A& allocator, bool destroy, size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
482
469
  void operator() (T* ptr) {
483
470
  if (ptr != nullptr) {
484
- if (destroy) {
485
- for (uint32_t i = 0; i < num; ++i) {
471
+ if (destroy_) {
472
+ for (size_t i = 0; i < num_; ++i) {
486
473
  ptr[i].~T();
487
474
  }
488
475
  }
489
- allocator.deallocate(ptr, num);
476
+ allocator_.deallocate(ptr, num_);
490
477
  }
491
478
  }
492
- void set_destroy(bool destroy) { this->destroy = destroy; }
479
+ void set_destroy(bool destroy) { destroy_ = destroy; }
493
480
  private:
494
- A allocator;
495
- bool destroy;
496
- uint32_t num;
481
+ A allocator_;
482
+ bool destroy_;
483
+ size_t num_;
497
484
  };
498
485
 
499
486
  } /* namespace datasketches */
@@ -319,7 +319,7 @@ private:
319
319
 
320
320
  // for deserialization
321
321
  class item_deleter;
322
- req_sketch(uint32_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
322
+ req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
323
323
 
324
324
  static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
325
325
  static void check_serial_version(uint8_t serial_version);
@@ -28,7 +28,7 @@ namespace datasketches {
28
28
  template<typename T, typename C, typename S, typename A>
29
29
  req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
30
30
  allocator_(allocator),
31
- k_(std::max(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
31
+ k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
32
32
  hra_(hra),
33
33
  max_nom_size_(0),
34
34
  num_retained_(0),
@@ -401,7 +401,7 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os) const {
401
401
  write(os, k_);
402
402
  const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
403
403
  write(os, num_levels);
404
- const uint8_t num_raw_items = raw_items ? n_ : 0;
404
+ const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
405
405
  write(os, num_raw_items);
406
406
  if (is_empty()) return;
407
407
  if (is_estimation_mode()) {
@@ -440,7 +440,7 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const -> vect
440
440
  ptr += copy_to_mem(k_, ptr);
441
441
  const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
442
442
  ptr += copy_to_mem(num_levels, ptr);
443
- const uint8_t num_raw_items = raw_items ? n_ : 0;
443
+ const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
444
444
  ptr += copy_to_mem(num_raw_items, ptr);
445
445
  if (!is_empty()) {
446
446
  if (is_estimation_mode()) {
@@ -620,7 +620,7 @@ void req_sketch<T, C, S, A>::grow() {
620
620
 
621
621
  template<typename T, typename C, typename S, typename A>
622
622
  uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
623
- return compactors_.size();
623
+ return static_cast<uint8_t>(compactors_.size());
624
624
  }
625
625
 
626
626
  template<typename T, typename C, typename S, typename A>
@@ -653,7 +653,9 @@ void req_sketch<T, C, S, A>::compress() {
653
653
 
654
654
  template<typename T, typename C, typename S, typename A>
655
655
  string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
656
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
656
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
657
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
658
+ std::ostringstream os;
657
659
  os << "### REQ sketch summary:" << std::endl;
658
660
  os << " K : " << k_ << std::endl;
659
661
  os << " High Rank Acc : " << (hra_ ? "true" : "false") << std::endl;
@@ -693,7 +695,7 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
693
695
  }
694
696
  os << "### End sketch data" << std::endl;
695
697
  }
696
- return os.str();
698
+ return string<A>(os.str().c_str(), allocator_);
697
699
  }
698
700
 
699
701
  template<typename T, typename C, typename S, typename A>
@@ -711,7 +713,7 @@ class req_sketch<T, C, S, A>::item_deleter {
711
713
  };
712
714
 
713
715
  template<typename T, typename C, typename S, typename A>
714
- req_sketch<T, C, S, A>::req_sketch(uint32_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
716
+ req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
715
717
  allocator_(compactors.get_allocator()),
716
718
  k_(k),
717
719
  hra_(hra),
@@ -766,9 +768,9 @@ auto req_sketch<T, C, S, A>::end() const -> const_iterator {
766
768
 
767
769
  template<typename T, typename C, typename S, typename A>
768
770
  req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
769
- levels_it_(begin),
770
- levels_end_(end),
771
- compactor_it_((*levels_it_).begin())
771
+ levels_it_(begin),
772
+ levels_end_(end),
773
+ compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
772
774
  {}
773
775
 
774
776
  template<typename T, typename C, typename S, typename A>
@@ -802,7 +804,7 @@ bool req_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& ot
802
804
 
803
805
  template<typename T, typename C, typename S, typename A>
804
806
  std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
805
- return std::pair<const T&, const uint64_t>(*compactor_it_, 1 << (*levels_it_).get_lg_weight());
807
+ return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
806
808
  }
807
809
 
808
810
  } /* namespace datasketches */