datasketches 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -56,9 +56,9 @@ void init_hll(py::module &m) {
56
56
  .export_values();
57
57
 
58
58
  py::class_<hll_sketch>(m, "hll_sketch")
59
- .def(py::init<int>(), py::arg("lg_k"))
60
- .def(py::init<int, target_hll_type>(), py::arg("lg_k"), py::arg("tgt_type"))
61
- .def(py::init<int, target_hll_type, bool>(), py::arg("lg_k"), py::arg("tgt_type"), py::arg("start_max_size")=false)
59
+ .def(py::init<uint8_t>(), py::arg("lg_k"))
60
+ .def(py::init<uint8_t, target_hll_type>(), py::arg("lg_k"), py::arg("tgt_type"))
61
+ .def(py::init<uint8_t, target_hll_type, bool>(), py::arg("lg_k"), py::arg("tgt_type"), py::arg("start_max_size")=false)
62
62
  .def_static("deserialize", &dspy::hll_sketch_deserialize,
63
63
  "Reads a bytes object and returns the corresponding hll_sketch")
64
64
  .def("serialize_compact", &dspy::hll_sketch_serialize_compact,
@@ -104,7 +104,7 @@ void init_hll(py::module &m) {
104
104
  ;
105
105
 
106
106
  py::class_<hll_union>(m, "hll_union")
107
- .def(py::init<int>(), py::arg("lg_max_k"))
107
+ .def(py::init<uint8_t>(), py::arg("lg_max_k"))
108
108
  .def_property_readonly("lg_config_k", &hll_union::get_lg_config_k, "Configured lg_k value for the union")
109
109
  .def_property_readonly("tgt_type", &hll_union::get_target_type, "Returns the HLL type (4, 6, or 8) when in estimation mode")
110
110
  .def("get_estimate", &hll_union::get_estimate,
@@ -116,7 +116,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
116
116
  using namespace datasketches;
117
117
 
118
118
  py::class_<kll_sketch<T>>(m, name)
119
- .def(py::init<uint16_t>(), py::arg("k")=kll_sketch<T>::DEFAULT_K)
119
+ .def(py::init<uint16_t>(), py::arg("k")=kll_constants::DEFAULT_K)
120
120
  .def(py::init<const kll_sketch<T>&>())
121
121
  .def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"),
122
122
  "Updates the sketch with the given value")
@@ -64,8 +64,8 @@ compact_theta_sketch compact_theta_sketch_deserialize(py::bytes skBytes, uint64_
64
64
  return compact_theta_sketch::deserialize(skStr.c_str(), skStr.length(), seed);
65
65
  }
66
66
 
67
- py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b) {
68
- return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b));
67
+ py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b, uint64_t seed) {
68
+ return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b, seed));
69
69
  }
70
70
 
71
71
  }
@@ -103,7 +103,7 @@ void init_theta(py::module &m) {
103
103
 
104
104
  py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
105
105
  .def(py::init(&dspy::update_theta_sketch_factory),
106
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
106
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
107
107
  .def(py::init<const update_theta_sketch&>())
108
108
  .def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"),
109
109
  "Updates the sketch with the given integral value")
@@ -127,7 +127,7 @@ void init_theta(py::module &m) {
127
127
 
128
128
  py::class_<theta_union>(m, "theta_union")
129
129
  .def(py::init(&dspy::theta_union_factory),
130
- py::arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
130
+ py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
131
131
  .def("update", &theta_union::update<const theta_sketch&>, py::arg("sketch"),
132
132
  "Updates the union with the given sketch")
133
133
  .def("get_result", &theta_union::get_result, py::arg("ordered")=true,
@@ -153,18 +153,18 @@ void init_theta(py::module &m) {
153
153
 
154
154
  py::class_<theta_jaccard_similarity>(m, "theta_jaccard_similarity")
155
155
  .def_static("jaccard", &dspy::theta_jaccard_sim_computation,
156
- py::arg("sketch_a"), py::arg("sketch_b"),
156
+ py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
157
157
  "Returns a list with {lower_bound, estimate, upper_bound} of the Jaccard similarity between sketches")
158
158
  .def_static("exactly_equal", &theta_jaccard_similarity::exactly_equal<const theta_sketch&, const theta_sketch&>,
159
- py::arg("sketch_a"), py::arg("sketch_b"),
159
+ py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
160
160
  "Returns True if sketch_a and sketch_b are equivalent, otherwise False")
161
161
  .def_static("similarity_test", &theta_jaccard_similarity::similarity_test<const theta_sketch&, const theta_sketch&>,
162
- py::arg("actual"), py::arg("expected"), py::arg("threshold"),
162
+ py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
163
163
  "Tests similarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
164
164
  "index J_{LB} of the actual and expected sketches. If J_{LB} >= threshold, then the sketches are considered "
165
165
  "to be similar sith a confidence of 97.7% and returns True, otherwise False.")
166
166
  .def_static("dissimilarity_test", &theta_jaccard_similarity::dissimilarity_test<const theta_sketch&, const theta_sketch&>,
167
- py::arg("actual"), py::arg("expected"), py::arg("threshold"),
167
+ py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
168
168
  "Tests dissimilarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
169
169
  "index J_{UB} of the actual and expected sketches. If J_{UB} <= threshold, then the sketches are considered "
170
170
  "to be dissimilar sith a confidence of 97.7% and returns True, otherwise False.")
@@ -29,14 +29,20 @@ namespace py = pybind11;
29
29
 
30
30
  namespace datasketches {
31
31
 
32
+ namespace vector_of_kll_constants {
33
+ static const uint32_t DEFAULT_K = kll_constants::DEFAULT_K;
34
+ static const uint32_t DEFAULT_D = 1;
35
+ }
36
+
32
37
  // Wrapper class for Numpy compatibility
33
38
  template <typename T, typename C = std::less<T>, typename S = serde<T>>
34
39
  class vector_of_kll_sketches {
35
40
  public:
36
- static const uint32_t DEFAULT_K = kll_sketch<T, C, S>::DEFAULT_K;
37
- static const uint32_t DEFAULT_D = 1;
41
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
42
+ static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
43
+ static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;
38
44
 
39
- explicit vector_of_kll_sketches(uint32_t k = DEFAULT_K, uint32_t d = DEFAULT_D);
45
+ explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
40
46
  vector_of_kll_sketches(const vector_of_kll_sketches& other);
41
47
  vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
42
48
  vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
@@ -432,8 +438,8 @@ void bind_vector_of_kll_sketches(py::module &m, const char* name) {
432
438
  using namespace datasketches;
433
439
 
434
440
  py::class_<vector_of_kll_sketches<T>>(m, name)
435
- .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_sketches<T>::DEFAULT_K,
436
- py::arg("d")=vector_of_kll_sketches<T>::DEFAULT_D)
441
+ .def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_constants::DEFAULT_K,
442
+ py::arg("d")=vector_of_kll_constants::DEFAULT_D)
437
443
  .def(py::init<const vector_of_kll_sketches<T>&>())
438
444
  // allow user to retrieve k or d, in case it's instantiated w/ defaults
439
445
  .def("get_k", &vector_of_kll_sketches<T>::get_k,
@@ -32,7 +32,7 @@ namespace python {
32
32
  template<typename T>
33
33
  py::list vo_sketch_get_samples(const var_opt_sketch<T>& sk) {
34
34
  py::list list;
35
- for (auto& item : sk) {
35
+ for (auto item : sk) {
36
36
  py::tuple t = py::make_tuple(item.first, item.second);
37
37
  list.append(t);
38
38
  }
@@ -57,7 +57,7 @@ std::string vo_sketch_to_string(const var_opt_sketch<T>& sk, bool print_items) {
57
57
  ss << sk.to_string();
58
58
  ss << "### VarOpt Sketch Items" << std::endl;
59
59
  int i = 0;
60
- for (auto& item : sk) {
60
+ for (auto item : sk) {
61
61
  // item.second is always a double
62
62
  // item.first is an arbitrary py::object, so get the value by
63
63
  // using internal str() method then casting to C++ std::string
@@ -58,7 +58,7 @@ class HllTest(unittest.TestCase):
58
58
  self.assertEqual(len(sk_bytes), result.get_compact_serialization_bytes())
59
59
  new_hll = hll_sketch.deserialize(sk_bytes)
60
60
 
61
- # the sketch can self-report its configuation and status
61
+ # the sketch can self-report its configuration and status
62
62
  self.assertEqual(new_hll.lg_config_k, k)
63
63
  self.assertEqual(new_hll.tgt_type, tgt_hll_type.HLL_4)
64
64
  self.assertFalse(new_hll.is_empty())
@@ -30,10 +30,10 @@ class KllTest(unittest.TestCase):
30
30
  kll.update(0.0)
31
31
 
32
32
  # 0 should be near the median
33
- self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.025)
33
+ self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.035)
34
34
 
35
35
  # the median should be near 0
36
- self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.025)
36
+ self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.035)
37
37
 
38
38
  # we also track the min/max independently from the rest of the data
39
39
  # which lets us know the full observed data range
@@ -30,10 +30,10 @@ class reqTest(unittest.TestCase):
30
30
  req.update(0.0)
31
31
 
32
32
  # 0 should be near the median
33
- self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.03)
33
+ self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.045)
34
34
 
35
35
  # the median should be near 0
36
- self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.03)
36
+ self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.045)
37
37
 
38
38
  # we also track the min/max independently from the rest of the data
39
39
  # which lets us know the full observed data range
@@ -39,9 +39,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
39
39
  kll.update(dat)
40
40
 
41
41
  # 0 should be near the median
42
- np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.025)
42
+ np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
43
43
  # the median should be near 0
44
- np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.025)
44
+ np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
45
45
  # we also track the min/max independently from the rest of the data
46
46
  # which lets us know the full observed data range
47
47
  np.testing.assert_allclose(kll.get_min_values(), smin)
@@ -118,9 +118,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
118
118
  kll.update(dat)
119
119
 
120
120
  # 0 should be near the median
121
- np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.025)
121
+ np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
122
122
  # the median should be near 0
123
- np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.025)
123
+ np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
124
124
  # we also track the min/max independently from the rest of the data
125
125
  # which lets us know the full observed data range
126
126
  np.testing.assert_allclose(kll.get_min_values(), smin)
@@ -46,7 +46,7 @@ class VoTest(unittest.TestCase):
46
46
  self.assertEqual(len(items), k)
47
47
 
48
48
  # we can also apply a predicate to the sketch to get an estimate
49
- # (with optimially minimal variance) of the subset sum of items
49
+ # (with optimally minimal variance) of the subset sum of items
50
50
  # matching that predicate among the entire population
51
51
 
52
52
  # we'll use a lambda here, but any function operating on a single
@@ -89,11 +89,11 @@ class VoTest(unittest.TestCase):
89
89
  # the union and a sketch.
90
90
  print(union)
91
91
 
92
- # if we want to print the list of itmes, there must be a
92
+ # if we want to print the list of items, there must be a
93
93
  # __str__() method for each item (which need not be the same
94
94
  # type; they're all generic python objects when used from
95
95
  # python), otherwise you may trigger an exception.
96
- # to_string() is provided as a convenince to avoid direct
96
+ # to_string() is provided as a convenience to avoid direct
97
97
  # calls to __str__() with parameters.
98
98
  print(result.to_string(True))
99
99
 
@@ -32,29 +32,16 @@ target_include_directories(req
32
32
  target_link_libraries(req INTERFACE common)
33
33
  target_compile_features(req INTERFACE cxx_std_11)
34
34
 
35
- set(req_HEADERS "")
36
- list(APPEND req_HEADERS "include/req_common.hpp")
37
- list(APPEND req_HEADERS "include/req_sketch.hpp")
38
- list(APPEND req_HEADERS "include/req_sketch_impl.hpp")
39
- list(APPEND req_HEADERS "include/req_compactor.hpp")
40
- list(APPEND req_HEADERS "include/req_compactor_impl.hpp")
41
- list(APPEND req_HEADERS "include/req_quantile_calculator.hpp")
42
- list(APPEND req_HEADERS "include/req_quantile_calculator_impl.hpp")
43
-
44
35
  install(TARGETS req
45
36
  EXPORT ${PROJECT_NAME}
46
37
  )
47
38
 
48
- install(FILES ${req_HEADERS}
39
+ install(FILES
40
+ include/req_common.hpp
41
+ include/req_sketch.hpp
42
+ include/req_sketch_impl.hpp
43
+ include/req_compactor.hpp
44
+ include/req_compactor_impl.hpp
45
+ include/req_quantile_calculator.hpp
46
+ include/req_quantile_calculator_impl.hpp
49
47
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
50
-
51
- target_sources(req
52
- INTERFACE
53
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_common.hpp
54
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch.hpp
55
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch_impl.hpp
56
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor.hpp
57
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor_impl.hpp
58
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator.hpp
59
- ${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator_impl.hpp
60
- )
@@ -29,7 +29,8 @@
29
29
  namespace datasketches {
30
30
 
31
31
  // TODO: have a common random bit with KLL
32
- static std::independent_bits_engine<std::mt19937, 1, unsigned> req_random_bit(std::chrono::system_clock::now().time_since_epoch().count());
32
+ static std::independent_bits_engine<std::mt19937, 1, unsigned>
33
+ req_random_bit(static_cast<unsigned>(std::chrono::system_clock::now().time_since_epoch().count()));
33
34
 
34
35
  namespace req_constants {
35
36
  static const uint16_t MIN_K = 4;
@@ -110,8 +110,8 @@ private:
110
110
 
111
111
  bool ensure_enough_sections();
112
112
  std::pair<uint32_t, uint32_t> compute_compaction_range(uint32_t secs_to_compact) const;
113
- void grow(size_t new_capacity);
114
- void ensure_space(size_t num);
113
+ void grow(uint32_t new_capacity);
114
+ void ensure_space(uint32_t num);
115
115
 
116
116
  static uint32_t nearest_even(float value);
117
117
 
@@ -123,10 +123,10 @@ private:
123
123
  req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Allocator& allocator);
124
124
 
125
125
  template<typename S>
126
- static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, size_t num);
126
+ static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, uint32_t num);
127
127
 
128
128
  template<typename S>
129
- static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, size_t num);
129
+ static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
130
130
 
131
131
  };
132
132
 
@@ -38,7 +38,7 @@ lg_weight_(lg_weight),
38
38
  hra_(hra),
39
39
  coin_(false),
40
40
  sorted_(sorted),
41
- section_size_raw_(section_size),
41
+ section_size_raw_(static_cast<float>(section_size)),
42
42
  section_size_(section_size),
43
43
  num_sections_(req_constants::INIT_NUM_SECTIONS),
44
44
  state_(0),
@@ -72,9 +72,9 @@ items_(nullptr)
72
72
  {
73
73
  if (other.items_ != nullptr) {
74
74
  items_ = allocator_.allocate(capacity_);
75
- const size_t from = hra_ ? capacity_ - num_items_ : 0;
76
- const size_t to = hra_ ? capacity_ : num_items_;
77
- for (size_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
75
+ const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
76
+ const uint32_t to = hra_ ? capacity_ : num_items_;
77
+ for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
78
78
  }
79
79
  }
80
80
 
@@ -165,16 +165,16 @@ template<typename T, typename C, typename A>
165
165
  template<typename FwdT>
166
166
  void req_compactor<T, C, A>::append(FwdT&& item) {
167
167
  if (num_items_ == capacity_) grow(capacity_ + get_nom_capacity());
168
- const size_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
168
+ const uint32_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
169
169
  new (items_ + i) T(std::forward<FwdT>(item));
170
170
  ++num_items_;
171
171
  if (num_items_ > 1) sorted_ = false;
172
172
  }
173
173
 
174
174
  template<typename T, typename C, typename A>
175
- void req_compactor<T, C, A>::grow(size_t new_capacity) {
175
+ void req_compactor<T, C, A>::grow(uint32_t new_capacity) {
176
176
  T* new_items = allocator_.allocate(new_capacity);
177
- size_t new_i = hra_ ? new_capacity - num_items_ : 0;
177
+ uint32_t new_i = hra_ ? new_capacity - num_items_ : 0;
178
178
  for (auto it = begin(); it != end(); ++it, ++new_i) {
179
179
  new (new_items + new_i) T(std::move(*it));
180
180
  (*it).~T();
@@ -185,7 +185,7 @@ void req_compactor<T, C, A>::grow(size_t new_capacity) {
185
185
  }
186
186
 
187
187
  template<typename T, typename C, typename A>
188
- void req_compactor<T, C, A>::ensure_space(size_t num) {
188
+ void req_compactor<T, C, A>::ensure_space(uint32_t num) {
189
189
  if (num_items_ + num > capacity_) grow(num_items_ + num + get_nom_capacity());
190
190
  }
191
191
 
@@ -218,13 +218,13 @@ void req_compactor<T, C, A>::merge(FwdC&& other) {
218
218
  while (ensure_enough_sections()) {}
219
219
  ensure_space(other.get_num_items());
220
220
  sort();
221
- auto middle = hra_ ? begin() : end();
221
+ auto offset = hra_ ? capacity_ - num_items_ : num_items_;
222
222
  auto from = hra_ ? begin() - other.get_num_items() : end();
223
223
  auto to = from + other.get_num_items();
224
224
  auto other_it = other.begin();
225
225
  for (auto it = from; it != to; ++it, ++other_it) new (it) T(conditional_forward<FwdC>(*other_it));
226
226
  if (!other.sorted_) std::sort(from, to, C());
227
- if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), middle, hra_ ? end() : to, C());
227
+ if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), items_ + offset, hra_ ? end() : to, C());
228
228
  num_items_ += other.get_num_items();
229
229
  }
230
230
 
@@ -240,7 +240,7 @@ template<typename T, typename C, typename A>
240
240
  std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& next) {
241
241
  const uint32_t starting_nom_capacity = get_nom_capacity();
242
242
  // choose a part of the buffer to compact
243
- const uint32_t secs_to_compact = std::min(static_cast<uint32_t>(count_trailing_zeros_in_u32(~state_) + 1), static_cast<uint32_t>(num_sections_));
243
+ const uint32_t secs_to_compact = std::min<uint32_t>(count_trailing_zeros_in_u64(~state_) + 1, num_sections_);
244
244
  auto compaction_range = compute_compaction_range(secs_to_compact);
245
245
  if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
246
246
 
@@ -267,9 +267,9 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& nex
267
267
 
268
268
  template<typename T, typename C, typename A>
269
269
  bool req_compactor<T, C, A>::ensure_enough_sections() {
270
- const float ssr = section_size_raw_ / sqrt(2);
270
+ const float ssr = section_size_raw_ / sqrtf(2);
271
271
  const uint32_t ne = nearest_even(ssr);
272
- if (state_ >= static_cast<uint64_t>(1 << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
272
+ if (state_ >= static_cast<uint64_t>(1ULL << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
273
273
  section_size_raw_ = ssr;
274
274
  section_size_ = ne;
275
275
  num_sections_ <<= 1;
@@ -284,8 +284,8 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compute_compaction_range(u
284
284
  uint32_t non_compact = get_nom_capacity() / 2 + (num_sections_ - secs_to_compact) * section_size_;
285
285
  // make compacted region even
286
286
  if (((num_items_ - non_compact) & 1) == 1) ++non_compact;
287
- const size_t low = hra_ ? 0 : non_compact;
288
- const size_t high = hra_ ? num_items_ - non_compact : num_items_;
287
+ const uint32_t low = hra_ ? 0 : non_compact;
288
+ const uint32_t high = hra_ ? num_items_ - non_compact : num_items_;
289
289
  return std::pair<uint32_t, uint32_t>(low, high);
290
290
  }
291
291
 
@@ -309,19 +309,6 @@ void req_compactor<T, C, A>::promote_evens_or_odds(InIter from, InIter to, bool
309
309
  }
310
310
  }
311
311
 
312
- // helpers for integral types
313
- template<typename T>
314
- static inline T read(std::istream& is) {
315
- T value;
316
- is.read(reinterpret_cast<char*>(&value), sizeof(T));
317
- return value;
318
- }
319
-
320
- template<typename T>
321
- static inline void write(std::ostream& os, T value) {
322
- os.write(reinterpret_cast<const char*>(&value), sizeof(T));
323
- }
324
-
325
312
  // implementation for fixed-size arithmetic types (integral and floating point)
326
313
  template<typename T, typename C, typename A>
327
314
  template<typename S, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
@@ -394,7 +381,7 @@ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, con
394
381
 
395
382
  template<typename T, typename C, typename A>
396
383
  template<typename S>
397
- auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, size_t num)
384
+ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, uint32_t num)
398
385
  -> std::unique_ptr<T, items_deleter> {
399
386
  A alloc(allocator);
400
387
  std::unique_ptr<T, items_deleter> items(alloc.allocate(num), items_deleter(allocator, false, num));
@@ -402,7 +389,7 @@ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde,
402
389
  // serde did not throw, enable destructors
403
390
  items.get_deleter().set_destroy(true);
404
391
  if (!is.good()) throw std::runtime_error("error reading from std::istream");
405
- return std::move(items);
392
+ return items;
406
393
  }
407
394
 
408
395
  template<typename T, typename C, typename A>
@@ -443,7 +430,7 @@ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(co
443
430
 
444
431
  template<typename T, typename C, typename A>
445
432
  template<typename S>
446
- auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, size_t num)
433
+ auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, uint32_t num)
447
434
  -> std::pair<std::unique_ptr<T, items_deleter>, size_t> {
448
435
  const char* ptr = static_cast<const char*>(bytes);
449
436
  const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -478,22 +465,22 @@ items_(items.release())
478
465
  template<typename T, typename C, typename A>
479
466
  class req_compactor<T, C, A>::items_deleter {
480
467
  public:
481
- items_deleter(const A& allocator, bool destroy, uint32_t num): allocator(allocator), destroy(destroy), num(num) {}
468
+ items_deleter(const A& allocator, bool destroy, size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
482
469
  void operator() (T* ptr) {
483
470
  if (ptr != nullptr) {
484
- if (destroy) {
485
- for (uint32_t i = 0; i < num; ++i) {
471
+ if (destroy_) {
472
+ for (size_t i = 0; i < num_; ++i) {
486
473
  ptr[i].~T();
487
474
  }
488
475
  }
489
- allocator.deallocate(ptr, num);
476
+ allocator_.deallocate(ptr, num_);
490
477
  }
491
478
  }
492
- void set_destroy(bool destroy) { this->destroy = destroy; }
479
+ void set_destroy(bool destroy) { destroy_ = destroy; }
493
480
  private:
494
- A allocator;
495
- bool destroy;
496
- uint32_t num;
481
+ A allocator_;
482
+ bool destroy_;
483
+ size_t num_;
497
484
  };
498
485
 
499
486
  } /* namespace datasketches */
@@ -319,7 +319,7 @@ private:
319
319
 
320
320
  // for deserialization
321
321
  class item_deleter;
322
- req_sketch(uint32_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
322
+ req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
323
323
 
324
324
  static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
325
325
  static void check_serial_version(uint8_t serial_version);
@@ -28,7 +28,7 @@ namespace datasketches {
28
28
  template<typename T, typename C, typename S, typename A>
29
29
  req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
30
30
  allocator_(allocator),
31
- k_(std::max(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
31
+ k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
32
32
  hra_(hra),
33
33
  max_nom_size_(0),
34
34
  num_retained_(0),
@@ -401,7 +401,7 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os) const {
401
401
  write(os, k_);
402
402
  const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
403
403
  write(os, num_levels);
404
- const uint8_t num_raw_items = raw_items ? n_ : 0;
404
+ const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
405
405
  write(os, num_raw_items);
406
406
  if (is_empty()) return;
407
407
  if (is_estimation_mode()) {
@@ -440,7 +440,7 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const -> vect
440
440
  ptr += copy_to_mem(k_, ptr);
441
441
  const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
442
442
  ptr += copy_to_mem(num_levels, ptr);
443
- const uint8_t num_raw_items = raw_items ? n_ : 0;
443
+ const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
444
444
  ptr += copy_to_mem(num_raw_items, ptr);
445
445
  if (!is_empty()) {
446
446
  if (is_estimation_mode()) {
@@ -620,7 +620,7 @@ void req_sketch<T, C, S, A>::grow() {
620
620
 
621
621
  template<typename T, typename C, typename S, typename A>
622
622
  uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
623
- return compactors_.size();
623
+ return static_cast<uint8_t>(compactors_.size());
624
624
  }
625
625
 
626
626
  template<typename T, typename C, typename S, typename A>
@@ -653,7 +653,9 @@ void req_sketch<T, C, S, A>::compress() {
653
653
 
654
654
  template<typename T, typename C, typename S, typename A>
655
655
  string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
656
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
656
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
657
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
658
+ std::ostringstream os;
657
659
  os << "### REQ sketch summary:" << std::endl;
658
660
  os << " K : " << k_ << std::endl;
659
661
  os << " High Rank Acc : " << (hra_ ? "true" : "false") << std::endl;
@@ -693,7 +695,7 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
693
695
  }
694
696
  os << "### End sketch data" << std::endl;
695
697
  }
696
- return os.str();
698
+ return string<A>(os.str().c_str(), allocator_);
697
699
  }
698
700
 
699
701
  template<typename T, typename C, typename S, typename A>
@@ -711,7 +713,7 @@ class req_sketch<T, C, S, A>::item_deleter {
711
713
  };
712
714
 
713
715
  template<typename T, typename C, typename S, typename A>
714
- req_sketch<T, C, S, A>::req_sketch(uint32_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
716
+ req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
715
717
  allocator_(compactors.get_allocator()),
716
718
  k_(k),
717
719
  hra_(hra),
@@ -766,9 +768,9 @@ auto req_sketch<T, C, S, A>::end() const -> const_iterator {
766
768
 
767
769
  template<typename T, typename C, typename S, typename A>
768
770
  req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
769
- levels_it_(begin),
770
- levels_end_(end),
771
- compactor_it_((*levels_it_).begin())
771
+ levels_it_(begin),
772
+ levels_end_(end),
773
+ compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
772
774
  {}
773
775
 
774
776
  template<typename T, typename C, typename S, typename A>
@@ -802,7 +804,7 @@ bool req_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& ot
802
804
 
803
805
  template<typename T, typename C, typename S, typename A>
804
806
  std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
805
- return std::pair<const T&, const uint64_t>(*compactor_it_, 1 << (*levels_it_).get_lg_weight());
807
+ return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
806
808
  }
807
809
 
808
810
  } /* namespace datasketches */