datasketches 0.2.0 → 0.2.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -55,15 +55,15 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
55
55
 
56
56
  TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
57
57
  req_sketch<float> sketch(12, false);
58
- sketch.update(1);
58
+ sketch.update(1.0f);
59
59
  REQUIRE_FALSE(sketch.is_HRA());
60
60
  REQUIRE_FALSE(sketch.is_empty());
61
61
  REQUIRE_FALSE(sketch.is_estimation_mode());
62
62
  REQUIRE(sketch.get_n() == 1);
63
63
  REQUIRE(sketch.get_num_retained() == 1);
64
- REQUIRE(sketch.get_rank(1) == 0);
65
- REQUIRE(sketch.get_rank<true>(1) == 1);
66
- REQUIRE(sketch.get_rank(1.1) == 1);
64
+ REQUIRE(sketch.get_rank(1.0f) == 0);
65
+ REQUIRE(sketch.get_rank<true>(1.0f) == 1);
66
+ REQUIRE(sketch.get_rank(1.1f) == 1);
67
67
  REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
68
68
  REQUIRE(sketch.get_quantile(0) == 1);
69
69
  REQUIRE(sketch.get_quantile(0.5) == 1);
@@ -86,43 +86,43 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
86
86
 
87
87
  TEST_CASE("req sketch: repeated values", "[req_sketch]") {
88
88
  req_sketch<float> sketch(12);
89
- sketch.update(1);
90
- sketch.update(1);
91
- sketch.update(1);
92
- sketch.update(2);
93
- sketch.update(2);
94
- sketch.update(2);
89
+ sketch.update(1.0f);
90
+ sketch.update(1.0f);
91
+ sketch.update(1.0f);
92
+ sketch.update(2.0f);
93
+ sketch.update(2.0f);
94
+ sketch.update(2.0f);
95
95
  REQUIRE_FALSE(sketch.is_empty());
96
96
  REQUIRE_FALSE(sketch.is_estimation_mode());
97
97
  REQUIRE(sketch.get_n() == 6);
98
98
  REQUIRE(sketch.get_num_retained() == 6);
99
- REQUIRE(sketch.get_rank(1) == 0);
100
- REQUIRE(sketch.get_rank<true>(1) == 0.5);
101
- REQUIRE(sketch.get_rank(2) == 0.5);
102
- REQUIRE(sketch.get_rank<true>(2) == 1);
99
+ REQUIRE(sketch.get_rank(1.0f) == 0);
100
+ REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
101
+ REQUIRE(sketch.get_rank(2.0f) == 0.5);
102
+ REQUIRE(sketch.get_rank<true>(2.0f) == 1);
103
103
  }
104
104
 
105
105
  TEST_CASE("req sketch: exact mode", "[req_sketch]") {
106
106
  req_sketch<float> sketch(12);
107
- for (size_t i = 1; i <= 10; ++i) sketch.update(i);
107
+ for (size_t i = 1; i <= 10; ++i) sketch.update(static_cast<float>(i));
108
108
  REQUIRE_FALSE(sketch.is_empty());
109
109
  REQUIRE_FALSE(sketch.is_estimation_mode());
110
110
  REQUIRE(sketch.get_n() == 10);
111
111
  REQUIRE(sketch.get_num_retained() == 10);
112
112
 
113
113
  // like KLL
114
- REQUIRE(sketch.get_rank(1) == 0);
115
- REQUIRE(sketch.get_rank(2) == 0.1);
116
- REQUIRE(sketch.get_rank(6) == 0.5);
117
- REQUIRE(sketch.get_rank(9) == 0.8);
118
- REQUIRE(sketch.get_rank(10) == 0.9);
114
+ REQUIRE(sketch.get_rank(1.0f) == 0);
115
+ REQUIRE(sketch.get_rank(2.0f) == 0.1);
116
+ REQUIRE(sketch.get_rank(6.0f) == 0.5);
117
+ REQUIRE(sketch.get_rank(9.0f) == 0.8);
118
+ REQUIRE(sketch.get_rank(10.0f) == 0.9);
119
119
 
120
120
  // inclusive
121
- REQUIRE(sketch.get_rank<true>(1) == 0.1);
122
- REQUIRE(sketch.get_rank<true>(2) == 0.2);
123
- REQUIRE(sketch.get_rank<true>(5) == 0.5);
124
- REQUIRE(sketch.get_rank<true>(9) == 0.9);
125
- REQUIRE(sketch.get_rank<true>(10) == 1);
121
+ REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
122
+ REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
123
+ REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
124
+ REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
125
+ REQUIRE(sketch.get_rank<true>(10.0f) == 1);
126
126
 
127
127
  // like KLL
128
128
  REQUIRE(sketch.get_quantile(0) == 1);
@@ -164,16 +164,16 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
164
164
  TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
165
165
  req_sketch<float> sketch(12);
166
166
  const size_t n = 100000;
167
- for (size_t i = 0; i < n; ++i) sketch.update(i);
167
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
168
168
  REQUIRE_FALSE(sketch.is_empty());
169
169
  REQUIRE(sketch.is_estimation_mode());
170
170
  REQUIRE(sketch.get_n() == n);
171
171
  // std::cout << sketch.to_string(true);
172
172
  REQUIRE(sketch.get_num_retained() < n);
173
173
  REQUIRE(sketch.get_rank(0) == 0);
174
- REQUIRE(sketch.get_rank(n) == 1);
175
- REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
176
- REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
174
+ REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
175
+ REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
176
+ REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
177
177
  REQUIRE(sketch.get_min_value() == 0);
178
178
  REQUIRE(sketch.get_max_value() == n - 1);
179
179
  REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
@@ -219,7 +219,7 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
219
219
 
220
220
  TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
221
221
  req_sketch<float> sketch(12);
222
- sketch.update(1);
222
+ sketch.update(1.0f);
223
223
 
224
224
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
225
225
  sketch.serialize(s);
@@ -235,7 +235,7 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
235
235
 
236
236
  TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
237
237
  req_sketch<float> sketch(12);
238
- sketch.update(1);
238
+ sketch.update(1.0f);
239
239
 
240
240
  auto bytes = sketch.serialize();
241
241
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
@@ -253,7 +253,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
253
253
  TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
254
254
  req_sketch<float> sketch(12);
255
255
  const size_t n = 50;
256
- for (size_t i = 0; i < n; ++i) sketch.update(i);
256
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
257
257
  REQUIRE_FALSE(sketch.is_estimation_mode());
258
258
 
259
259
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -271,7 +271,7 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
271
271
  TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
272
272
  req_sketch<float> sketch(12);
273
273
  const size_t n = 50;
274
- for (size_t i = 0; i < n; ++i) sketch.update(i);
274
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
275
275
  REQUIRE_FALSE(sketch.is_estimation_mode());
276
276
 
277
277
  auto bytes = sketch.serialize();
@@ -290,7 +290,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
290
290
  TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
291
291
  req_sketch<float> sketch(12);
292
292
  const size_t n = 100000;
293
- for (size_t i = 0; i < n; ++i) sketch.update(i);
293
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
294
294
  REQUIRE(sketch.is_estimation_mode());
295
295
 
296
296
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -308,7 +308,7 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
308
308
  TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
309
309
  req_sketch<float> sketch(12);
310
310
  const size_t n = 100000;
311
- for (size_t i = 0; i < n; ++i) sketch.update(i);
311
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
312
312
  REQUIRE(sketch.is_estimation_mode());
313
313
 
314
314
  auto bytes = sketch.serialize();
@@ -326,7 +326,7 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
326
326
  TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
327
327
  req_sketch<float> sketch(12);
328
328
  const size_t n = 100000;
329
- for (size_t i = 0; i < n; ++i) sketch.update(i);
329
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
330
330
  REQUIRE(sketch.is_estimation_mode());
331
331
 
332
332
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -373,8 +373,8 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
373
373
  REQUIRE(sketch.get_num_retained() == 1);
374
374
  REQUIRE(sketch.get_min_value() == 1);
375
375
  REQUIRE(sketch.get_max_value() == 1);
376
- REQUIRE(sketch.get_rank(1) == 0);
377
- REQUIRE(sketch.get_rank<true>(1) == 1);
376
+ REQUIRE(sketch.get_rank(1.0f) == 0);
377
+ REQUIRE(sketch.get_rank<true>(1.0f) == 1);
378
378
  }
379
379
 
380
380
  TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
@@ -388,7 +388,7 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
388
388
  REQUIRE(sketch.get_num_retained() == 4);
389
389
  REQUIRE(sketch.get_min_value() == 0);
390
390
  REQUIRE(sketch.get_max_value() == 3);
391
- REQUIRE(sketch.get_rank(2) == 0.5);
391
+ REQUIRE(sketch.get_rank(2.0f) == 0.5);
392
392
  }
393
393
 
394
394
  TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
@@ -402,7 +402,7 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
402
402
  REQUIRE(sketch.get_num_retained() == 100);
403
403
  REQUIRE(sketch.get_min_value() == 0);
404
404
  REQUIRE(sketch.get_max_value() == 99);
405
- REQUIRE(sketch.get_rank(50) == 0.5);
405
+ REQUIRE(sketch.get_rank(50.0f) == 0.5);
406
406
  }
407
407
 
408
408
  TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
@@ -416,14 +416,14 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
416
416
  REQUIRE(sketch.get_num_retained() == 2942);
417
417
  REQUIRE(sketch.get_min_value() == 0);
418
418
  REQUIRE(sketch.get_max_value() == 9999);
419
- REQUIRE(sketch.get_rank(5000) == 0.5);
419
+ REQUIRE(sketch.get_rank(5000.0f) == 0.5);
420
420
  }
421
421
 
422
422
  TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
423
423
  req_sketch<float> sketch1(40);
424
424
 
425
425
  req_sketch<float> sketch2(40);
426
- for (size_t i = 0; i < 1000; ++i) sketch2.update(i);
426
+ for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
427
427
 
428
428
  sketch1.merge(sketch2);
429
429
  REQUIRE(sketch1.get_min_value() == 0);
@@ -431,15 +431,15 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
431
431
  REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
432
432
  REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
433
433
  REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
434
- REQUIRE(sketch1.get_rank(500) == Approx(0.5).margin(0.01));
434
+ REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
435
435
  }
436
436
 
437
437
  TEST_CASE("req sketch: merge", "[req_sketch]") {
438
438
  req_sketch<float> sketch1(100);
439
- for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
439
+ for (size_t i = 0; i < 1000; ++i) sketch1.update(static_cast<float>(i));
440
440
 
441
441
  req_sketch<float> sketch2(100);
442
- for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
442
+ for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
443
443
 
444
444
  sketch1.merge(sketch2);
445
445
  REQUIRE(sketch1.get_min_value() == 0);
@@ -447,18 +447,18 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
447
447
  REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
448
448
  REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
449
449
  REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
450
- REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
450
+ REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
451
451
  }
452
452
 
453
453
  TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
454
454
  req_sketch<float> sketch1(12);
455
- for (size_t i = 0; i < 40; ++i) sketch1.update(i);
455
+ for (size_t i = 0; i < 40; ++i) sketch1.update(static_cast<float>(i));
456
456
 
457
457
  req_sketch<float> sketch2(12);
458
- for (size_t i = 40; i < 80; ++i) sketch2.update(i);
458
+ for (size_t i = 40; i < 80; ++i) sketch2.update(static_cast<float>(i));
459
459
 
460
460
  req_sketch<float> sketch3(12);
461
- for (size_t i = 80; i < 120; ++i) sketch3.update(i);
461
+ for (size_t i = 80; i < 120; ++i) sketch3.update(static_cast<float>(i));
462
462
 
463
463
  req_sketch<float> sketch(12);
464
464
  sketch.merge(sketch1);
@@ -467,15 +467,15 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
467
467
  REQUIRE(sketch.get_min_value() == 0);
468
468
  REQUIRE(sketch.get_max_value() == 119);
469
469
  REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
470
- REQUIRE(sketch.get_rank(60) == Approx(0.5).margin(0.01));
470
+ REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
471
471
  }
472
472
 
473
473
  TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
474
474
  req_sketch<float> sketch1(12);
475
- sketch1.update(1);
475
+ sketch1.update(1.0f);
476
476
 
477
477
  req_sketch<float> sketch2(12, false);
478
- sketch2.update(1);
478
+ sketch2.update(1.0f);
479
479
 
480
480
  REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
481
481
  }
@@ -32,17 +32,13 @@ target_include_directories(sampling
32
32
  target_link_libraries(sampling INTERFACE common)
33
33
  target_compile_features(sampling INTERFACE cxx_std_11)
34
34
 
35
- set(sampling_HEADERS "include/var_opt_sketch.hpp;include/var_opt_sketch_impl.hpp")
36
-
37
35
  install(TARGETS sampling
38
36
  EXPORT ${PROJECT_NAME}
39
37
  )
40
38
 
41
- install(FILES ${sampling_HEADERS}
39
+ install(FILES
40
+ include/var_opt_sketch.hpp
41
+ include/var_opt_sketch_impl.hpp
42
+ include/var_opt_union.hpp
43
+ include/var_opt_union_impl.hpp
42
44
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
43
-
44
- target_sources(sampling
45
- INTERFACE
46
- ${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch.hpp
47
- ${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch_impl.hpp
48
- )
@@ -51,18 +51,23 @@ struct subset_summary {
51
51
  double total_sketch_weight;
52
52
  };
53
53
 
54
- enum resize_factor { X1 = 0, X2, X4, X8 };
55
-
56
54
  template <typename T, typename S, typename A> class var_opt_union; // forward declaration
57
55
 
56
+ namespace var_opt_constants {
57
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
58
+ const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
59
+ }
60
+
58
61
  template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
59
62
  class var_opt_sketch {
60
63
 
61
64
  public:
62
- static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
63
- static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
65
+ static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
66
+ static const uint32_t MAX_K = var_opt_constants::MAX_K;
64
67
 
65
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
68
+ explicit var_opt_sketch(uint32_t k,
69
+ resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
70
+ const A& allocator = A());
66
71
  var_opt_sketch(const var_opt_sketch& other);
67
72
  var_opt_sketch(var_opt_sketch&& other) noexcept;
68
73
 
@@ -128,7 +128,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
128
128
  r_(r_count),
129
129
  n_(n),
130
130
  total_wt_r_(total_wt_r),
131
- rf_(DEFAULT_RESIZE_FACTOR),
131
+ rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
132
132
  curr_items_alloc_(len),
133
133
  filled_data_(n > k),
134
134
  allocator_(allocator),
@@ -334,7 +334,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
334
334
  num_bytes += (h_ / 8) + (h_ % 8 > 0);
335
335
  }
336
336
  // must iterate over the items
337
- for (auto& it: *this)
337
+ for (auto it: *this)
338
338
  num_bytes += S().size_of_item(it.first);
339
339
  return num_bytes;
340
340
  }
@@ -359,21 +359,21 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
359
359
  // first prelong
360
360
  uint8_t ser_ver(SER_VER);
361
361
  uint8_t family(FAMILY_ID);
362
- ptr += copy_to_mem(&first_byte, ptr, sizeof(uint8_t));
363
- ptr += copy_to_mem(&ser_ver, ptr, sizeof(uint8_t));
364
- ptr += copy_to_mem(&family, ptr, sizeof(uint8_t));
365
- ptr += copy_to_mem(&flags, ptr, sizeof(uint8_t));
366
- ptr += copy_to_mem(&k_, ptr, sizeof(uint32_t));
362
+ ptr += copy_to_mem(first_byte, ptr);
363
+ ptr += copy_to_mem(ser_ver, ptr);
364
+ ptr += copy_to_mem(family, ptr);
365
+ ptr += copy_to_mem(flags, ptr);
366
+ ptr += copy_to_mem(k_, ptr);
367
367
 
368
368
  if (!empty) {
369
369
  // second and third prelongs
370
- ptr += copy_to_mem(&n_, ptr, sizeof(uint64_t));
371
- ptr += copy_to_mem(&h_, ptr, sizeof(uint32_t));
372
- ptr += copy_to_mem(&r_, ptr, sizeof(uint32_t));
370
+ ptr += copy_to_mem(n_, ptr);
371
+ ptr += copy_to_mem(h_, ptr);
372
+ ptr += copy_to_mem(r_, ptr);
373
373
 
374
374
  // fourth prelong, if needed
375
375
  if (r_ > 0) {
376
- ptr += copy_to_mem(&total_wt_r_, ptr, sizeof(double));
376
+ ptr += copy_to_mem(total_wt_r_, ptr);
377
377
  }
378
378
 
379
379
  // first h_ weights
@@ -388,14 +388,14 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
388
388
  }
389
389
 
390
390
  if ((i & 0x7) == 0x7) {
391
- ptr += copy_to_mem(&val, ptr, sizeof(uint8_t));
391
+ ptr += copy_to_mem(val, ptr);
392
392
  val = 0;
393
393
  }
394
394
  }
395
395
 
396
396
  // write out any remaining values
397
397
  if ((h_ & 0x7) > 0) {
398
- ptr += copy_to_mem(&val, ptr, sizeof(uint8_t));
398
+ ptr += copy_to_mem(val, ptr);
399
399
  }
400
400
  }
401
401
 
@@ -428,25 +428,25 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
428
428
  // first prelong
429
429
  const uint8_t ser_ver(SER_VER);
430
430
  const uint8_t family(FAMILY_ID);
431
- os.write((char*)&first_byte, sizeof(uint8_t));
432
- os.write((char*)&ser_ver, sizeof(uint8_t));
433
- os.write((char*)&family, sizeof(uint8_t));
434
- os.write((char*)&flags, sizeof(uint8_t));
435
- os.write((char*)&k_, sizeof(uint32_t));
431
+ write(os, first_byte);
432
+ write(os, ser_ver);
433
+ write(os, family);
434
+ write(os, flags);
435
+ write(os, k_);
436
436
 
437
437
  if (!empty) {
438
438
  // second and third prelongs
439
- os.write((char*)&n_, sizeof(uint64_t));
440
- os.write((char*)&h_, sizeof(uint32_t));
441
- os.write((char*)&r_, sizeof(uint32_t));
439
+ write(os, n_);
440
+ write(os, h_);
441
+ write(os, r_);
442
442
 
443
443
  // fourth prelong, if needed
444
444
  if (r_ > 0) {
445
- os.write((char*)&total_wt_r_, sizeof(double));
445
+ write(os, total_wt_r_);
446
446
  }
447
447
 
448
448
  // write the first h_ weights
449
- os.write((char*)weights_, h_ * sizeof(double));
449
+ write(os, weights_, h_ * sizeof(double));
450
450
 
451
451
  // write the first h_ marks as packed bytes iff we have a gadget
452
452
  if (marks_ != nullptr) {
@@ -457,14 +457,14 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
457
457
  }
458
458
 
459
459
  if ((i & 0x7) == 0x7) {
460
- os.write((char*)&val, sizeof(uint8_t));
460
+ write(os, val);
461
461
  val = 0;
462
462
  }
463
463
  }
464
464
 
465
465
  // write out any remaining values
466
466
  if ((h_ & 0x7) > 0) {
467
- os.write((char*)&val, sizeof(uint8_t));
467
+ write(os, val);
468
468
  }
469
469
  }
470
470
 
@@ -481,17 +481,17 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
481
481
  const char* base = ptr;
482
482
  const char* end_ptr = ptr + size;
483
483
  uint8_t first_byte;
484
- ptr += copy_from_mem(ptr, &first_byte, sizeof(first_byte));
484
+ ptr += copy_from_mem(ptr, first_byte);
485
485
  uint8_t preamble_longs = first_byte & 0x3f;
486
486
  resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
487
487
  uint8_t serial_version;
488
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
488
+ ptr += copy_from_mem(ptr, serial_version);
489
489
  uint8_t family_id;
490
- ptr += copy_from_mem(ptr, &family_id, sizeof(family_id));
490
+ ptr += copy_from_mem(ptr, family_id);
491
491
  uint8_t flags;
492
- ptr += copy_from_mem(ptr, &flags, sizeof(flags));
492
+ ptr += copy_from_mem(ptr, flags);
493
493
  uint32_t k;
494
- ptr += copy_from_mem(ptr, &k, sizeof(k));
494
+ ptr += copy_from_mem(ptr, k);
495
495
 
496
496
  check_preamble_longs(preamble_longs, flags);
497
497
  check_family_and_serialization_version(family_id, serial_version);
@@ -507,16 +507,16 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
507
507
  // second and third prelongs
508
508
  uint64_t n;
509
509
  uint32_t h, r;
510
- ptr += copy_from_mem(ptr, &n, sizeof(n));
511
- ptr += copy_from_mem(ptr, &h, sizeof(h));
512
- ptr += copy_from_mem(ptr, &r, sizeof(r));
510
+ ptr += copy_from_mem(ptr, n);
511
+ ptr += copy_from_mem(ptr, h);
512
+ ptr += copy_from_mem(ptr, r);
513
513
 
514
514
  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
515
515
 
516
516
  // current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
517
517
  double total_wt_r = 0.0;
518
518
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
519
- ptr += copy_from_mem(ptr, &total_wt_r, sizeof(total_wt_r));
519
+ ptr += copy_from_mem(ptr, total_wt_r);
520
520
  if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
521
521
  throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
522
522
  "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
@@ -548,7 +548,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
548
548
  check_memory_size(ptr - base + size_marks, size);
549
549
  for (uint32_t i = 0; i < h; ++i) {
550
550
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
551
- ptr += copy_from_mem(ptr, &val, sizeof(val));
551
+ ptr += copy_from_mem(ptr, val);
552
552
  }
553
553
  marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
554
554
  num_marks_in_h += (marks.get()[i] ? 1 : 0);
@@ -571,18 +571,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
571
571
 
572
572
  template<typename T, typename S, typename A>
573
573
  var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
574
- uint8_t first_byte;
575
- is.read((char*)&first_byte, sizeof(first_byte));
574
+ const auto first_byte = read<uint8_t>(is);
576
575
  uint8_t preamble_longs = first_byte & 0x3f;
577
- resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
578
- uint8_t serial_version;
579
- is.read((char*)&serial_version, sizeof(serial_version));
580
- uint8_t family_id;
581
- is.read((char*)&family_id, sizeof(family_id));
582
- uint8_t flags;
583
- is.read((char*)&flags, sizeof(flags));
584
- uint32_t k;
585
- is.read((char*)&k, sizeof(k));
576
+ const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
577
+ const auto serial_version = read<uint8_t>(is);
578
+ const auto family_id = read<uint8_t>(is);
579
+ const auto flags = read<uint8_t>(is);
580
+ const auto k = read<uint32_t>(is);
586
581
 
587
582
  check_preamble_longs(preamble_longs, flags);
588
583
  check_family_and_serialization_version(family_id, serial_version);
@@ -598,31 +593,27 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
598
593
  }
599
594
 
600
595
  // second and third prelongs
601
- uint64_t n;
602
- uint32_t h, r;
603
- is.read((char*)&n, sizeof(n));
604
- is.read((char*)&h, sizeof(h));
605
- is.read((char*)&r, sizeof(r));
596
+ const auto n = read<uint64_t>(is);
597
+ const auto h = read<uint32_t>(is);
598
+ const auto r = read<uint32_t>(is);
606
599
 
607
600
  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
608
601
 
609
602
  // current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
610
603
  double total_wt_r = 0.0;
611
604
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
612
- is.read((char*)&total_wt_r, sizeof(total_wt_r));
605
+ total_wt_r = read<double>(is);
613
606
  if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
614
607
  throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
615
608
  "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
616
609
  }
617
- } else {
618
- total_wt_r = 0.0;
619
610
  }
620
611
 
621
612
  // read the first h weights, fill remainder with -1.0
622
613
  std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
623
614
  weights_deleter(array_size, allocator));
624
615
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
625
- is.read((char*)wts, h * sizeof(double));
616
+ read(is, wts, h * sizeof(double));
626
617
  for (size_t i = 0; i < h; ++i) {
627
618
  if (!(wts[i] > 0.0)) {
628
619
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
@@ -638,7 +629,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
638
629
  uint8_t val = 0;
639
630
  for (uint32_t i = 0; i < h; ++i) {
640
631
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
641
- is.read((char*)&val, sizeof(val));
632
+ val = read<uint8_t>(is);
642
633
  }
643
634
  marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
644
635
  num_marks_in_h += (marks.get()[i] ? 1 : 0);
@@ -740,8 +731,10 @@ void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
740
731
 
741
732
  template<typename T, typename S, typename A>
742
733
  string<A> var_opt_sketch<T,S,A>::to_string() const {
743
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
744
- os << "### VarOpt SUMMARY: " << std::endl;
734
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
735
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
736
+ std::ostringstream os;
737
+ os << "### VarOpt SUMMARY:" << std::endl;
745
738
  os << " k : " << k_ << std::endl;
746
739
  os << " h : " << h_ << std::endl;
747
740
  os << " r : " << r_ << std::endl;
@@ -749,24 +742,28 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
749
742
  os << " Current size : " << curr_items_alloc_ << std::endl;
750
743
  os << " Resize factor: " << (1 << rf_) << std::endl;
751
744
  os << "### END SKETCH SUMMARY" << std::endl;
752
- return os.str();
745
+ return string<A>(os.str().c_str(), allocator_);
753
746
  }
754
747
 
755
748
  template<typename T, typename S, typename A>
756
749
  string<A> var_opt_sketch<T,S,A>::items_to_string() const {
757
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
750
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
751
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
752
+ std::ostringstream os;
758
753
  os << "### Sketch Items" << std::endl;
759
754
  int idx = 0;
760
755
  for (auto record : *this) {
761
756
  os << idx << ": " << record.first << "\twt = " << record.second << std::endl;
762
757
  ++idx;
763
758
  }
764
- return os.str();
759
+ return string<A>(os.str().c_str(), allocator_);
765
760
  }
766
761
 
767
762
  template<typename T, typename S, typename A>
768
763
  string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
769
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
764
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
765
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
766
+ std::ostringstream os;
770
767
  os << "### Sketch Items" << std::endl;
771
768
  const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
772
769
  for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
@@ -783,7 +780,7 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
783
780
  ++display_idx;
784
781
  }
785
782
  }
786
- return os.str();
783
+ return string<A>(os.str().c_str(), allocator_);
787
784
  }
788
785
 
789
786
  template<typename T, typename S, typename A>
@@ -1420,7 +1417,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1420
1417
  if (effective_sampling_rate < 0.0 || effective_sampling_rate > 1.0)
1421
1418
  throw std::logic_error("invalid sampling rate outside [0.0, 1.0]");
1422
1419
 
1423
- size_t r_true_count = 0;
1420
+ uint32_t r_true_count = 0;
1424
1421
  ++idx; // skip the gap
1425
1422
  for (; idx < (k_ + 1); ++idx) {
1426
1423
  if (predicate(data_[idx])) {