datasketches 0.2.0 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (170) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -55,15 +55,15 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
55
55
 
56
56
  TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
57
57
  req_sketch<float> sketch(12, false);
58
- sketch.update(1);
58
+ sketch.update(1.0f);
59
59
  REQUIRE_FALSE(sketch.is_HRA());
60
60
  REQUIRE_FALSE(sketch.is_empty());
61
61
  REQUIRE_FALSE(sketch.is_estimation_mode());
62
62
  REQUIRE(sketch.get_n() == 1);
63
63
  REQUIRE(sketch.get_num_retained() == 1);
64
- REQUIRE(sketch.get_rank(1) == 0);
65
- REQUIRE(sketch.get_rank<true>(1) == 1);
66
- REQUIRE(sketch.get_rank(1.1) == 1);
64
+ REQUIRE(sketch.get_rank(1.0f) == 0);
65
+ REQUIRE(sketch.get_rank<true>(1.0f) == 1);
66
+ REQUIRE(sketch.get_rank(1.1f) == 1);
67
67
  REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
68
68
  REQUIRE(sketch.get_quantile(0) == 1);
69
69
  REQUIRE(sketch.get_quantile(0.5) == 1);
@@ -86,43 +86,43 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
86
86
 
87
87
  TEST_CASE("req sketch: repeated values", "[req_sketch]") {
88
88
  req_sketch<float> sketch(12);
89
- sketch.update(1);
90
- sketch.update(1);
91
- sketch.update(1);
92
- sketch.update(2);
93
- sketch.update(2);
94
- sketch.update(2);
89
+ sketch.update(1.0f);
90
+ sketch.update(1.0f);
91
+ sketch.update(1.0f);
92
+ sketch.update(2.0f);
93
+ sketch.update(2.0f);
94
+ sketch.update(2.0f);
95
95
  REQUIRE_FALSE(sketch.is_empty());
96
96
  REQUIRE_FALSE(sketch.is_estimation_mode());
97
97
  REQUIRE(sketch.get_n() == 6);
98
98
  REQUIRE(sketch.get_num_retained() == 6);
99
- REQUIRE(sketch.get_rank(1) == 0);
100
- REQUIRE(sketch.get_rank<true>(1) == 0.5);
101
- REQUIRE(sketch.get_rank(2) == 0.5);
102
- REQUIRE(sketch.get_rank<true>(2) == 1);
99
+ REQUIRE(sketch.get_rank(1.0f) == 0);
100
+ REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
101
+ REQUIRE(sketch.get_rank(2.0f) == 0.5);
102
+ REQUIRE(sketch.get_rank<true>(2.0f) == 1);
103
103
  }
104
104
 
105
105
  TEST_CASE("req sketch: exact mode", "[req_sketch]") {
106
106
  req_sketch<float> sketch(12);
107
- for (size_t i = 1; i <= 10; ++i) sketch.update(i);
107
+ for (size_t i = 1; i <= 10; ++i) sketch.update(static_cast<float>(i));
108
108
  REQUIRE_FALSE(sketch.is_empty());
109
109
  REQUIRE_FALSE(sketch.is_estimation_mode());
110
110
  REQUIRE(sketch.get_n() == 10);
111
111
  REQUIRE(sketch.get_num_retained() == 10);
112
112
 
113
113
  // like KLL
114
- REQUIRE(sketch.get_rank(1) == 0);
115
- REQUIRE(sketch.get_rank(2) == 0.1);
116
- REQUIRE(sketch.get_rank(6) == 0.5);
117
- REQUIRE(sketch.get_rank(9) == 0.8);
118
- REQUIRE(sketch.get_rank(10) == 0.9);
114
+ REQUIRE(sketch.get_rank(1.0f) == 0);
115
+ REQUIRE(sketch.get_rank(2.0f) == 0.1);
116
+ REQUIRE(sketch.get_rank(6.0f) == 0.5);
117
+ REQUIRE(sketch.get_rank(9.0f) == 0.8);
118
+ REQUIRE(sketch.get_rank(10.0f) == 0.9);
119
119
 
120
120
  // inclusive
121
- REQUIRE(sketch.get_rank<true>(1) == 0.1);
122
- REQUIRE(sketch.get_rank<true>(2) == 0.2);
123
- REQUIRE(sketch.get_rank<true>(5) == 0.5);
124
- REQUIRE(sketch.get_rank<true>(9) == 0.9);
125
- REQUIRE(sketch.get_rank<true>(10) == 1);
121
+ REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
122
+ REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
123
+ REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
124
+ REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
125
+ REQUIRE(sketch.get_rank<true>(10.0f) == 1);
126
126
 
127
127
  // like KLL
128
128
  REQUIRE(sketch.get_quantile(0) == 1);
@@ -164,16 +164,16 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
164
164
  TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
165
165
  req_sketch<float> sketch(12);
166
166
  const size_t n = 100000;
167
- for (size_t i = 0; i < n; ++i) sketch.update(i);
167
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
168
168
  REQUIRE_FALSE(sketch.is_empty());
169
169
  REQUIRE(sketch.is_estimation_mode());
170
170
  REQUIRE(sketch.get_n() == n);
171
171
  // std::cout << sketch.to_string(true);
172
172
  REQUIRE(sketch.get_num_retained() < n);
173
173
  REQUIRE(sketch.get_rank(0) == 0);
174
- REQUIRE(sketch.get_rank(n) == 1);
175
- REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
176
- REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
174
+ REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
175
+ REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
176
+ REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
177
177
  REQUIRE(sketch.get_min_value() == 0);
178
178
  REQUIRE(sketch.get_max_value() == n - 1);
179
179
  REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
@@ -219,7 +219,7 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
219
219
 
220
220
  TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
221
221
  req_sketch<float> sketch(12);
222
- sketch.update(1);
222
+ sketch.update(1.0f);
223
223
 
224
224
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
225
225
  sketch.serialize(s);
@@ -235,7 +235,7 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
235
235
 
236
236
  TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
237
237
  req_sketch<float> sketch(12);
238
- sketch.update(1);
238
+ sketch.update(1.0f);
239
239
 
240
240
  auto bytes = sketch.serialize();
241
241
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
@@ -253,7 +253,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
253
253
  TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
254
254
  req_sketch<float> sketch(12);
255
255
  const size_t n = 50;
256
- for (size_t i = 0; i < n; ++i) sketch.update(i);
256
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
257
257
  REQUIRE_FALSE(sketch.is_estimation_mode());
258
258
 
259
259
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -271,7 +271,7 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
271
271
  TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
272
272
  req_sketch<float> sketch(12);
273
273
  const size_t n = 50;
274
- for (size_t i = 0; i < n; ++i) sketch.update(i);
274
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
275
275
  REQUIRE_FALSE(sketch.is_estimation_mode());
276
276
 
277
277
  auto bytes = sketch.serialize();
@@ -290,7 +290,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
290
290
  TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
291
291
  req_sketch<float> sketch(12);
292
292
  const size_t n = 100000;
293
- for (size_t i = 0; i < n; ++i) sketch.update(i);
293
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
294
294
  REQUIRE(sketch.is_estimation_mode());
295
295
 
296
296
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -308,7 +308,7 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
308
308
  TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
309
309
  req_sketch<float> sketch(12);
310
310
  const size_t n = 100000;
311
- for (size_t i = 0; i < n; ++i) sketch.update(i);
311
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
312
312
  REQUIRE(sketch.is_estimation_mode());
313
313
 
314
314
  auto bytes = sketch.serialize();
@@ -326,7 +326,7 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
326
326
  TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
327
327
  req_sketch<float> sketch(12);
328
328
  const size_t n = 100000;
329
- for (size_t i = 0; i < n; ++i) sketch.update(i);
329
+ for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
330
330
  REQUIRE(sketch.is_estimation_mode());
331
331
 
332
332
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
@@ -373,8 +373,8 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
373
373
  REQUIRE(sketch.get_num_retained() == 1);
374
374
  REQUIRE(sketch.get_min_value() == 1);
375
375
  REQUIRE(sketch.get_max_value() == 1);
376
- REQUIRE(sketch.get_rank(1) == 0);
377
- REQUIRE(sketch.get_rank<true>(1) == 1);
376
+ REQUIRE(sketch.get_rank(1.0f) == 0);
377
+ REQUIRE(sketch.get_rank<true>(1.0f) == 1);
378
378
  }
379
379
 
380
380
  TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
@@ -388,7 +388,7 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
388
388
  REQUIRE(sketch.get_num_retained() == 4);
389
389
  REQUIRE(sketch.get_min_value() == 0);
390
390
  REQUIRE(sketch.get_max_value() == 3);
391
- REQUIRE(sketch.get_rank(2) == 0.5);
391
+ REQUIRE(sketch.get_rank(2.0f) == 0.5);
392
392
  }
393
393
 
394
394
  TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
@@ -402,7 +402,7 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
402
402
  REQUIRE(sketch.get_num_retained() == 100);
403
403
  REQUIRE(sketch.get_min_value() == 0);
404
404
  REQUIRE(sketch.get_max_value() == 99);
405
- REQUIRE(sketch.get_rank(50) == 0.5);
405
+ REQUIRE(sketch.get_rank(50.0f) == 0.5);
406
406
  }
407
407
 
408
408
  TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
@@ -416,14 +416,14 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
416
416
  REQUIRE(sketch.get_num_retained() == 2942);
417
417
  REQUIRE(sketch.get_min_value() == 0);
418
418
  REQUIRE(sketch.get_max_value() == 9999);
419
- REQUIRE(sketch.get_rank(5000) == 0.5);
419
+ REQUIRE(sketch.get_rank(5000.0f) == 0.5);
420
420
  }
421
421
 
422
422
  TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
423
423
  req_sketch<float> sketch1(40);
424
424
 
425
425
  req_sketch<float> sketch2(40);
426
- for (size_t i = 0; i < 1000; ++i) sketch2.update(i);
426
+ for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
427
427
 
428
428
  sketch1.merge(sketch2);
429
429
  REQUIRE(sketch1.get_min_value() == 0);
@@ -431,15 +431,15 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
431
431
  REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
432
432
  REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
433
433
  REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
434
- REQUIRE(sketch1.get_rank(500) == Approx(0.5).margin(0.01));
434
+ REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
435
435
  }
436
436
 
437
437
  TEST_CASE("req sketch: merge", "[req_sketch]") {
438
438
  req_sketch<float> sketch1(100);
439
- for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
439
+ for (size_t i = 0; i < 1000; ++i) sketch1.update(static_cast<float>(i));
440
440
 
441
441
  req_sketch<float> sketch2(100);
442
- for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
442
+ for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
443
443
 
444
444
  sketch1.merge(sketch2);
445
445
  REQUIRE(sketch1.get_min_value() == 0);
@@ -447,18 +447,18 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
447
447
  REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
448
448
  REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
449
449
  REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
450
- REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
450
+ REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
451
451
  }
452
452
 
453
453
  TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
454
454
  req_sketch<float> sketch1(12);
455
- for (size_t i = 0; i < 40; ++i) sketch1.update(i);
455
+ for (size_t i = 0; i < 40; ++i) sketch1.update(static_cast<float>(i));
456
456
 
457
457
  req_sketch<float> sketch2(12);
458
- for (size_t i = 40; i < 80; ++i) sketch2.update(i);
458
+ for (size_t i = 40; i < 80; ++i) sketch2.update(static_cast<float>(i));
459
459
 
460
460
  req_sketch<float> sketch3(12);
461
- for (size_t i = 80; i < 120; ++i) sketch3.update(i);
461
+ for (size_t i = 80; i < 120; ++i) sketch3.update(static_cast<float>(i));
462
462
 
463
463
  req_sketch<float> sketch(12);
464
464
  sketch.merge(sketch1);
@@ -467,15 +467,15 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
467
467
  REQUIRE(sketch.get_min_value() == 0);
468
468
  REQUIRE(sketch.get_max_value() == 119);
469
469
  REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
470
- REQUIRE(sketch.get_rank(60) == Approx(0.5).margin(0.01));
470
+ REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
471
471
  }
472
472
 
473
473
  TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
474
474
  req_sketch<float> sketch1(12);
475
- sketch1.update(1);
475
+ sketch1.update(1.0f);
476
476
 
477
477
  req_sketch<float> sketch2(12, false);
478
- sketch2.update(1);
478
+ sketch2.update(1.0f);
479
479
 
480
480
  REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
481
481
  }
@@ -32,17 +32,13 @@ target_include_directories(sampling
32
32
  target_link_libraries(sampling INTERFACE common)
33
33
  target_compile_features(sampling INTERFACE cxx_std_11)
34
34
 
35
- set(sampling_HEADERS "include/var_opt_sketch.hpp;include/var_opt_sketch_impl.hpp")
36
-
37
35
  install(TARGETS sampling
38
36
  EXPORT ${PROJECT_NAME}
39
37
  )
40
38
 
41
- install(FILES ${sampling_HEADERS}
39
+ install(FILES
40
+ include/var_opt_sketch.hpp
41
+ include/var_opt_sketch_impl.hpp
42
+ include/var_opt_union.hpp
43
+ include/var_opt_union_impl.hpp
42
44
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
43
-
44
- target_sources(sampling
45
- INTERFACE
46
- ${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch.hpp
47
- ${CMAKE_CURRENT_SOURCE_DIR}/include/var_opt_sketch_impl.hpp
48
- )
@@ -51,18 +51,23 @@ struct subset_summary {
51
51
  double total_sketch_weight;
52
52
  };
53
53
 
54
- enum resize_factor { X1 = 0, X2, X4, X8 };
55
-
56
54
  template <typename T, typename S, typename A> class var_opt_union; // forward declaration
57
55
 
56
+ namespace var_opt_constants {
57
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
58
+ const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
59
+ }
60
+
58
61
  template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
59
62
  class var_opt_sketch {
60
63
 
61
64
  public:
62
- static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
63
- static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
65
+ static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
66
+ static const uint32_t MAX_K = var_opt_constants::MAX_K;
64
67
 
65
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
68
+ explicit var_opt_sketch(uint32_t k,
69
+ resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
70
+ const A& allocator = A());
66
71
  var_opt_sketch(const var_opt_sketch& other);
67
72
  var_opt_sketch(var_opt_sketch&& other) noexcept;
68
73
 
@@ -128,7 +128,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
128
128
  r_(r_count),
129
129
  n_(n),
130
130
  total_wt_r_(total_wt_r),
131
- rf_(DEFAULT_RESIZE_FACTOR),
131
+ rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
132
132
  curr_items_alloc_(len),
133
133
  filled_data_(n > k),
134
134
  allocator_(allocator),
@@ -334,7 +334,7 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
334
334
  num_bytes += (h_ / 8) + (h_ % 8 > 0);
335
335
  }
336
336
  // must iterate over the items
337
- for (auto& it: *this)
337
+ for (auto it: *this)
338
338
  num_bytes += S().size_of_item(it.first);
339
339
  return num_bytes;
340
340
  }
@@ -359,21 +359,21 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
359
359
  // first prelong
360
360
  uint8_t ser_ver(SER_VER);
361
361
  uint8_t family(FAMILY_ID);
362
- ptr += copy_to_mem(&first_byte, ptr, sizeof(uint8_t));
363
- ptr += copy_to_mem(&ser_ver, ptr, sizeof(uint8_t));
364
- ptr += copy_to_mem(&family, ptr, sizeof(uint8_t));
365
- ptr += copy_to_mem(&flags, ptr, sizeof(uint8_t));
366
- ptr += copy_to_mem(&k_, ptr, sizeof(uint32_t));
362
+ ptr += copy_to_mem(first_byte, ptr);
363
+ ptr += copy_to_mem(ser_ver, ptr);
364
+ ptr += copy_to_mem(family, ptr);
365
+ ptr += copy_to_mem(flags, ptr);
366
+ ptr += copy_to_mem(k_, ptr);
367
367
 
368
368
  if (!empty) {
369
369
  // second and third prelongs
370
- ptr += copy_to_mem(&n_, ptr, sizeof(uint64_t));
371
- ptr += copy_to_mem(&h_, ptr, sizeof(uint32_t));
372
- ptr += copy_to_mem(&r_, ptr, sizeof(uint32_t));
370
+ ptr += copy_to_mem(n_, ptr);
371
+ ptr += copy_to_mem(h_, ptr);
372
+ ptr += copy_to_mem(r_, ptr);
373
373
 
374
374
  // fourth prelong, if needed
375
375
  if (r_ > 0) {
376
- ptr += copy_to_mem(&total_wt_r_, ptr, sizeof(double));
376
+ ptr += copy_to_mem(total_wt_r_, ptr);
377
377
  }
378
378
 
379
379
  // first h_ weights
@@ -388,14 +388,14 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
388
388
  }
389
389
 
390
390
  if ((i & 0x7) == 0x7) {
391
- ptr += copy_to_mem(&val, ptr, sizeof(uint8_t));
391
+ ptr += copy_to_mem(val, ptr);
392
392
  val = 0;
393
393
  }
394
394
  }
395
395
 
396
396
  // write out any remaining values
397
397
  if ((h_ & 0x7) > 0) {
398
- ptr += copy_to_mem(&val, ptr, sizeof(uint8_t));
398
+ ptr += copy_to_mem(val, ptr);
399
399
  }
400
400
  }
401
401
 
@@ -428,25 +428,25 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
428
428
  // first prelong
429
429
  const uint8_t ser_ver(SER_VER);
430
430
  const uint8_t family(FAMILY_ID);
431
- os.write((char*)&first_byte, sizeof(uint8_t));
432
- os.write((char*)&ser_ver, sizeof(uint8_t));
433
- os.write((char*)&family, sizeof(uint8_t));
434
- os.write((char*)&flags, sizeof(uint8_t));
435
- os.write((char*)&k_, sizeof(uint32_t));
431
+ write(os, first_byte);
432
+ write(os, ser_ver);
433
+ write(os, family);
434
+ write(os, flags);
435
+ write(os, k_);
436
436
 
437
437
  if (!empty) {
438
438
  // second and third prelongs
439
- os.write((char*)&n_, sizeof(uint64_t));
440
- os.write((char*)&h_, sizeof(uint32_t));
441
- os.write((char*)&r_, sizeof(uint32_t));
439
+ write(os, n_);
440
+ write(os, h_);
441
+ write(os, r_);
442
442
 
443
443
  // fourth prelong, if needed
444
444
  if (r_ > 0) {
445
- os.write((char*)&total_wt_r_, sizeof(double));
445
+ write(os, total_wt_r_);
446
446
  }
447
447
 
448
448
  // write the first h_ weights
449
- os.write((char*)weights_, h_ * sizeof(double));
449
+ write(os, weights_, h_ * sizeof(double));
450
450
 
451
451
  // write the first h_ marks as packed bytes iff we have a gadget
452
452
  if (marks_ != nullptr) {
@@ -457,14 +457,14 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
457
457
  }
458
458
 
459
459
  if ((i & 0x7) == 0x7) {
460
- os.write((char*)&val, sizeof(uint8_t));
460
+ write(os, val);
461
461
  val = 0;
462
462
  }
463
463
  }
464
464
 
465
465
  // write out any remaining values
466
466
  if ((h_ & 0x7) > 0) {
467
- os.write((char*)&val, sizeof(uint8_t));
467
+ write(os, val);
468
468
  }
469
469
  }
470
470
 
@@ -481,17 +481,17 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
481
481
  const char* base = ptr;
482
482
  const char* end_ptr = ptr + size;
483
483
  uint8_t first_byte;
484
- ptr += copy_from_mem(ptr, &first_byte, sizeof(first_byte));
484
+ ptr += copy_from_mem(ptr, first_byte);
485
485
  uint8_t preamble_longs = first_byte & 0x3f;
486
486
  resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
487
487
  uint8_t serial_version;
488
- ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
488
+ ptr += copy_from_mem(ptr, serial_version);
489
489
  uint8_t family_id;
490
- ptr += copy_from_mem(ptr, &family_id, sizeof(family_id));
490
+ ptr += copy_from_mem(ptr, family_id);
491
491
  uint8_t flags;
492
- ptr += copy_from_mem(ptr, &flags, sizeof(flags));
492
+ ptr += copy_from_mem(ptr, flags);
493
493
  uint32_t k;
494
- ptr += copy_from_mem(ptr, &k, sizeof(k));
494
+ ptr += copy_from_mem(ptr, k);
495
495
 
496
496
  check_preamble_longs(preamble_longs, flags);
497
497
  check_family_and_serialization_version(family_id, serial_version);
@@ -507,16 +507,16 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
507
507
  // second and third prelongs
508
508
  uint64_t n;
509
509
  uint32_t h, r;
510
- ptr += copy_from_mem(ptr, &n, sizeof(n));
511
- ptr += copy_from_mem(ptr, &h, sizeof(h));
512
- ptr += copy_from_mem(ptr, &r, sizeof(r));
510
+ ptr += copy_from_mem(ptr, n);
511
+ ptr += copy_from_mem(ptr, h);
512
+ ptr += copy_from_mem(ptr, r);
513
513
 
514
514
  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
515
515
 
516
516
  // current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
517
517
  double total_wt_r = 0.0;
518
518
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
519
- ptr += copy_from_mem(ptr, &total_wt_r, sizeof(total_wt_r));
519
+ ptr += copy_from_mem(ptr, total_wt_r);
520
520
  if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
521
521
  throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
522
522
  "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
@@ -548,7 +548,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
548
548
  check_memory_size(ptr - base + size_marks, size);
549
549
  for (uint32_t i = 0; i < h; ++i) {
550
550
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
551
- ptr += copy_from_mem(ptr, &val, sizeof(val));
551
+ ptr += copy_from_mem(ptr, val);
552
552
  }
553
553
  marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
554
554
  num_marks_in_h += (marks.get()[i] ? 1 : 0);
@@ -571,18 +571,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
571
571
 
572
572
  template<typename T, typename S, typename A>
573
573
  var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
574
- uint8_t first_byte;
575
- is.read((char*)&first_byte, sizeof(first_byte));
574
+ const auto first_byte = read<uint8_t>(is);
576
575
  uint8_t preamble_longs = first_byte & 0x3f;
577
- resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
578
- uint8_t serial_version;
579
- is.read((char*)&serial_version, sizeof(serial_version));
580
- uint8_t family_id;
581
- is.read((char*)&family_id, sizeof(family_id));
582
- uint8_t flags;
583
- is.read((char*)&flags, sizeof(flags));
584
- uint32_t k;
585
- is.read((char*)&k, sizeof(k));
576
+ const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
577
+ const auto serial_version = read<uint8_t>(is);
578
+ const auto family_id = read<uint8_t>(is);
579
+ const auto flags = read<uint8_t>(is);
580
+ const auto k = read<uint32_t>(is);
586
581
 
587
582
  check_preamble_longs(preamble_longs, flags);
588
583
  check_family_and_serialization_version(family_id, serial_version);
@@ -598,31 +593,27 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
598
593
  }
599
594
 
600
595
  // second and third prelongs
601
- uint64_t n;
602
- uint32_t h, r;
603
- is.read((char*)&n, sizeof(n));
604
- is.read((char*)&h, sizeof(h));
605
- is.read((char*)&r, sizeof(r));
596
+ const auto n = read<uint64_t>(is);
597
+ const auto h = read<uint32_t>(is);
598
+ const auto r = read<uint32_t>(is);
606
599
 
607
600
  const uint32_t array_size = validate_and_get_target_size(preamble_longs, k, n, h, r, rf);
608
601
 
609
602
  // current_items_alloc_ is set but validate R region weight (4th prelong), if needed, before allocating
610
603
  double total_wt_r = 0.0;
611
604
  if (preamble_longs == PREAMBLE_LONGS_FULL) {
612
- is.read((char*)&total_wt_r, sizeof(total_wt_r));
605
+ total_wt_r = read<double>(is);
613
606
  if (std::isnan(total_wt_r) || r == 0 || total_wt_r <= 0.0) {
614
607
  throw std::invalid_argument("Possible corruption: deserializing in full mode but r = 0 or invalid R weight. "
615
608
  "Found r = " + std::to_string(r) + ", R region weight = " + std::to_string(total_wt_r));
616
609
  }
617
- } else {
618
- total_wt_r = 0.0;
619
610
  }
620
611
 
621
612
  // read the first h weights, fill remainder with -1.0
622
613
  std::unique_ptr<double, weights_deleter> weights(AllocDouble(allocator).allocate(array_size),
623
614
  weights_deleter(array_size, allocator));
624
615
  double* wts = weights.get(); // to avoid lots of .get() calls -- do not delete
625
- is.read((char*)wts, h * sizeof(double));
616
+ read(is, wts, h * sizeof(double));
626
617
  for (size_t i = 0; i < h; ++i) {
627
618
  if (!(wts[i] > 0.0)) {
628
619
  throw std::invalid_argument("Possible corruption: Non-positive weight when deserializing: " + std::to_string(wts[i]));
@@ -638,7 +629,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
638
629
  uint8_t val = 0;
639
630
  for (uint32_t i = 0; i < h; ++i) {
640
631
  if ((i & 0x7) == 0x0) { // should trigger on first iteration
641
- is.read((char*)&val, sizeof(val));
632
+ val = read<uint8_t>(is);
642
633
  }
643
634
  marks.get()[i] = ((val >> (i & 0x7)) & 0x1) == 1;
644
635
  num_marks_in_h += (marks.get()[i] ? 1 : 0);
@@ -740,8 +731,10 @@ void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
740
731
 
741
732
  template<typename T, typename S, typename A>
742
733
  string<A> var_opt_sketch<T,S,A>::to_string() const {
743
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
744
- os << "### VarOpt SUMMARY: " << std::endl;
734
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
735
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
736
+ std::ostringstream os;
737
+ os << "### VarOpt SUMMARY:" << std::endl;
745
738
  os << " k : " << k_ << std::endl;
746
739
  os << " h : " << h_ << std::endl;
747
740
  os << " r : " << r_ << std::endl;
@@ -749,24 +742,28 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
749
742
  os << " Current size : " << curr_items_alloc_ << std::endl;
750
743
  os << " Resize factor: " << (1 << rf_) << std::endl;
751
744
  os << "### END SKETCH SUMMARY" << std::endl;
752
- return os.str();
745
+ return string<A>(os.str().c_str(), allocator_);
753
746
  }
754
747
 
755
748
  template<typename T, typename S, typename A>
756
749
  string<A> var_opt_sketch<T,S,A>::items_to_string() const {
757
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
750
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
751
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
752
+ std::ostringstream os;
758
753
  os << "### Sketch Items" << std::endl;
759
754
  int idx = 0;
760
755
  for (auto record : *this) {
761
756
  os << idx << ": " << record.first << "\twt = " << record.second << std::endl;
762
757
  ++idx;
763
758
  }
764
- return os.str();
759
+ return string<A>(os.str().c_str(), allocator_);
765
760
  }
766
761
 
767
762
  template<typename T, typename S, typename A>
768
763
  string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
769
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
764
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
765
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
766
+ std::ostringstream os;
770
767
  os << "### Sketch Items" << std::endl;
771
768
  const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
772
769
  for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
@@ -783,7 +780,7 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
783
780
  ++display_idx;
784
781
  }
785
782
  }
786
- return os.str();
783
+ return string<A>(os.str().c_str(), allocator_);
787
784
  }
788
785
 
789
786
  template<typename T, typename S, typename A>
@@ -1420,7 +1417,7 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
1420
1417
  if (effective_sampling_rate < 0.0 || effective_sampling_rate > 1.0)
1421
1418
  throw std::logic_error("invalid sampling rate outside [0.0, 1.0]");
1422
1419
 
1423
- size_t r_true_count = 0;
1420
+ uint32_t r_true_count = 0;
1424
1421
  ++idx; // skip the gap
1425
1422
  for (; idx < (k_ + 1); ++idx) {
1426
1423
  if (predicate(data_[idx])) {