datasketches 0.2.2 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -23,6 +23,7 @@
23
23
  #include <cstring>
24
24
  #include <limits>
25
25
  #include <sstream>
26
+ #include <stdexcept>
26
27
 
27
28
  #include "memory_operations.hpp"
28
29
 
@@ -160,7 +161,8 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error
160
161
  }
161
162
 
162
163
  template<typename T, typename W, typename H, typename E, typename S, typename A>
163
- void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const {
164
+ template<typename SerDe>
165
+ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
164
166
  const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
165
167
  write(os, preamble_longs);
166
168
  const uint8_t serial_version = SERIAL_VERSION;
@@ -198,23 +200,25 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const
198
200
  }
199
201
  write(os, weights, sizeof(W) * num_items);
200
202
  aw.deallocate(weights, num_items);
201
- S().serialize(os, items, num_items);
203
+ sd.serialize(os, items, num_items);
202
204
  for (i = 0; i < num_items; i++) items[i].~T();
203
205
  alloc.deallocate(items, num_items);
204
206
  }
205
207
  }
206
208
 
207
209
  template<typename T, typename W, typename H, typename E, typename S, typename A>
208
- size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() const {
210
+ template<typename SerDe>
211
+ size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
209
212
  if (is_empty()) return PREAMBLE_LONGS_EMPTY * sizeof(uint64_t);
210
213
  size_t size = PREAMBLE_LONGS_NONEMPTY * sizeof(uint64_t) + map.get_num_active() * sizeof(W);
211
- for (auto it: map) size += S().size_of_item(it.first);
214
+ for (auto it: map) size += sd.size_of_item(it.first);
212
215
  return size;
213
216
  }
214
217
 
215
218
  template<typename T, typename W, typename H, typename E, typename S, typename A>
216
- auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
217
- const size_t size = header_size_bytes + get_serialized_size_bytes();
219
+ template<typename SerDe>
220
+ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
221
+ const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
218
222
  vector_bytes bytes(size, 0, map.get_allocator());
219
223
  uint8_t* ptr = bytes.data() + header_size_bytes;
220
224
  uint8_t* end_ptr = ptr + size;
@@ -255,7 +259,7 @@ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_byt
255
259
  ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
256
260
  aw.deallocate(weights, num_items);
257
261
  const size_t bytes_remaining = end_ptr - ptr;
258
- ptr += S().serialize(ptr, bytes_remaining, items, num_items);
262
+ ptr += sd.serialize(ptr, bytes_remaining, items, num_items);
259
263
  for (i = 0; i < num_items; i++) items[i].~T();
260
264
  alloc.deallocate(items, num_items);
261
265
  }
@@ -284,6 +288,12 @@ private:
284
288
 
285
289
  template<typename T, typename W, typename H, typename E, typename S, typename A>
286
290
  frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
291
+ return deserialize(is, S(), allocator);
292
+ }
293
+
294
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
295
+ template<typename SerDe>
296
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
287
297
  const auto preamble_longs = read<uint8_t>(is);
288
298
  const auto serial_version = read<uint8_t>(is);
289
299
  const auto family_id = read<uint8_t>(is);
@@ -312,7 +322,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
312
322
  read(is, weights.data(), sizeof(W) * num_items);
313
323
  A alloc(allocator);
314
324
  std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
315
- S().deserialize(is, items.get(), num_items);
325
+ sd.deserialize(is, items.get(), num_items);
316
326
  items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
317
327
  for (uint32_t i = 0; i < num_items; i++) {
318
328
  sketch.update(std::move(items.get()[i]), weights[i]);
@@ -327,6 +337,12 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
327
337
 
328
338
  template<typename T, typename W, typename H, typename E, typename S, typename A>
329
339
  frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
340
+ return deserialize(bytes, size, S(), allocator);
341
+ }
342
+
343
+ template<typename T, typename W, typename H, typename E, typename S, typename A>
344
+ template<typename SerDe>
345
+ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
330
346
  ensure_minimum_memory(size, 8);
331
347
  const char* ptr = static_cast<const char*>(bytes);
332
348
  const char* base = static_cast<const char*>(bytes);
@@ -350,7 +366,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
350
366
  check_serial_version(serial_version);
351
367
  check_family_id(family_id);
352
368
  check_size(lg_cur_size, lg_max_size);
353
- ensure_minimum_memory(size, 1ULL << preamble_longs);
369
+ ensure_minimum_memory(size, preamble_longs * sizeof(uint64_t));
354
370
 
355
371
  frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
356
372
  if (!is_empty) {
@@ -370,7 +386,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
370
386
  A alloc(allocator);
371
387
  std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
372
388
  const size_t bytes_remaining = size - (ptr - base);
373
- ptr += S().deserialize(ptr, bytes_remaining, items.get(), num_items);
389
+ ptr += sd.deserialize(ptr, bytes_remaining, items.get(), num_items);
374
390
  items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
375
391
  for (uint32_t i = 0; i < num_items; i++) {
376
392
  sketch.update(std::move(items.get()[i]), weights[i]);
@@ -421,7 +437,9 @@ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, ui
421
437
 
422
438
  template<typename T, typename W, typename H, typename E, typename S, typename A>
423
439
  string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
424
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
440
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
441
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
442
+ std::ostringstream os;
425
443
  os << "### Frequent items sketch summary:" << std::endl;
426
444
  os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
427
445
  os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
@@ -444,7 +462,7 @@ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) c
444
462
  }
445
463
  os << "### End items" << std::endl;
446
464
  }
447
- return os.str();
465
+ return string<A>(os.str().c_str(), map.get_allocator());
448
466
  }
449
467
 
450
468
  // version for integral signed type
@@ -19,6 +19,7 @@
19
19
 
20
20
  #include <catch.hpp>
21
21
  #include <sstream>
22
+ #include <stdexcept>
22
23
 
23
24
  #include "frequent_items_sketch.hpp"
24
25
  #include "test_type.hpp"
@@ -59,7 +60,7 @@ TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
59
60
  REQUIRE(sketch.get_maximum_error() == sketch2.get_maximum_error());
60
61
 
61
62
  auto bytes = sketch.serialize();
62
- auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), 0);
63
+ auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), alloc(0));
63
64
  REQUIRE_FALSE(sketch3.is_empty());
64
65
  REQUIRE(sketch3.get_total_weight() == 17);
65
66
  REQUIRE(sketch3.get_estimate(1) == 10);
@@ -20,6 +20,7 @@
20
20
  #include <catch.hpp>
21
21
  #include <sstream>
22
22
  #include <fstream>
23
+ #include <stdexcept>
23
24
 
24
25
  #include "frequent_items_sketch.hpp"
25
26
 
@@ -32,64 +32,41 @@ target_include_directories(hll
32
32
  target_link_libraries(hll INTERFACE common)
33
33
  target_compile_features(hll INTERFACE cxx_std_11)
34
34
 
35
- # TODO: would be useful if this didn't need to be reproduced in target_sources(), too
36
- set(hll_HEADERS "")
37
- list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
38
- list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
39
- list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
40
- list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
41
- list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
42
- list(APPEND hll_HEADERS "include/HllSketchImpl.hpp")
43
- list(APPEND hll_HEADERS "include/HllUtil.hpp;include/coupon_iterator.hpp")
44
- list(APPEND hll_HEADERS "include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
45
- list(APPEND hll_HEADERS "include/CompositeInterpolationXTable-internal.hpp")
46
- list(APPEND hll_HEADERS "include/CouponHashSet-internal.hpp;include/CouponList-internal.hpp")
47
- list(APPEND hll_HEADERS "include/CubicInterpolation-internal.hpp;include/HarmonicNumbers-internal.hpp")
48
- list(APPEND hll_HEADERS "include/Hll4Array-internal.hpp;include/Hll6Array-internal.hpp")
49
- list(APPEND hll_HEADERS "include/Hll8Array-internal.hpp;include/HllArray-internal.hpp")
50
- list(APPEND hll_HEADERS "include/HllSketch-internal.hpp")
51
- list(APPEND hll_HEADERS "include/HllSketchImpl-internal.hpp;include/HllUnion-internal.hpp")
52
- list(APPEND hll_HEADERS "include/coupon_iterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
53
-
54
35
  install(TARGETS hll
55
36
  EXPORT ${PROJECT_NAME}
56
37
  )
57
38
 
58
- install(FILES ${hll_HEADERS}
39
+ install(FILES
40
+ include/hll.hpp
41
+ include/AuxHashMap.hpp
42
+ include/CompositeInterpolationXTable.hpp
43
+ include/hll.private.hpp
44
+ include/HllSketchImplFactory.hpp
45
+ include/CouponHashSet.hpp
46
+ include/CouponList.hpp
47
+ include/CubicInterpolation.hpp
48
+ include/HarmonicNumbers.hpp
49
+ include/Hll4Array.hpp
50
+ include/Hll6Array.hpp
51
+ include/Hll8Array.hpp
52
+ include/HllArray.hpp
53
+ include/HllSketchImpl.hpp
54
+ include/HllUtil.hpp
55
+ include/coupon_iterator.hpp
56
+ include/RelativeErrorTables.hpp
57
+ include/AuxHashMap-internal.hpp
58
+ include/CompositeInterpolationXTable-internal.hpp
59
+ include/CouponHashSet-internal.hpp
60
+ include/CouponList-internal.hpp
61
+ include/CubicInterpolation-internal.hpp
62
+ include/HarmonicNumbers-internal.hpp
63
+ include/Hll4Array-internal.hpp
64
+ include/Hll6Array-internal.hpp
65
+ include/Hll8Array-internal.hpp
66
+ include/HllArray-internal.hpp
67
+ include/HllSketch-internal.hpp
68
+ include/HllSketchImpl-internal.hpp
69
+ include/HllUnion-internal.hpp
70
+ include/coupon_iterator-internal.hpp
71
+ include/RelativeErrorTables-internal.hpp
59
72
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
60
-
61
- target_sources(hll
62
- INTERFACE
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array.hpp
72
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
73
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
74
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
75
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
76
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
77
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
78
- ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
79
- ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
80
- ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
81
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
82
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
83
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList-internal.hpp
84
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation-internal.hpp
85
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers-internal.hpp
86
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array-internal.hpp
87
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
88
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
89
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
90
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
91
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
92
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
93
- ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
94
- ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
95
- )
@@ -20,6 +20,8 @@
20
20
  #ifndef _AUXHASHMAP_INTERNAL_HPP_
21
21
  #define _AUXHASHMAP_INTERNAL_HPP_
22
22
 
23
+ #include <stdexcept>
24
+
23
25
  #include "HllUtil.hpp"
24
26
  #include "AuxHashMap.hpp"
25
27
 
@@ -24,6 +24,7 @@
24
24
  #include "CompositeInterpolationXTable.hpp"
25
25
 
26
26
  #include <exception>
27
+ #include <stdexcept>
27
28
 
28
29
  namespace datasketches {
29
30
 
@@ -24,6 +24,7 @@
24
24
 
25
25
  #include <cstring>
26
26
  #include <exception>
27
+ #include <stdexcept>
27
28
 
28
29
  namespace datasketches {
29
30
 
@@ -113,10 +114,9 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const
113
114
  } else {
114
115
  sketch->coupons_.resize(1ULL << lgArrInts);
115
116
  sketch->couponCount_ = couponCount;
116
- // only need to read valid coupons, unlike in stream case
117
117
  std::memcpy(sketch->coupons_.data(),
118
118
  data + hll_constants::HASH_SET_INT_ARR_START,
119
- couponCount * sizeof(uint32_t));
119
+ couponsInArray * sizeof(uint32_t));
120
120
  }
121
121
 
122
122
  return sketch;
@@ -27,6 +27,7 @@
27
27
 
28
28
  #include <algorithm>
29
29
  #include <cmath>
30
+ #include <stdexcept>
30
31
 
31
32
  namespace datasketches {
32
33
 
@@ -246,10 +246,12 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
246
246
  const bool detail,
247
247
  const bool aux_detail,
248
248
  const bool all) const {
249
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
249
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
250
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
251
+ std::stringstream os;
250
252
  if (summary) {
251
253
  os << "### HLL sketch summary:" << std::endl
252
- << " Log Config K : " << get_lg_config_k() << std::endl
254
+ << " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
253
255
  << " Hll Target : " << type_as_string() << std::endl
254
256
  << " Current Mode : " << mode_as_string() << std::endl
255
257
  << " LB : " << get_lower_bound(1) << std::endl
@@ -258,7 +260,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
258
260
  << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
259
261
  if (get_current_mode() == HLL) {
260
262
  HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
261
- os << " CurMin : " << hllArray->getCurMin() << std::endl
263
+ os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
262
264
  << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
263
265
  << " HipAccum : " << hllArray->getHipAccum() << std::endl
264
266
  << " KxQ0 : " << hllArray->getKxQ0() << std::endl
@@ -338,7 +340,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
338
340
  }
339
341
  }
340
342
 
341
- return os.str();
343
+ return string<A>(os.str().c_str(), sketch_impl->getAllocator());
342
344
  }
343
345
 
344
346
  template<typename A>
@@ -23,6 +23,8 @@
23
23
  #include "HllSketchImpl.hpp"
24
24
  #include "HllSketchImplFactory.hpp"
25
25
 
26
+ #include <stdexcept>
27
+
26
28
  namespace datasketches {
27
29
 
28
30
  template<typename A>
@@ -20,6 +20,8 @@
20
20
  #ifndef _HLLSKETCHIMPLFACTORY_HPP_
21
21
  #define _HLLSKETCHIMPLFACTORY_HPP_
22
22
 
23
+ #include <stdexcept>
24
+
23
25
  #include "HllUtil.hpp"
24
26
  #include "HllSketchImpl.hpp"
25
27
  #include "CouponList.hpp"
@@ -19,6 +19,7 @@
19
19
 
20
20
  #include <catch.hpp>
21
21
  #include <memory>
22
+ #include <stdexcept>
22
23
 
23
24
  #include "AuxHashMap.hpp"
24
25
 
@@ -26,6 +26,7 @@
26
26
  #include <cmath>
27
27
  #include <string>
28
28
  #include <exception>
29
+ #include <stdexcept>
29
30
 
30
31
  namespace datasketches {
31
32
 
@@ -23,6 +23,7 @@
23
23
  #include <cmath>
24
24
  #include <string>
25
25
  #include <exception>
26
+ #include <stdexcept>
26
27
 
27
28
  #include "hll.hpp"
28
29
  #include "CouponList.hpp"
@@ -31,6 +32,7 @@
31
32
  namespace datasketches {
32
33
 
33
34
  void println_string(std::string str) {
35
+ unused(str);
34
36
  //std::cout << str << std::endl;
35
37
  }
36
38
 
@@ -21,6 +21,7 @@
21
21
 
22
22
  #include <exception>
23
23
  #include <sstream>
24
+ #include <stdexcept>
24
25
  #include <catch.hpp>
25
26
 
26
27
  namespace datasketches {
@@ -17,6 +17,8 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #include <stdexcept>
21
+
20
22
  #include "hll.hpp"
21
23
 
22
24
  #include <catch.hpp>
@@ -382,4 +384,61 @@ TEST_CASE("hll sketch: deserialize HLL mode buffer overrun", "[hll_sketch]") {
382
384
  REQUIRE(test_allocator_total_bytes == 0);
383
385
  }
384
386
 
387
+ TEST_CASE("hll sketch: bytes serialize-deserialize-serialize list mode") {
388
+ test_allocator_total_bytes = 0;
389
+ {
390
+ hll_sketch_test_alloc s1(10, target_hll_type::HLL_4, false, 0);
391
+ s1.update(1);
392
+ s1.update(2);
393
+ s1.update(3);
394
+ std::cout << s1.to_string();
395
+ auto bytes1 = s1.serialize_compact();
396
+ auto s2 = hll_sketch_test_alloc::deserialize(bytes1.data(), bytes1.size(), 0);
397
+ auto bytes2 = s2.serialize_compact();
398
+ REQUIRE(bytes1 == bytes2);
399
+ }
400
+ REQUIRE(test_allocator_total_bytes == 0);
401
+ }
402
+
403
+ TEST_CASE("hll sketch: updatable bytes serialize-deserialize-serialize set mode") {
404
+ test_allocator_total_bytes = 0;
405
+ {
406
+ hll_sketch_test_alloc s1(10, target_hll_type::HLL_4, false, 0);
407
+ for (int i = 0; i < 10; ++i) s1.update(i);
408
+ std::cout << s1.to_string();
409
+ auto bytes1 = s1.serialize_updatable();
410
+ auto s2 = hll_sketch_test_alloc::deserialize(bytes1.data(), bytes1.size(), 0);
411
+
412
+ auto bytes2 = s2.serialize_updatable();
413
+ REQUIRE(bytes1 == bytes2);
414
+ }
415
+ REQUIRE(test_allocator_total_bytes == 0);
416
+ }
417
+
418
+ TEST_CASE("hll sketch: compact bytes serialize-deserialize-serialize set mode") {
419
+ test_allocator_total_bytes = 0;
420
+ {
421
+ hll_sketch_test_alloc s1(10, target_hll_type::HLL_4, false, 0);
422
+ for (int i = 0; i < 10; ++i) s1.update(i);
423
+ std::cout << s1.to_string();
424
+ auto bytes1 = s1.serialize_compact();
425
+ auto s2 = hll_sketch_test_alloc::deserialize(bytes1.data(), bytes1.size(), 0);
426
+
427
+ // cannot just compare bytes here
428
+ // hash set does not preserve the order after reconstruction in compact mode
429
+ // add more to push them to HLL mode
430
+ for (int i = 10; i < 100; ++i) {
431
+ s1.update(i);
432
+ s2.update(i);
433
+ }
434
+ std::cout << s1.to_string();
435
+ std::cout << s2.to_string();
436
+
437
+ auto bytes2 = s1.serialize_compact();
438
+ auto bytes3 = s2.serialize_compact();
439
+ REQUIRE(bytes2 == bytes3);
440
+ }
441
+ REQUIRE(test_allocator_total_bytes == 0);
442
+ }
443
+
385
444
  } /* namespace datasketches */
@@ -19,12 +19,14 @@
19
19
 
20
20
  #include <catch.hpp>
21
21
  #include <sstream>
22
+ #include <stdexcept>
22
23
 
23
24
  #include "hll.hpp"
24
25
 
25
26
  namespace datasketches {
26
27
 
27
28
  static void println(std::string& str) {
29
+ unused(str);
28
30
  //std::cout << str << "\n";
29
31
  }
30
32
 
@@ -17,6 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
+ #include <stdexcept>
20
21
 
21
22
  #include <catch.hpp>
22
23
 
@@ -32,27 +32,13 @@ target_include_directories(kll
32
32
  target_link_libraries(kll INTERFACE common)
33
33
  target_compile_features(kll INTERFACE cxx_std_11)
34
34
 
35
- set(kll_HEADERS "")
36
- list(APPEND kll_HEADERS "include/kll_sketch.hpp")
37
- list(APPEND kll_HEADERS "include/kll_sketch_impl.hpp")
38
- list(APPEND kll_HEADERS "include/kll_helper.hpp")
39
- list(APPEND kll_HEADERS "include/kll_helper_impl.hpp")
40
- list(APPEND kll_HEADERS "include/kll_quantile_calculator.hpp")
41
- list(APPEND kll_HEADERS "include/kll_quantile_calculator_impl.hpp")
42
-
43
35
  install(TARGETS kll
44
36
  EXPORT ${PROJECT_NAME}
45
37
  )
46
38
 
47
- install(FILES ${kll_HEADERS}
39
+ install(FILES
40
+ include/kll_sketch.hpp
41
+ include/kll_sketch_impl.hpp
42
+ include/kll_helper.hpp
43
+ include/kll_helper_impl.hpp
48
44
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
49
-
50
- target_sources(kll
51
- INTERFACE
52
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper.hpp
53
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper_impl.hpp
54
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch.hpp
55
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch_impl.hpp
56
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator.hpp
57
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator_impl.hpp
58
- )
@@ -22,13 +22,9 @@
22
22
 
23
23
  #include <random>
24
24
  #include <stdexcept>
25
- #include <chrono>
26
25
 
27
26
  namespace datasketches {
28
27
 
29
- static std::independent_bits_engine<std::mt19937, 1, uint32_t>
30
- random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
31
-
32
28
  #ifdef KLL_VALIDATION
33
29
  extern uint32_t kll_next_offset;
34
30
  #endif
@@ -21,6 +21,9 @@
21
21
  #define KLL_HELPER_IMPL_HPP_
22
22
 
23
23
  #include <algorithm>
24
+ #include <stdexcept>
25
+
26
+ #include "common_defs.hpp"
24
27
 
25
28
  namespace datasketches {
26
29