RubyGems - datasketches - Versions diffs - 0.2.4 → 0.2.5 - Mend

datasketches 0.2.4 → 0.2.5

Files changed (106) hide show

data/vendor/datasketches-cpp/req/CMakeLists.txt CHANGED Viewed

@@ -42,6 +42,4 @@ install(FILES
 		include/req_sketch_impl.hpp
 		include/req_compactor.hpp
 		include/req_compactor_impl.hpp
-		include/req_quantile_calculator.hpp
-		include/req_quantile_calculator_impl.hpp
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

data/vendor/datasketches-cpp/req/include/req_common.hpp CHANGED Viewed

@@ -21,17 +21,12 @@
 #define REQ_COMMON_HPP_
 #include <random>
-#include <chrono>
 #include "serde.hpp"
 #include "common_defs.hpp"
 namespace datasketches {
-// TODO: have a common random bit with KLL
-static std::independent_bits_engine<std::mt19937, 1, unsigned>
-  req_random_bit(static_cast<unsigned>(std::chrono::system_clock::now().time_since_epoch().count()));
 namespace req_constants {
   static const uint16_t MIN_K = 4;
   static const uint8_t INIT_NUM_SECTIONS = 3;

data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp CHANGED Viewed

@@ -26,6 +26,7 @@
 #include "count_zeros.hpp"
 #include "conditional_forward.hpp"
+#include "common_defs.hpp"
 #include <iomanip>
@@ -245,7 +246,7 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& nex
   if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
   if ((state_ & 1) == 1) { coin_ = !coin_; } // for odd flip coin;
-  else { coin_ = req_random_bit(); } // random coin flip
+  else { coin_ = random_bit(); } // random coin flip
   const auto num = (compaction_range.second - compaction_range.first) / 2;
   next.ensure_space(num);
@@ -451,7 +452,7 @@ req_compactor<T, C, A>::req_compactor(bool hra, uint8_t lg_weight, bool sorted,
 allocator_(allocator),
 lg_weight_(lg_weight),
 hra_(hra),
-coin_(req_random_bit()),
+coin_(random_bit()),
 sorted_(sorted),
 section_size_raw_(section_size_raw),
 section_size_(nearest_even(section_size_raw)),

data/vendor/datasketches-cpp/req/include/req_sketch.hpp CHANGED Viewed

@@ -22,22 +22,25 @@
 #include "req_common.hpp"
 #include "req_compactor.hpp"
-#include "req_quantile_calculator.hpp"
+#include "quantile_sketch_sorted_view.hpp"
+#include <stdexcept>
 namespace datasketches {
 template<
   typename T,
-  typename Comparator = std::less<T>,
-  typename SerDe = serde<T>,
+  typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
+  typename S = serde<T>, // deprecated, to be removed in the next major version
   typename Allocator = std::allocator<T>
 >
 class req_sketch {
 public:
+  using value_type = T;
+  using comparator = Comparator;
   using Compactor = req_compactor<T, Comparator, Allocator>;
   using AllocCompactor = typename std::allocator_traits<Allocator>::template rebind_alloc<Compactor>;
-  using AllocDouble = typename std::allocator_traits<Allocator>::template rebind_alloc<double>;
-  using vector_double = std::vector<double, AllocDouble>;
+  using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
   /**
    * Constructor
@@ -113,6 +116,12 @@ public:
    */
   const T& get_max_value() const;
+  /**
+   * Returns an instance of the comparator for this sketch.
+   * @return comparator
+   */
+  Comparator get_comparator() const;
   /**
    * Returns an approximation to the normalized (fractional) rank of the given item from 0 to 1 inclusive.
    * With the template parameter inclusive=true the weight of the given item is included into the rank.
@@ -123,7 +132,6 @@ public:
    * @param item to be ranked
    * @return an approximate rank of the given item
    */
   template<bool inclusive = false>
   double get_rank(const T& item) const;
@@ -135,9 +143,10 @@ public:
    *
    * @param split_points an array of <i>m</i> unique, monotonically increasing values
    * that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
-   * The definition of an "interval" is inclusive of the left split point (or minimum value) and
-   * exclusive of the right split point, with the exception that the last interval will include
-   * the maximum value.
+   * If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
+   * split point, with the exception that the last interval will include the maximum value.
+   * If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
+   * split point.
    * It is not necessary to include either the min or max values in these split points.
    *
    * @return an array of m+1 doubles each of which is an approximation
@@ -178,8 +187,9 @@ public:
    * @param rank the given normalized rank
    * @return approximate quantile given the normalized rank
    */
+  using quantile_return_type = typename quantile_sketch_sorted_view<T, Comparator, Allocator>::quantile_return_type;
   template<bool inclusive = false>
-  const T& get_quantile(double rank) const;
+  quantile_return_type get_quantile(double rank) const;
   /**
    * Returns an array of quantiles that correspond to the given array of normalized ranks.
@@ -221,24 +231,28 @@ public:
   /**
    * Computes size needed to serialize the current state of the sketch.
    * This version is for fixed-size arithmetic types (integral and floating point).
+   * @param sd instance of a SerDe
    * @return size in bytes needed to serialize this sketch
    */
-  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
-  size_t get_serialized_size_bytes() const;
+  template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
+  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
   /**
    * Computes size needed to serialize the current state of the sketch.
    * This version is for all other types and can be expensive since every item needs to be looked at.
+   * @param sd instance of a SerDe
    * @return size in bytes needed to serialize this sketch
    */
-  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
-  size_t get_serialized_size_bytes() const;
+  template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
+  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
   /**
    * This method serializes the sketch into a given stream in a binary form
    * @param os output stream
+   * @param sd instance of a SerDe
    */
-  void serialize(std::ostream& os) const;
+  template<typename SerDe = S>
+  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
   // This is a convenience alias for users
   // The type returned by the following serialize method
@@ -250,24 +264,53 @@ public:
    * It is a blank space of a given size.
    * This header is used in Datasketches PostgreSQL extension.
    * @param header_size_bytes space to reserve in front of the sketch
+   * @param sd instance of a SerDe
    */
-  vector_bytes serialize(unsigned header_size_bytes = 0) const;
+  template<typename SerDe = S>
+  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
   /**
    * This method deserializes a sketch from a given stream.
    * @param is input stream
+   * @param allocator instance of an Allocator
    * @return an instance of a sketch
+   *
+   * Deprecated, to be removed in the next major version
    */
   static req_sketch deserialize(std::istream& is, const Allocator& allocator = Allocator());
+  /**
+   * This method deserializes a sketch from a given stream.
+   * @param is input stream
+   * @param sd instance of a SerDe
+   * @param allocator instance of an Allocator
+   * @return an instance of a sketch
+   */
+  template<typename SerDe = S>
+  static req_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
   /**
    * This method deserializes a sketch from a given array of bytes.
    * @param bytes pointer to the array of bytes
    * @param size the size of the array
+   * @param allocator instance of an Allocator
    * @return an instance of a sketch
+   *
+   * Deprecated, to be removed in the next major version
    */
   static req_sketch deserialize(const void* bytes, size_t size, const Allocator& allocator = Allocator());
+  /**
+   * This method deserializes a sketch from a given array of bytes.
+   * @param bytes pointer to the array of bytes
+   * @param size the size of the array
+   * @param sd instance of a SerDe
+   * @param allocator instance of an Allocator
+   * @return an instance of a sketch
+   */
+  template<typename SerDe = S>
+  static req_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
   /**
    * Prints a summary of the sketch.
    * @param print_levels if true include information about levels
@@ -279,6 +322,9 @@ public:
   const_iterator begin() const;
   const_iterator end() const;
+  template<bool inclusive = false>
+  quantile_sketch_sorted_view<T, Comparator, Allocator> get_sorted_view(bool cumulative) const;
 private:
   Allocator allocator_;
   uint16_t k_;
@@ -310,13 +356,6 @@ private:
   static double get_rank_ub(uint16_t k, uint8_t num_levels, double rank, uint8_t num_std_dev, uint64_t n, bool hra);
   static bool is_exact_rank(uint16_t k, uint8_t num_levels, double rank, uint64_t n, bool hra);
-  using QuantileCalculator = req_quantile_calculator<T, Comparator, Allocator>;
-  using AllocCalc = typename std::allocator_traits<Allocator>::template rebind_alloc<QuantileCalculator>;
-  class calculator_deleter;
-  using QuantileCalculatorPtr = typename std::unique_ptr<QuantileCalculator, calculator_deleter>;
-  template<bool inclusive>
-  QuantileCalculatorPtr get_quantile_calculator() const;
   // for deserialization
   class item_deleter;
   req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);

data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp CHANGED Viewed

@@ -196,6 +196,11 @@ const T& req_sketch<T, C, S, A>::get_max_value() const {
   return *max_value_;
 }
+template<typename T, typename C, typename S, typename A>
+C req_sketch<T, C, S, A>::get_comparator() const {
+  return C();
+}
 template<typename T, typename C, typename S, typename A>
 template<bool inclusive>
 double req_sketch<T, C, S, A>::get_rank(const T& item) const {
@@ -210,6 +215,7 @@ template<typename T, typename C, typename S, typename A>
 template<bool inclusive>
 auto req_sketch<T, C, S, A>::get_PMF(const T* split_points, uint32_t size) const -> vector_double {
   auto buckets = get_CDF<inclusive>(split_points, size);
+  if (is_empty()) return buckets;
   for (uint32_t i = size; i > 0; --i) {
     buckets[i] -= buckets[i - 1];
   }
@@ -230,14 +236,15 @@ auto req_sketch<T, C, S, A>::get_CDF(const T* split_points, uint32_t size) const
 template<typename T, typename C, typename S, typename A>
 template<bool inclusive>
-const T& req_sketch<T, C, S, A>::get_quantile(double rank) const {
+auto req_sketch<T, C, S, A>::get_quantile(double rank) const -> quantile_return_type {
   if (is_empty()) return get_invalid_value();
   if (rank == 0.0) return *min_value_;
   if (rank == 1.0) return *max_value_;
   if ((rank < 0.0) || (rank > 1.0)) {
     throw std::invalid_argument("Rank cannot be less than zero or greater than 1.0");
   }
-  return *(get_quantile_calculator<inclusive>()->get_quantile(rank));
+  // possible side-effect of sorting level zero
+  return get_sorted_view<inclusive>(true).get_quantile(rank);
 }
 template<typename T, typename C, typename S, typename A>
@@ -245,8 +252,11 @@ template<bool inclusive>
 std::vector<T, A> req_sketch<T, C, S, A>::get_quantiles(const double* ranks, uint32_t size) const {
   std::vector<T, A> quantiles(allocator_);
   if (is_empty()) return quantiles;
-  QuantileCalculatorPtr quantile_calculator(nullptr, calculator_deleter(allocator_));
   quantiles.reserve(size);
+  // possible side-effect of sorting level zero
+  auto view = get_sorted_view<inclusive>(true);
   for (uint32_t i = 0; i < size; ++i) {
     const double rank = ranks[i];
     if ((rank < 0.0) || (rank > 1.0)) {
@@ -255,47 +265,26 @@ std::vector<T, A> req_sketch<T, C, S, A>::get_quantiles(const double* ranks, uin
     if      (rank == 0.0) quantiles.push_back(*min_value_);
     else if (rank == 1.0) quantiles.push_back(*max_value_);
     else {
-      if (!quantile_calculator) {
-        // has side effect of sorting level zero if needed
-        quantile_calculator = const_cast<req_sketch*>(this)->get_quantile_calculator<inclusive>();
-      }
-      quantiles.push_back(*(quantile_calculator->get_quantile(rank)));
+      quantiles.push_back(view.get_quantile(rank));
     }
   }
   return quantiles;
 }
-template<typename T, typename C, typename S, typename A>
-class req_sketch<T, C, S, A>::calculator_deleter {
-  public:
-  calculator_deleter(const AllocCalc& allocator): allocator_(allocator) {}
-  void operator() (QuantileCalculator* ptr) {
-    if (ptr != nullptr) {
-      ptr->~QuantileCalculator();
-      allocator_.deallocate(ptr, 1);
-    }
-  }
-  private:
-  AllocCalc allocator_;
-};
 template<typename T, typename C, typename S, typename A>
 template<bool inclusive>
-auto req_sketch<T, C, S, A>::get_quantile_calculator() const -> QuantileCalculatorPtr {
+quantile_sketch_sorted_view<T, C, A> req_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
   if (!compactors_[0].is_sorted()) {
     const_cast<Compactor&>(compactors_[0]).sort(); // allow this side effect
   }
-  AllocCalc ac(allocator_);
-  QuantileCalculatorPtr quantile_calculator(
-    new (ac.allocate(1)) req_quantile_calculator<T, C, A>(n_, ac),
-    calculator_deleter(ac)
-  );
+  quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
   for (auto& compactor: compactors_) {
-    quantile_calculator->add(compactor.begin(), compactor.end(), compactor.get_lg_weight());
+    view.add(compactor.begin(), compactor.end(), 1 << compactor.get_lg_weight());
   }
-  quantile_calculator->template convert_to_cummulative<inclusive>();
-  return quantile_calculator;
+  if (cumulative) view.template convert_to_cummulative<inclusive>();
+  return view;
 }
 template<typename T, typename C, typename S, typename A>
@@ -348,8 +337,8 @@ double req_sketch<T, C, S, A>::relative_rse_factor() {
 // implementation for fixed-size arithmetic types (integral and floating point)
 template<typename T, typename C, typename S, typename A>
-template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
-size_t req_sketch<T, C, S, A>::get_serialized_size_bytes() const {
+template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
+size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
   size_t size = PREAMBLE_SIZE_BYTES;
   if (is_empty()) return size;
   if (is_estimation_mode()) {
@@ -358,32 +347,33 @@ size_t req_sketch<T, C, S, A>::get_serialized_size_bytes() const {
   if (n_ == 1) {
     size += sizeof(TT);
   } else {
-    for (const auto& compactor: compactors_) size += compactor.get_serialized_size_bytes(S());
+    for (const auto& compactor: compactors_) size += compactor.get_serialized_size_bytes(sd);
   }
   return size;
 }
 // implementation for all other types
 template<typename T, typename C, typename S, typename A>
-template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
-size_t req_sketch<T, C, S, A>::get_serialized_size_bytes() const {
+template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
+size_t req_sketch<T, C, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
   size_t size = PREAMBLE_SIZE_BYTES;
   if (is_empty()) return size;
   if (is_estimation_mode()) {
     size += sizeof(n_);
-    size += S().size_of_item(*min_value_);
-    size += S().size_of_item(*max_value_);
+    size += sd.size_of_item(*min_value_);
+    size += sd.size_of_item(*max_value_);
   }
   if (n_ == 1) {
-    size += S().size_of_item(*compactors_[0].begin());
+    size += sd.size_of_item(*compactors_[0].begin());
   } else {
-    for (const auto& compactor: compactors_) size += compactor.get_serialized_size_bytes(S());
+    for (const auto& compactor: compactors_) size += compactor.get_serialized_size_bytes(sd);
   }
   return size;
 }
 template<typename T, typename C, typename S, typename A>
-void req_sketch<T, C, S, A>::serialize(std::ostream& os) const {
+template<typename SerDe>
+void req_sketch<T, C, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
   const uint8_t preamble_ints = is_estimation_mode() ? 4 : 2;
   write(os, preamble_ints);
   const uint8_t serial_version = SERIAL_VERSION;
@@ -406,19 +396,20 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os) const {
   if (is_empty()) return;
   if (is_estimation_mode()) {
     write(os, n_);
-    S().serialize(os, min_value_, 1);
-    S().serialize(os, max_value_, 1);
+    sd.serialize(os, min_value_, 1);
+    sd.serialize(os, max_value_, 1);
   }
   if (raw_items) {
-    S().serialize(os, compactors_[0].begin(), num_raw_items);
+    sd.serialize(os, compactors_[0].begin(), num_raw_items);
   } else {
-    for (const auto& compactor: compactors_) compactor.serialize(os, S());
+    for (const auto& compactor: compactors_) compactor.serialize(os, sd);
   }
 }
 template<typename T, typename C, typename S, typename A>
-auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
-  const size_t size = header_size_bytes + get_serialized_size_bytes();
+template<typename SerDe>
+auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
+  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
   vector_bytes bytes(size, 0, allocator_);
   uint8_t* ptr = bytes.data() + header_size_bytes;
   const uint8_t* end_ptr = ptr + size;
@@ -445,13 +436,13 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const -> vect
   if (!is_empty()) {
     if (is_estimation_mode()) {
       ptr += copy_to_mem(n_, ptr);
-      ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
-      ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
+      ptr += sd.serialize(ptr, end_ptr - ptr, min_value_, 1);
+      ptr += sd.serialize(ptr, end_ptr - ptr, max_value_, 1);
     }
     if (raw_items) {
-      ptr += S().serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
+      ptr += sd.serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
     } else {
-      for (const auto& compactor: compactors_) ptr += compactor.serialize(ptr, end_ptr - ptr, S());
+      for (const auto& compactor: compactors_) ptr += compactor.serialize(ptr, end_ptr - ptr, sd);
     }
   }
   return bytes;
@@ -459,6 +450,12 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const -> vect
 template<typename T, typename C, typename S, typename A>
 req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
+  return deserialize(is, S(), allocator);
+}
+template<typename T, typename C, typename S, typename A>
+template<typename SerDe>
+req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
   const auto preamble_ints = read<uint8_t>(is);
   const auto serial_version = read<uint8_t>(is);
   const auto family_id = read<uint8_t>(is);
@@ -490,19 +487,19 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
   uint64_t n = 1;
   if (num_levels > 1) {
     n = read<uint64_t>(is);
-    S().deserialize(is, min_value_buffer.get(), 1);
+    sd.deserialize(is, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destructor
     min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
-    S().deserialize(is, max_value_buffer.get(), 1);
+    sd.deserialize(is, max_value_buffer.get(), 1);
     // serde call did not throw, repackage with destructor
     max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
   if (raw_items) {
-    compactors.push_back(Compactor::deserialize(is, S(), allocator, is_level_0_sorted, k, num_raw_items, hra));
+    compactors.push_back(Compactor::deserialize(is, sd, allocator, is_level_0_sorted, k, num_raw_items, hra));
   } else {
     for (size_t i = 0; i < num_levels; ++i) {
-      compactors.push_back(Compactor::deserialize(is, S(), allocator, i == 0 ? is_level_0_sorted : true, hra));
+      compactors.push_back(Compactor::deserialize(is, sd, allocator, i == 0 ? is_level_0_sorted : true, hra));
     }
   }
   if (num_levels == 1) {
@@ -529,6 +526,12 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(std::istream& is, con
 template<typename T, typename C, typename S, typename A>
 req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
+  return deserialize(bytes, size, S(), allocator);
+}
+template<typename T, typename C, typename S, typename A>
+template<typename SerDe>
+req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   const char* end_ptr = static_cast<const char*>(bytes) + size;
@@ -571,21 +574,21 @@ req_sketch<T, C, S, A> req_sketch<T, C, S, A>::deserialize(const void* bytes, si
   if (num_levels > 1) {
     ensure_minimum_memory(end_ptr - ptr, sizeof(n));
     ptr += copy_from_mem(ptr, n);
-    ptr += S().deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
+    ptr += sd.deserialize(ptr, end_ptr - ptr, min_value_buffer.get(), 1);
     // serde call did not throw, repackage with destructor
     min_value = std::unique_ptr<T, item_deleter>(min_value_buffer.release(), item_deleter(allocator));
-    ptr += S().deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
+    ptr += sd.deserialize(ptr, end_ptr - ptr, max_value_buffer.get(), 1);
     // serde call did not throw, repackage with destructor
     max_value = std::unique_ptr<T, item_deleter>(max_value_buffer.release(), item_deleter(allocator));
   }
   if (raw_items) {
-    auto pair = Compactor::deserialize(ptr, end_ptr - ptr, S(), allocator, is_level_0_sorted, k, num_raw_items, hra);
+    auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, is_level_0_sorted, k, num_raw_items, hra);
     compactors.push_back(std::move(pair.first));
     ptr += pair.second;
   } else {
     for (size_t i = 0; i < num_levels; ++i) {
-      auto pair = Compactor::deserialize(ptr, end_ptr - ptr, S(), allocator, i == 0 ? is_level_0_sorted : true, hra);
+      auto pair = Compactor::deserialize(ptr, end_ptr - ptr, sd, allocator, i == 0 ? is_level_0_sorted : true, hra);
       compactors.push_back(std::move(pair.first));
       ptr += pair.second;
     }

data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp CHANGED Viewed

@@ -24,6 +24,7 @@
 #include <fstream>
 #include <sstream>
 #include <limits>
+#include <stdexcept>
 namespace datasketches {
@@ -51,6 +52,10 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
   REQUIRE(std::isnan(sketch.get_quantile(1)));
   const double ranks[3] {0, 0.5, 1};
   REQUIRE(sketch.get_quantiles(ranks, 3).size() == 0);
+  const float split_points[1] {0};
+  REQUIRE(sketch.get_CDF(split_points, 1).empty());
+  REQUIRE(sketch.get_PMF(split_points, 1).empty());
 }
 TEST_CASE("req sketch: single value, lra", "[req_sketch]") {

data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp CHANGED Viewed

@@ -58,7 +58,11 @@ namespace var_opt_constants {
     const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
 }
-template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
+template<
+  typename T,
+  typename S = serde<T>, // deprecated, to be removed in the next major version
+  typename A = std::allocator<T>
+>
 class var_opt_sketch {
   public:
@@ -135,18 +139,20 @@ class var_opt_sketch {
     /**
      * Computes size needed to serialize the current state of the sketch.
      * This version is for fixed-size arithmetic types (integral and floating point).
+     * @param sd instance of a SerDe
      * @return size in bytes needed to serialize this sketch
      */
-    template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
-    inline size_t get_serialized_size_bytes() const;
+    template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
+    inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
     /**
      * Computes size needed to serialize the current state of the sketch.
      * This version is for all other types and can be expensive since every item needs to be looked at.
+     * @param sd instance of a SerDe
      * @return size in bytes needed to serialize this sketch
      */
-    template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
-    inline size_t get_serialized_size_bytes() const;
+    template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
+    inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
     // This is a convenience alias for users
     // The type returned by the following serialize method
@@ -158,30 +164,61 @@ class var_opt_sketch {
      * It is a blank space of a given size.
      * This header is used in Datasketches PostgreSQL extension.
      * @param header_size_bytes space to reserve in front of the sketch
+     * @param sd instance of a SerDe
      */
-    vector_bytes serialize(unsigned header_size_bytes = 0) const;
+    template<typename SerDe = S>
+    vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
     /**
      * This method serializes the sketch into a given stream in a binary form
      * @param os output stream
+     * @param sd instance of a SerDe
      */
-    void serialize(std::ostream& os) const;
+    template<typename SerDe = S>
+    void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
     /**
      * This method deserializes a sketch from a given stream.
      * @param is input stream
+     * @param allocator instance of an Allocator
      * @return an instance of a sketch
+     *
+     * Deprecated, to be removed in the next major version
      */
     static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
+    /**
+     * This method deserializes a sketch from a given stream.
+     * @param is input stream
+     * @param sd instance of a SerDe
+     * @param allocator instance of an Allocator
+     * @return an instance of a sketch
+     */
+    template<typename SerDe = S>
+    static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
     /**
      * This method deserializes a sketch from a given array of bytes.
      * @param bytes pointer to the array of bytes
      * @param size the size of the array
+     * @param allocator instance of an Allocator
      * @return an instance of a sketch
+     *
+     * Deprecated, to be removed in the next major version
      */
     static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
+    /**
+     * This method deserializes a sketch from a given array of bytes.
+     * @param bytes pointer to the array of bytes
+     * @param size the size of the array
+     * @param sd instance of a SerDe
+     * @param allocator instance of an Allocator
+     * @return an instance of a sketch
+     */
+    template<typename SerDe = S>
+    static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
     /**
      * Prints a summary of the sketch.
      * @return the summary as a string