RubyGems - datasketches - Versions diffs - 0.2.0 → 0.2.4 - Mend

datasketches 0.2.0 → 0.2.4

Files changed (170) hide show

data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp CHANGED Viewed

@@ -27,7 +27,7 @@ namespace datasketches {
 using hll_sketch_test_alloc = hll_sketch_alloc<test_allocator<uint8_t>>;
 using alloc = test_allocator<uint8_t>;
-static void runCheckCopy(int lgConfigK, target_hll_type tgtHllType) {
+static void runCheckCopy(uint8_t lgConfigK, target_hll_type tgtHllType) {
   hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
   for (int i = 0; i < 7; ++i) {
@@ -66,7 +66,7 @@ TEST_CASE("hll sketch: check copies", "[hll_sketch]") {
 }
 static void copyAs(target_hll_type srcType, target_hll_type dstType) {
-  int lgK = 8;
+  uint8_t lgK = 8;
   int n1 = 7;
   int n2 = 24;
   int n3 = 1000;
@@ -109,7 +109,7 @@ TEST_CASE("hll sketch: check copy as", "[hll_sketch]") {
 TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
   test_allocator_total_bytes = 0;
   {
-    int lgConfigK = 8;
+    uint8_t lgConfigK = 8;
     target_hll_type srcType = target_hll_type::HLL_8;
     hll_sketch_test_alloc sk(lgConfigK, srcType, false, 0);
@@ -124,7 +124,7 @@ TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
     sk.update(24); // HLL
     REQUIRE(sk.get_updatable_serialization_bytes() == 40 + 256);
-    const int hllBytes = HllUtil<>::HLL_BYTE_ARR_START + (1 << lgConfigK);
+    const auto hllBytes = hll_constants::HLL_BYTE_ARR_START + (1 << lgConfigK);
     REQUIRE(sk.get_compact_serialization_bytes() == hllBytes);
     REQUIRE(hll_sketch::get_max_updatable_serialization_bytes(lgConfigK, HLL_8) == hllBytes);
   }
@@ -135,22 +135,22 @@ TEST_CASE("hll sketch: check num std dev", "[hll_sketch]") {
   REQUIRE_THROWS_AS(HllUtil<>::checkNumStdDev(0), std::invalid_argument);
 }
-void checkSerializationSizes(const int lgConfigK, target_hll_type tgtHllType) {
+void checkSerializationSizes(uint8_t lgConfigK, target_hll_type tgtHllType) {
   hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
   int i;
   // LIST
   for (i = 0; i < 7; ++i) { sk.update(i); }
-  int expected = HllUtil<>::LIST_INT_ARR_START + (i << 2);
+  auto expected = hll_constants::LIST_INT_ARR_START + (i << 2);
   REQUIRE(sk.get_compact_serialization_bytes() == expected);
-  expected = HllUtil<>::LIST_INT_ARR_START + (4 << HllUtil<>::LG_INIT_LIST_SIZE);
+  expected = hll_constants::LIST_INT_ARR_START + (4 << hll_constants::LG_INIT_LIST_SIZE);
   REQUIRE(sk.get_updatable_serialization_bytes() == expected);
   // SET
   for (i = 7; i < 24; ++i) { sk.update(i); }
-  expected = HllUtil<>::HASH_SET_INT_ARR_START + (i << 2);
+  expected = hll_constants::HASH_SET_INT_ARR_START + (i << 2);
   REQUIRE(sk.get_compact_serialization_bytes() == expected);
-  expected = HllUtil<>::HASH_SET_INT_ARR_START + (4 << HllUtil<>::LG_INIT_SET_SIZE);
+  expected = hll_constants::HASH_SET_INT_ARR_START + (4 << hll_constants::LG_INIT_SET_SIZE);
   REQUIRE(sk.get_updatable_serialization_bytes() == expected);
 }
@@ -178,7 +178,7 @@ TEST_CASE("hll sketch: exercise to string", "[hll_sketch]") {
 // Creates and serializes then deserializes sketch.
 // Returns true if deserialized sketch is compact.
-static bool checkCompact(const int lgK, const int n, const target_hll_type type, bool compact) {
+static bool checkCompact(uint8_t lgK, const int n, const target_hll_type type, bool compact) {
   hll_sketch_test_alloc sk(lgK, type, false, 0);
   for (int i = 0; i < n; ++i) { sk.update(i); }
@@ -201,7 +201,7 @@ static bool checkCompact(const int lgK, const int n, const target_hll_type type,
 TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
   test_allocator_total_bytes = 0;
   {
-    int lgK = 8;
+    uint8_t lgK = 8;
     // unless/until we create non-updatable "direct" versions,
     // deserialized image should never be compact
     // LIST: follows serialization request
@@ -230,10 +230,10 @@ TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
 TEST_CASE("hll sketch: check k limits", "[hll_sketch]") {
   test_allocator_total_bytes = 0;
   {
-    hll_sketch_test_alloc sketch1(HllUtil<>::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
-    hll_sketch_test_alloc sketch2(HllUtil<>::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
-    REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
-    REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
+    hll_sketch_test_alloc sketch1(hll_constants::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
+    hll_sketch_test_alloc sketch2(hll_constants::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
+    REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
+    REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
   }
   REQUIRE(test_allocator_total_bytes == 0);
 }

data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp CHANGED Viewed

@@ -24,23 +24,19 @@
 namespace datasketches {
-static int min(int a, int b) {
-  return (a < b) ? a : b;
-}
 static void println(std::string& str) {
   //std::cout << str << "\n";
 }
 static void basicUnion(uint64_t n1, uint64_t n2,
-		                   uint64_t lgk1, uint64_t lgk2, uint64_t lgMaxK,
+		                   uint8_t lgk1, uint8_t lgk2, uint8_t lgMaxK,
                        target_hll_type type1, target_hll_type type2, target_hll_type resultType) {
   uint64_t v = 0;
   //int tot = n1 + n2;
   hll_sketch h1(lgk1, type1);
   hll_sketch h2(lgk2, type2);
-  int lgControlK = min(min(lgk1, lgk2), lgMaxK);
+  uint8_t lgControlK = std::min(std::min(lgk1, lgk2), lgMaxK);
   hll_sketch control(lgControlK, resultType);
   for (uint64_t i = 0; i < n1; ++i) {
@@ -89,9 +85,9 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
   target_hll_type type2 = HLL_8;
   target_hll_type resultType = HLL_8;
-  uint64_t lgK1 = 7;
-  uint64_t lgK2 = 7;
-  uint64_t lgMaxK = 7;
+  uint8_t lgK1 = 7;
+  uint8_t lgK2 = 7;
+  uint8_t lgMaxK = 7;
   uint64_t n1 = 7;
   uint64_t n2 = 7;
   basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
@@ -108,7 +104,7 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
   n2 = 14;
   basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
-  int i = 0;
+  uint8_t i = 0;
   for (i = 7; i <= 13; ++i) {
     lgK1 = i;
     lgK2 = i;
@@ -184,9 +180,9 @@ TEST_CASE("hll union: check composite estimate", "[hll_union]") {
 }
 TEST_CASE("hll union: check config k limits", "[hll_union]") {
-  REQUIRE_THROWS_AS(hll_union(HllUtil<>::MIN_LOG_K - 1), std::invalid_argument);
+  REQUIRE_THROWS_AS(hll_union(hll_constants::MIN_LOG_K - 1), std::invalid_argument);
-  REQUIRE_THROWS_AS(hll_union(HllUtil<>::MAX_LOG_K + 1), std::invalid_argument);
+  REQUIRE_THROWS_AS(hll_union(hll_constants::MAX_LOG_K + 1), std::invalid_argument);
 }
 static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est) {
@@ -195,7 +191,7 @@ static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est
 }
 TEST_CASE("hll union: check ub lb", "[hll_union]") {
-  int lgK = 4;
+  uint8_t lgK = 4;
   int n = 1 << 20;
   bool oooFlag = false;
@@ -223,7 +219,7 @@ TEST_CASE("hll union: check ub lb", "[hll_union]") {
 }
 TEST_CASE("hll union: check conversions", "[hll_union]") {
-  int lgK = 4;
+  uint8_t lgK = 4;
   hll_sketch sk1(lgK, HLL_8);
   hll_sketch sk2(lgK, HLL_8);
   int n = 1 << 20;

data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp CHANGED Viewed

@@ -57,7 +57,7 @@ static int get_n(int lg_k, hll_mode mode) {
 static long v = 0;
-static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode) {
+static hll_sketch build_sketch(uint8_t lg_k, target_hll_type hll_type, hll_mode mode) {
   hll_sketch sk(lg_k, hll_type);
   int n = get_n(lg_k, mode);
   for (int i = 0; i < n; i++) sk.update(static_cast<uint64_t>(i + v));
@@ -67,7 +67,7 @@ static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode
 // merges a sketch to an empty union and gets result of the same type, checks binary equivalence
 static void union_one_update(bool compact) {
-  for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
+  for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
     for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
       if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
       for (int t = 0; t <= 2; t++) { // HLL_4, HLL_6, HLL_8
@@ -102,7 +102,7 @@ TEST_CASE("hll isomorphic: union one update serialize compact", "[hll_isomorphic
 // converts a sketch to a different type and converts back to the original type to check binary equivalence
 static void convert_back_and_forth(bool compact) {
-  for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
+  for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
     for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
       if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
       for (int t1 = 0; t1 <= 2; t1++) { // HLL_4, HLL_6, HLL_8

data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp CHANGED Viewed

@@ -44,11 +44,11 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
   auto ser2 = sk.serialize_updatable();
   REQUIRE(ser1.size() == ser2.size());
-  int len = ser1.size();
+  size_t len = ser1.size();
   uint8_t* b1 = ser1.data();
   uint8_t* b2 = ser2.data();
-  for (int i = 0; i < len; ++i) {
+  for (size_t i = 0; i < len; ++i) {
     REQUIRE(b2[i] == b1[i]);
   }
 }
@@ -129,7 +129,7 @@ static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
   REQUIRE(sk1.get_target_type() == sk2.get_target_type());
 }
-static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const int n) {
+static void toFrom(const uint8_t lgConfigK, const target_hll_type tgtHllType, const int n) {
   hll_sketch src(lgConfigK, tgtHllType);
   for (int i = 0; i < n; ++i) {
     src.update(i);
@@ -157,7 +157,7 @@ static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const
 TEST_CASE("hll to/from byte array: to from sketch", "[hll_byte_array]") {
   for (int i = 0; i < 10; ++i) {
     int n = nArr[i];
-    for (int lgK = 4; lgK <= 13; ++lgK) {
+    for (uint8_t lgK = 4; lgK <= 13; ++lgK) {
       toFrom(lgK, HLL_4, n);
       toFrom(lgK, HLL_6, n);
       toFrom(lgK, HLL_8, n);

data/vendor/datasketches-cpp/kll/CMakeLists.txt CHANGED Viewed

@@ -32,27 +32,17 @@ target_include_directories(kll
 target_link_libraries(kll INTERFACE common)
 target_compile_features(kll INTERFACE cxx_std_11)
-set(kll_HEADERS "")
-list(APPEND kll_HEADERS "include/kll_sketch.hpp")
-list(APPEND kll_HEADERS "include/kll_sketch_impl.hpp")
-list(APPEND kll_HEADERS "include/kll_helper.hpp")
-list(APPEND kll_HEADERS "include/kll_helper_impl.hpp")
-list(APPEND kll_HEADERS "include/kll_quantile_calculator.hpp")
-list(APPEND kll_HEADERS "include/kll_quantile_calculator_impl.hpp")
 install(TARGETS kll
   EXPORT ${PROJECT_NAME}
 )
-install(FILES ${kll_HEADERS}
+install(FILES
+		include/kll_sketch.hpp
+		include/kll_sketch_impl.hpp
+		include/kll_helper.hpp
+		include/kll_helper_impl.hpp
+		include/kll_quantile_calculator.hpp
+		include/kll_quantile_calculator_impl.hpp
+		include/kolmogorov_smirnov.hpp
+		include/kolmogorov_smirnov_impl.hpp
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
-target_sources(kll
-  INTERFACE
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper_impl.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch_impl.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator.hpp
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator_impl.hpp
-)

data/vendor/datasketches-cpp/kll/include/kll_helper.hpp CHANGED Viewed

@@ -26,7 +26,8 @@
 namespace datasketches {
-static std::independent_bits_engine<std::mt19937, 1, uint32_t> random_bit(std::chrono::system_clock::now().time_since_epoch().count());
+static std::independent_bits_engine<std::mt19937, 1, uint32_t>
+  random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
 #ifdef KLL_VALIDATION
 extern uint32_t kll_next_offset;
@@ -46,9 +47,9 @@ class kll_helper {
     static inline uint8_t floor_of_log2_of_fraction(uint64_t numer, uint64_t denom);
     static inline uint8_t ub_on_num_levels(uint64_t n);
     static inline uint32_t compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels);
-    static inline uint32_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
-    static inline uint32_t int_cap_aux(uint16_t k, uint8_t depth);
-    static inline uint32_t int_cap_aux_aux(uint16_t k, uint8_t depth);
+    static inline uint16_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
+    static inline uint16_t int_cap_aux(uint16_t k, uint8_t depth);
+    static inline uint16_t int_cap_aux_aux(uint16_t k, uint8_t depth);
     static inline uint64_t sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels);
     /*

data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp CHANGED Viewed

@@ -55,28 +55,28 @@ uint32_t kll_helper::compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_l
   return total;
 }
-uint32_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
+uint16_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
   if (height >= numLevels) throw std::invalid_argument("height >= numLevels");
   const uint8_t depth = numLevels - height - 1;
-  return std::max((uint32_t) min_wid, int_cap_aux(k, depth));
+  return std::max<uint16_t>(min_wid, int_cap_aux(k, depth));
 }
-uint32_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
+uint16_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
   if (depth > 60) throw std::invalid_argument("depth > 60");
   if (depth <= 30) return int_cap_aux_aux(k, depth);
   const uint8_t half = depth / 2;
   const uint8_t rest = depth - half;
-  const uint32_t tmp = int_cap_aux_aux(k, half);
+  const uint16_t tmp = int_cap_aux_aux(k, half);
   return int_cap_aux_aux(tmp, rest);
 }
-uint32_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
+uint16_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
   if (depth > 30) throw std::invalid_argument("depth > 30");
   const uint64_t twok = k << 1; // for rounding, we pre-multiply by 2
   const uint64_t tmp = (uint64_t) (((uint64_t) twok << depth) / powers_of_three[depth]);
   const uint64_t result = (tmp + 1) >> 1; // then here we add 1 and divide by 2
   if (result > k) throw std::logic_error("result > k");
-  return result;
+  return static_cast<uint16_t>(result);
 }
 uint64_t kll_helper::sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels) {

data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp CHANGED Viewed

@@ -24,19 +24,27 @@
 namespace datasketches {
+// forward declaration
+template<typename T, typename C, typename S, typename A> class kll_sketch;
 template <typename T, typename C, typename A>
 class kll_quantile_calculator {
   public:
-    // assumes that all levels are sorted including level 0
-    kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator);
+    using Entry = std::pair<T, uint64_t>;
+    using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
+    using Container = std::vector<Entry, AllocEntry>;
+    using const_iterator = typename Container::const_iterator;
+    template<typename S>
+    kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
     T get_quantile(double fraction) const;
+    const_iterator begin() const;
+    const_iterator end() const;
   private:
     using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
     using vector_u32 = std::vector<uint32_t, AllocU32>;
-    using Entry = std::pair<T, uint64_t>;
-    using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
-    using Container = std::vector<Entry, AllocEntry>;
     uint64_t n_;
     vector_u32 levels_;
     Container entries_;
@@ -45,7 +53,7 @@ class kll_quantile_calculator {
     T approximately_answer_positional_query(uint64_t pos) const;
     void convert_to_preceding_cummulative();
     uint32_t chunk_containing_pos(uint64_t pos) const;
-    uint32_t search_for_chunk_containing_pos(uint64_t pos, uint32_t l, uint32_t r) const;
+    uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
     static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
     static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
     static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);

data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp CHANGED Viewed

@@ -28,24 +28,38 @@
 namespace datasketches {
-template <typename T, typename C, typename A>
-kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator):
-n_(n), levels_(num_levels + 1, 0, allocator), entries_(allocator)
+template<typename T, typename C, typename A>
+template<typename S>
+kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
+n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
 {
-  const uint32_t num_items = levels[num_levels] - levels[0];
-  entries_.reserve(num_items);
-  populate_from_sketch(items, levels, num_levels);
-  merge_sorted_blocks(entries_, levels_.data(), levels_.size() - 1, num_items);
-  if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
-  convert_to_preceding_cummulative();
+  const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
+  if (num_items > 0) {
+    entries_.reserve(num_items);
+    populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
+    if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
+    merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
+    if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
+    convert_to_preceding_cummulative();
+  }
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
   return approximately_answer_positional_query(pos_of_phi(fraction, n_));
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
+auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
+  return entries_.begin();
+}
+template<typename T, typename C, typename A>
+auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
+  return entries_.end();
+}
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
   size_t src_level = 0;
   size_t dst_level = 0;
@@ -68,7 +82,7 @@ void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, cons
   if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
   if (pos >= n_) throw std::logic_error("position out of range");
   const uint32_t num_items = levels_[levels_.size() - 1];
@@ -77,7 +91,7 @@ T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64
   return entries_[index].first;
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
   uint64_t subtotal = 0;
   for (auto& entry: entries_) {
@@ -87,13 +101,13 @@ void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
   }
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
-  const uint64_t pos = std::floor(phi * n);
+  const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
   return (pos == n) ? n - 1 : pos;
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
   if (entries_.size() < 1) throw std::logic_error("array too short");
   if (pos < entries_[0].second) throw std::logic_error("position too small");
@@ -101,19 +115,19 @@ uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) co
   return search_for_chunk_containing_pos(pos, 0, entries_.size());
 }
-template <typename T, typename C, typename A>
-uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint32_t l, uint32_t r) const {
+template<typename T, typename C, typename A>
+uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
   if (l + 1 == r) {
-    return l;
+    return static_cast<uint32_t>(l);
   }
-  const uint32_t m(l + (r - l) / 2);
+  const uint64_t m = l + (r - l) / 2;
   if (entries_[m].second <= pos) {
     return search_for_chunk_containing_pos(pos, m, r);
   }
   return search_for_chunk_containing_pos(pos, l, m);
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
   if (num_levels == 1) return;
   Container temporary(entries.get_allocator());
@@ -121,7 +135,7 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, c
   merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
     uint8_t starting_level, uint8_t num_levels) {
   if (num_levels == 1) return;
@@ -129,10 +143,11 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& ori
   const uint8_t num_levels_2 = num_levels - num_levels_1;
   const uint8_t starting_level_1 = starting_level;
   const uint8_t starting_level_2 = starting_level + num_levels_1;
-  const auto chunk_begin = temp.begin() + temp.size();
+  const auto initial_size = temp.size();
   merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
   merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
   const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
+  const auto chunk_begin = temp.begin() + initial_size;
   std::merge(
     std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
     std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
@@ -141,7 +156,7 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& ori
   temp.erase(chunk_begin, temp.end());
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
     uint8_t starting_level, uint8_t num_levels) {
   if (num_levels == 1) {

data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp CHANGED Viewed

@@ -153,15 +153,23 @@ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
 template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
 template<typename A> using vector_d = std::vector<double, AllocD<A>>;
+namespace kll_constants {
+  const uint16_t DEFAULT_K = 200;
+}
 template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
 class kll_sketch {
   public:
+    using value_type = T;
+    using comparator = C;
     static const uint8_t DEFAULT_M = 8;
-    static const uint16_t DEFAULT_K = 200;
+    // TODO: Redundant and deprecated. Will be removed in the next major version.
+    static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
     static const uint16_t MIN_K = DEFAULT_M;
     static const uint16_t MAX_K = (1 << 16) - 1;
-    explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
+    explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
     kll_sketch(const kll_sketch& other);
     kll_sketch(kll_sketch&& other) noexcept;
     ~kll_sketch();
@@ -296,7 +304,7 @@ class kll_sketch {
      *
      * @return array of approximations to the given number of evenly-spaced fractional ranks.
      */
-    std::vector<T, A> get_quantiles(size_t num) const;
+    std::vector<T, A> get_quantiles(uint32_t num) const;
     /**
      * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
@@ -383,6 +391,33 @@ class kll_sketch {
     template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
     size_t get_serialized_size_bytes() const;
+    /**
+     * Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
+     * length. The resulting size is an overestimate to make sure actual sketches don't exceed it.
+     * This method can be used if allocation of storage is necessary beforehand, but it is not
+     * optimal.
+     * This method is for arithmetic types (integral and floating point)
+     * @param k parameter that controls size of the sketch and accuracy of estimates
+     * @param n stream length
+     * @return upper bound on the serialized size
+     */
+    template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
+    static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n);
+    /**
+     * Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
+     * length. The resulting size is an overestimate to make sure actual sketches don't exceed it.
+     * This method can be used if allocation of storage is necessary beforehand, but it is not
+     * optimal.
+     * This method is for all other non-arithmetic types, and it takes a max size of an item as input.
+     * @param k parameter that controls size of the sketch and accuracy of estimates
+     * @param n stream length
+     * @param max_item_size_bytes maximum size of an item in bytes
+     * @return upper bound on the serialized size
+     */
+    template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
+    static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes);
     /**
      * This method serializes the sketch into a given stream in a binary form
      * @param os output stream
@@ -391,7 +426,7 @@ class kll_sketch {
     // This is a convenience alias for users
     // The type returned by the following serialize method
-    typedef vector_u8<A> vector_bytes;
+    using vector_bytes = vector_u8<A>;
     /**
      * This method serializes the sketch as a vector of bytes.
@@ -480,6 +515,8 @@ class kll_sketch {
     T* max_value_;
     bool is_level_zero_sorted_;
+    friend class kll_quantile_calculator<T, C, A>;
     // for deserialization
     class item_deleter;
     class items_deleter;