RubyGems - datasketches - Versions diffs - 0.2.5 → 0.2.6 - Mend

datasketches 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 9eaa8a17efdbc591b3e56f94650e887babd30dc79d95db3a7986df0261184191
-  data.tar.gz: 5544326a0edf165d87373a680d8bf5b80acba2894b9048f92cbdb261fcd66d57
+  metadata.gz: cf1ea0f9f2d12b0e46c2d4c7dec21f41992e711e73eca68ea1ef03a4bb711077
+  data.tar.gz: 92f56b63da0254962be47d8d3e00a6950a271053bf3152167f95e6fdb99528e6
 SHA512:
-  metadata.gz: 5a28c093ecda083762367149800770f59fee8e630c0d983d3f29ed32d027fae2e2515dff243ee11bbd41f4875c7cea622f7bc5cc5d7e73176e785503ed19fc0b
-  data.tar.gz: 6b210f2fdca1ae3cbd4e4cbf88e284855014b5a1e1c883085dc96a057da29e370005163ce628e54351c9127b00fae4b7b33a4ca63e6f4b90e0665e93b7742a66
+  metadata.gz: 5841d4a70f1e852faa150f57ebfefc7b975de020782c41eebdad87a01d016be9bdf86f86173600632bf6f56300df0c9c4196251aa5df02a47ecd357ac844ef80
+  data.tar.gz: d6ae7c811e0e2c2008b912e29f86d1b99491c74cd878790dfd800811a007f0dbf9c49bb59db30345450ff82673381f2c036a84a57dc44a6f6751610d9be2ee88

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,7 @@
+## 0.2.6 (2022-07-13)
+- Updated DataSketches to 3.5.0
 ## 0.2.5 (2022-05-21)
 - Updated DataSketches to 3.4.0

data/ext/datasketches/kll_wrapper.cpp CHANGED Viewed

@@ -55,12 +55,12 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
       })
     .define_method(
       "pmf",
-      [](kll_sketch<T>& self, std::vector<T> split_points) {
+      [](kll_sketch<T>& self, const std::vector<T>& split_points) {
         return self.get_PMF(&split_points[0], split_points.size());
       })
     .define_method(
       "cdf",
-      [](kll_sketch<T>& self, std::vector<T> split_points) {
+      [](kll_sketch<T>& self, const std::vector<T>& split_points) {
         return self.get_CDF(&split_points[0], split_points.size());
       })
     .define_method(

data/lib/datasketches/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module DataSketches
-  VERSION = "0.2.5"
+  VERSION = "0.2.6"
 end

data/vendor/datasketches-cpp/CMakeLists.txt CHANGED Viewed

@@ -17,7 +17,7 @@
 cmake_minimum_required(VERSION 3.16.0)
 project(DataSketches
-        VERSION 3.4.0
+        VERSION 3.5.0
         LANGUAGES CXX)
 include(GNUInstallDirs)

data/vendor/datasketches-cpp/NOTICE CHANGED Viewed

@@ -1,11 +1,12 @@
-Apache DataSketches-cpp
-Copyright 2020-2021 The Apache Software Foundation
+Apache DataSketches C++ and Python
+Copyright 2022 The Apache Software Foundation
-Copyright 2015-2018 Yahoo
-Copyright 2019 Verizon Media
+Copyright 2015-2018 Yahoo Inc.
+Copyright 2019-2020 Verizon Media
+Copyright 2021 Yahoo Inc.
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
 Prior to moving to ASF, the software for this project was developed at
-Yahoo (now Verizon Media) (https://developer.yahoo.com).
+Yahoo Inc. (https://developer.yahoo.com).

data/vendor/datasketches-cpp/common/CMakeLists.txt CHANGED Viewed

@@ -43,8 +43,8 @@ install(FILES
 			include/conditional_forward.hpp
 			include/ceiling_power_of_2.hpp
 			include/bounds_binomial_proportions.hpp
-			include/kolmogorov_smirnov.hpp
-			include/kolmogorov_smirnov_impl.hpp
 			include/quantile_sketch_sorted_view.hpp
 			include/quantile_sketch_sorted_view_impl.hpp
+			include/kolmogorov_smirnov.hpp
+			include/kolmogorov_smirnov_impl.hpp
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")

data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp CHANGED Viewed

@@ -297,6 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
     // changes the implied ordering of the pairs, so we must do it before sorting.
     const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
+    if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
     const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
     const uint8_t offset = source.window_offset;
@@ -333,7 +334,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
         lg_k, source.table_data.get_allocator());
     const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
-    if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
+    if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
     const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
     uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);

data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp CHANGED Viewed

@@ -230,7 +230,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
       // move level over as is
       // make sure we are not moving data upwards
       if (raw_beg < out_levels[current_level]) throw std::logic_error("wrong move");
-      std::move(&items[raw_beg], &items[raw_lim], &items[out_levels[current_level]]);
+      std::move(items + raw_beg, items + raw_lim, items + out_levels[current_level]);
       out_levels[current_level + 1] = out_levels[current_level] + raw_pop;
     } else {
       // The sketch is too full AND this level is too full, so we compact it
@@ -251,7 +251,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
       // level zero might not be sorted, so we must sort it if we wish to compact it
       if ((current_level == 0) && !is_level_zero_sorted) {
-        std::sort(&items[adj_beg], &items[adj_beg + adj_pop], C());
+        std::sort(items + adj_beg, items + adj_beg + adj_pop, C());
       }
       if (pop_above == 0) { // Level above is empty, so halve up

data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp CHANGED Viewed

@@ -170,7 +170,7 @@ class kll_sketch {
     using comparator = C;
     static const uint8_t DEFAULT_M = 8;
-    // TODO: Redundant and deprecated. Will be remove din next major version.
+    // TODO: Redundant and deprecated. Will be removed in next major version.
     static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
     static const uint16_t MIN_K = DEFAULT_M;
     static const uint16_t MAX_K = (1 << 16) - 1;
@@ -182,6 +182,14 @@ class kll_sketch {
     kll_sketch& operator=(const kll_sketch& other);
     kll_sketch& operator=(kll_sketch&& other);
+    /*
+     * Type converting constructor.
+     * @param other sketch of a different type
+     * @param allocator instance of an Allocator
+     */
+    template<typename TT, typename CC, typename SS, typename AA>
+    explicit kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator = A());
     /**
      * Updates this sketch with the given data item.
      * @param value an item from a stream of items
@@ -390,7 +398,7 @@ class kll_sketch {
     /**
      * Computes size needed to serialize the current state of the sketch.
      * This version is for fixed-size arithmetic types (integral and floating point).
-     * @param instance of a SerDe
+     * @param serde instance of a SerDe
      * @return size in bytes needed to serialize this sketch
      */
     template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
@@ -399,7 +407,7 @@ class kll_sketch {
     /**
      * Computes size needed to serialize the current state of the sketch.
      * This version is for all other types and can be expensive since every item needs to be looked at.
-     * @param instance of a SerDe
+     * @param serde instance of a SerDe
      * @return size in bytes needed to serialize this sketch
      */
     template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
@@ -459,7 +467,7 @@ class kll_sketch {
     /**
      * This method deserializes a sketch from a given stream.
      * @param is input stream
-     * @param instance of an Allocator
+     * @param allocator instance of an Allocator
      * @return an instance of a sketch
      *
      * Deprecated, to be removed in the next major version
@@ -469,8 +477,8 @@ class kll_sketch {
     /**
      * This method deserializes a sketch from a given stream.
      * @param is input stream
-     * @param instance of a SerDe
-     * @param instance of an Allocator
+     * @param serde instance of a SerDe
+     * @param allocator instance of an Allocator
      * @return an instance of a sketch
      */
     template<typename SerDe = S>
@@ -480,7 +488,7 @@ class kll_sketch {
      * This method deserializes a sketch from a given array of bytes.
      * @param bytes pointer to the array of bytes
      * @param size the size of the array
-     * @param instance of an Allocator
+     * @param allocator instance of an Allocator
      * @return an instance of a sketch
      *
      * Deprecated, to be removed in the next major version
@@ -491,8 +499,8 @@ class kll_sketch {
      * This method deserializes a sketch from a given array of bytes.
      * @param bytes pointer to the array of bytes
      * @param size the size of the array
-     * @param instance of a SerDe
-     * @param instance of an Allocator
+     * @param serde instance of a SerDe
+     * @param allocator instance of an Allocator
      * @return an instance of a sketch
      */
     template<typename SerDe = S>
@@ -606,6 +614,8 @@ class kll_sketch {
     static void check_serial_version(uint8_t serial_version);
     static void check_family_id(uint8_t family_id);
+    void check_sorting() const;
     // implementations for floating point types
     template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
     static const TT& get_invalid_value() {
@@ -629,6 +639,9 @@ class kll_sketch {
       return true;
     }
+    // for type converting constructor
+    template<typename TT, typename CC, typename SS, typename AA>
+    friend class kll_sketch;
 };
 template<typename T, typename C, typename S, typename A>

data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp CHANGED Viewed

@@ -26,6 +26,7 @@
 #include <stdexcept>
 #include "conditional_forward.hpp"
+#include "count_zeros.hpp"
 #include "memory_operations.hpp"
 #include "kll_helper.hpp"
@@ -69,7 +70,7 @@ max_value_(nullptr),
 is_level_zero_sorted_(other.is_level_zero_sorted_)
 {
   items_ = allocator_.allocate(items_size_);
-  std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
+  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
   if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
   if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
 }
@@ -147,6 +148,33 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
   }
 }
+template<typename T, typename C, typename S, typename A>
+template<typename TT, typename CC, typename SS, typename AA>
+kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
+allocator_(allocator),
+k_(other.k_),
+m_(other.m_),
+min_k_(other.min_k_),
+n_(other.n_),
+num_levels_(other.num_levels_),
+levels_(other.levels_, allocator_),
+items_(nullptr),
+items_size_(other.items_size_),
+min_value_(nullptr),
+max_value_(nullptr),
+is_level_zero_sorted_(other.is_level_zero_sorted_)
+{
+  static_assert(
+    std::is_constructible<T, TT>::value,
+    "Type converting constructor requires new type to be constructible from existing type"
+  );
+  items_ = allocator_.allocate(items_size_);
+  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
+  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
+  check_sorting();
+}
 template<typename T, typename C, typename S, typename A>
 template<typename FwdT>
 void kll_sketch<T, C, S, A>::update(FwdT&& value) {
@@ -305,8 +333,8 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
   uint64_t weight = 1;
   uint64_t total = 0;
   while (level < num_levels_) {
-    const auto from_index(levels_[level]);
-    const auto to_index(levels_[level + 1]); // exclusive
+    const auto from_index = levels_[level];
+    const auto to_index = levels_[level + 1]; // exclusive
     for (uint32_t i = from_index; i < to_index; i++) {
       if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
         total += weight;
@@ -694,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
   // level zero might not be sorted, so we must sort it if we wish to compact it
   // sort_level_zero() is not used here because of the adjustment for odd number of items
   if ((level == 0) && !is_level_zero_sorted_) {
-    std::sort(&items_[adj_beg], &items_[adj_beg + adj_pop], C());
+    std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
   }
   if (pop_above == 0) {
     kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -717,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
   // so that the freed-up space can be used by level zero
   if (level > 0) {
     const uint32_t amount = raw_beg - levels_[0];
-    std::move_backward(&items_[levels_[0]], &items_[levels_[0] + amount], &items_[levels_[0] + half_adj_pop + amount]);
+    std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
     for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
   }
   for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
@@ -775,22 +803,32 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
 template<typename T, typename C, typename S, typename A>
 void kll_sketch<T, C, S, A>::sort_level_zero() {
   if (!is_level_zero_sorted_) {
-    std::sort(&items_[levels_[0]], &items_[levels_[1]], C());
+    std::sort(items_ + levels_[0], items_ + levels_[1], C());
     is_level_zero_sorted_ = true;
   }
 }
+template<typename T, typename C, typename S, typename A>
+void kll_sketch<T, C, S, A>::check_sorting() const {
+  // not checking level 0
+  for (uint8_t level = 1; level < num_levels_; ++level) {
+    const auto from = items_ + levels_[level];
+    const auto to = items_ + levels_[level + 1];
+    if (!std::is_sorted(from, to, C())) {
+      throw std::logic_error("levels must be sorted");
+    }
+  }
+}
 template<typename T, typename C, typename S, typename A>
 template<bool inclusive>
 quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
   const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
   quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
-  uint8_t level = 0;
-  while (level < num_levels_) {
+  for (uint8_t level = 0; level < num_levels_; ++level) {
     const auto from = items_ + levels_[level];
     const auto to = items_ + levels_[level + 1]; // exclusive
     view.add(from, to, 1 << level);
-    ++level;
   }
   if (cumulative) view.template convert_to_cummulative<inclusive>();
   return view;

data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp CHANGED Viewed

@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
 #endif
 // typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
-typedef kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>> kll_float_sketch;
+using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
 // let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
-typedef kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>> kll_string_sketch;
+using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
 TEST_CASE("kll sketch", "[kll_sketch]") {
@@ -75,7 +75,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
       (void) it; // to suppress "unused" warning
       FAIL("should be no iterations over an empty sketch");
     }
-}
+  }
   SECTION("get bad quantile") {
     kll_float_sketch sketch(200, 0);
@@ -835,10 +835,75 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
       REQUIRE((*it).second == 3);
     }
   }
-  // cleanup
-  if (test_allocator_total_bytes != 0) {
-    REQUIRE(test_allocator_total_bytes == 0);
+  SECTION("type conversion: empty") {
+    kll_sketch<double> kll_double;
+    kll_sketch<float> kll_float(kll_double);
+    REQUIRE(kll_float.is_empty());
+    REQUIRE(kll_float.get_k() == kll_double.get_k());
+    REQUIRE(kll_float.get_n() == 0);
+    REQUIRE(kll_float.get_num_retained() == 0);
+  }
+  SECTION("type conversion: over k") {
+    kll_sketch<double> kll_double;
+    for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));
+    kll_sketch<float> kll_float(kll_double);
+    REQUIRE(!kll_float.is_empty());
+    REQUIRE(kll_float.get_k() == kll_double.get_k());
+    REQUIRE(kll_float.get_n() == kll_double.get_n());
+    REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
+    auto sv_float = kll_float.get_sorted_view(false);
+    auto sv_double = kll_double.get_sorted_view(false);
+    auto sv_float_it = sv_float.begin();
+    auto sv_double_it = sv_double.begin();
+    while (sv_float_it != sv_float.end()) {
+      REQUIRE(sv_double_it != sv_double.end());
+      auto float_pair = *sv_float_it;
+      auto double_pair = *sv_double_it;
+      REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
+      REQUIRE(float_pair.second == double_pair.second);
+      ++sv_float_it;
+      ++sv_double_it;
+    }
+    REQUIRE(sv_double_it == sv_double.end());
+  }
+  class A {
+    int val;
+  public:
+    A(int val): val(val) {}
+    int get_val() const { return val; }
+  };
+  struct less_A {
+    bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
+  };
+  class B {
+    int val;
+  public:
+    explicit B(const A& a): val(a.get_val()) {}
+    int get_val() const { return val; }
+  };
+  struct less_B {
+    bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
+  };
+  SECTION("type conversion: custom types") {
+    kll_sketch<A, less_A> sa;
+    sa.update(1);
+    sa.update(2);
+    sa.update(3);
+    kll_sketch<B, less_B> sb(sa);
+    REQUIRE(sb.get_n() == 3);
   }
+  // cleanup
+  REQUIRE(test_allocator_total_bytes == 0);
 }
 } /* namespace datasketches */

data/vendor/datasketches-cpp/python/README.md CHANGED Viewed

@@ -12,16 +12,18 @@ This package provides a variety of sketches as described below. Wherever a speci
 ## Building and Installation
-Once cloned, the library can be installed by running `python -m pip install .` in the project root directory, which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
+Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
-If you prefer to call the `setup.py` build script directly, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
+If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
-The library is also available from PyPI via `python -m pip install datasketches`.
+The library is also available from PyPI via `python3 -m pip install datasketches`.
 ## Usage
 Having installed the library, loading the Apache Datasketches Library in Python is simple: `import datasketches`.
+The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
 ## Available Sketch Classes
 - KLL (Absolute Error Quantiles)
@@ -74,12 +76,7 @@ The only developer-specific instructions relate to running unit tests.
 ### Unit tests
-The Python unit tests are run with `tox`. To ensure you have all the needed package, from the package base directory run:
-```bash
-python -m pip install --upgrade tox
-tox
-```
+The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
 ## License

data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp CHANGED Viewed

@@ -151,6 +151,7 @@ template <typename T,
 class quantiles_sketch {
 public:
   using value_type = T;
+  using allocator_type = Allocator;
   using comparator = Comparator;
   using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
@@ -161,6 +162,14 @@ public:
   quantiles_sketch& operator=(const quantiles_sketch& other);
   quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
+  /**
+   * @brief Type converting constructor
+   * @param other quantiles sketch of a different type
+   * @param allocator instance of an Allocator
+   */
+  template<typename From, typename FC, typename FA>
+  explicit quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const Allocator& allocator = Allocator());
   /**
    * Updates this sketch with the given data item.
    * @param value an item from a stream of items
@@ -227,6 +236,12 @@ public:
    */
   Comparator get_comparator() const;
+  /**
+   * Returns the allocator for this sketch.
+   * @return allocator
+   */
+  allocator_type get_allocator() const;
   /**
    * Returns an approximation to the value of the data item
    * that would be preceded by the given fraction of a hypothetical sorted

data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp CHANGED Viewed

@@ -138,6 +138,65 @@ is_sorted_(is_sorted)
     throw std::logic_error("Item count does not match value computed from k, n");
 }
+template<typename T, typename C, typename A>
+template<typename From, typename FC, typename FA>
+quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
+allocator_(allocator),
+k_(other.get_k()),
+n_(other.get_n()),
+bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
+base_buffer_(allocator),
+levels_(allocator),
+min_value_(nullptr),
+max_value_(nullptr),
+is_sorted_(false)
+{
+  static_assert(std::is_constructible<T, From>::value,
+                "Type converting constructor requires new type to be constructible from existing type");
+  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
+  if (!other.is_empty()) {
+    min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
+    max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
+    // reserve space in levels
+    const uint8_t num_levels = compute_levels_needed(k_, n_);
+    levels_.reserve(num_levels);
+    for (int i = 0; i < num_levels; ++i) {
+      Level level(allocator);
+      level.reserve(k_);
+      levels_.push_back(std::move(level));
+    }
+    // iterate through points, assigning to the correct level as needed
+    for (auto pair : other) {
+      const uint64_t wt = pair.second;
+      if (wt == 1) {
+        base_buffer_.push_back(T(pair.first));
+        // resize where needed as if adding points via update()
+        if (base_buffer_.size() + 1 > base_buffer_.capacity()) {
+          const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
+          base_buffer_.reserve(new_size);
+        }
+      }
+      else {
+        const uint8_t idx = count_trailing_zeros_in_u64(pair.second) - 1;
+        levels_[idx].push_back(T(pair.first));
+      }
+    }
+    // validate that ordering within each level is preserved
+    // base_buffer_ can be considered unsorted for this purpose
+    for (int i = 0; i < num_levels; ++i) {
+      if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
+        throw std::logic_error("Copy construction across types produces invalid sorting");
+      }
+    }
+  }
+}
 template<typename T, typename C, typename A>
 quantiles_sketch<T, C, A>::~quantiles_sketch() {
   if (min_value_ != nullptr) {
@@ -238,7 +297,7 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
   );
   write(os, flags_byte);
   write(os, k_);
-  uint16_t unused = 0;
+  const uint16_t unused = 0;
   write(os, unused);
   if (!is_empty()) {
@@ -624,6 +683,11 @@ C quantiles_sketch<T, C, A>::get_comparator() const {
   return C();
 }
+template<typename T, typename C, typename A>
+A quantiles_sketch<T, C, A>::get_allocator() const {
+  return allocator_;
+}
 // implementation for fixed-size arithmetic types (integral and floating point)
 template<typename T, typename C, typename A>
 template<typename SerDe, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
@@ -783,9 +847,9 @@ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) co
 template<typename T, typename C, typename A>
 uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
-  uint32_t bb_count = compute_base_buffer_items(k, n);
-  uint64_t bit_pattern = compute_bit_pattern(k, n);
-  uint32_t valid_levels = compute_valid_levels(bit_pattern);
+  const uint32_t bb_count = compute_base_buffer_items(k, n);
+  const uint64_t bit_pattern = compute_bit_pattern(k, n);
+  const uint32_t valid_levels = compute_valid_levels(bit_pattern);
   return bb_count + (k * valid_levels);
 }
@@ -843,11 +907,11 @@ void quantiles_sketch<T, C, A>::check_family_id(uint8_t family_id) {
 template<typename T, typename C, typename A>
 void quantiles_sketch<T, C, A>::check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version) {
-  bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
-  bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
+  const bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
+  const bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
-  uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
-               + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
+  const uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
+                     + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
   bool valid = true;
   switch (sw) { // exhaustive list and description of all valid cases
@@ -888,7 +952,7 @@ typename quantiles_sketch<T, C, A>::const_iterator quantiles_sketch<T, C, A>::en
 template<typename T, typename C, typename A>
 void quantiles_sketch<T, C, A>::grow_base_buffer() {
-  size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
+  const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
   base_buffer_.reserve(new_size);
 }
@@ -912,7 +976,7 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
 template<typename T, typename C, typename A>
 bool quantiles_sketch<T, C, A>::grow_levels_if_needed() {
-  uint8_t levels_needed = compute_levels_needed(k_, n_);
+  const uint8_t levels_needed = compute_levels_needed(k_, n_);
   if (levels_needed == 0)
     return false; // don't need levels and might have small base buffer. Possible during merges.
@@ -992,7 +1056,7 @@ template<typename FwdV>
 void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf_out, uint16_t stride) {
   // Random offset in range [0, stride)
   std::uniform_int_distribution<uint16_t> dist(0, stride - 1);
-  uint16_t rand_offset = dist(random_utils::rand);
+  const uint16_t rand_offset = dist(random_utils::rand);
   if ((buf_in.size() != stride * buf_out.capacity())
     || (buf_out.size() > 0)) {
@@ -1000,7 +1064,7 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
         "stride*buf_out.capacity() and empty buf_out");
   }
-  size_t k = buf_out.capacity();
+  const size_t k = buf_out.capacity();
   for (uint16_t i = rand_offset, o = 0; o < k; i += stride, ++o) {
     buf_out.push_back(conditional_forward<FwdV>(buf_in[i]));
   }
@@ -1117,7 +1181,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
   const uint16_t downsample_factor = src.get_k() / tgt.get_k();
   const uint8_t lg_sample_factor = count_trailing_zeros_in_u32(downsample_factor);
-  uint64_t new_n = src.get_n() + tgt.get_n();
+  const uint64_t new_n = src.get_n() + tgt.get_n();
   // move items from src's base buffer
   for (uint16_t i = 0; i < src.base_buffer_.size(); ++i) {
@@ -1125,7 +1189,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
   }
   // check (after moving raw items) if we need to extend levels array
-  uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
+  const uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
   if (levels_needed > tgt.levels_.size()) {
     tgt.levels_.reserve(levels_needed);
     while (tgt.levels_.size() < levels_needed) {

data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp CHANGED Viewed

@@ -82,7 +82,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_ske
   const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
   REQUIRE(delta == Approx(0.02).margin(0.01));
   const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
-  std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
   REQUIRE_FALSE(delta > threshold);
   REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
 }
@@ -102,7 +102,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution",
   const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
   REQUIRE(delta == Approx(0.02).margin(0.01));
   const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
-  std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
   REQUIRE(delta > threshold);
   REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
 }

data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp CHANGED Viewed

@@ -903,6 +903,69 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
     }
   }
+  SECTION("Type converting copy constructor") {
+    const uint16_t k = 8;
+    const int n = 403;
+    quantiles_sketch<double> sk_double(k);
+    quantiles_sketch<float> sk_float(k, sk_double.get_allocator());
+    REQUIRE(sk_float.is_empty());
+    for (int i = 0; i < n; ++i) sk_double.update(i + .01);
+    quantiles_sketch<int> sk_int(sk_double);
+    REQUIRE(sk_double.get_n() == sk_int.get_n());
+    REQUIRE(sk_double.get_k() == sk_int.get_k());
+    REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
+    auto sv_double = sk_double.get_sorted_view(false);
+    std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
+    auto sv_int = sk_int.get_sorted_view(false);
+    std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
+    REQUIRE(vec_double.size() == vec_int.size());
+    for (size_t i = 0; i < vec_int.size(); ++i) {
+      // known truncation with conversion so approximate result
+      REQUIRE(vec_double[i].first == Approx(vec_int[i].first).margin(0.1));
+      // exact equality for weights
+      REQUIRE(vec_double[i].second == vec_int[i].second);
+    }
+  }
+  class A {
+    int val;
+  public:
+    A(int val): val(val) {}
+    int get_val() const { return val; }
+  };
+  struct less_A {
+    bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
+  };
+  class B {
+    int val;
+  public:
+    explicit B(const A& a): val(a.get_val()) {}
+    int get_val() const { return val; }
+  };
+  struct less_B {
+    bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
+  };
+  SECTION("type conversion: custom types") {
+    quantiles_sketch<A, less_A> sa;
+    sa.update(1);
+    sa.update(2);
+    sa.update(3);
+    quantiles_sketch<B, less_B> sb(sa);
+    REQUIRE(sb.get_n() == 3);
+  }
   // cleanup
   if (test_allocator_total_bytes != 0) {
     REQUIRE(test_allocator_total_bytes == 0);

data/vendor/datasketches-cpp/req/include/req_compactor.hpp CHANGED Viewed

@@ -38,6 +38,9 @@ public:
   req_compactor& operator=(const req_compactor& other);
   req_compactor& operator=(req_compactor&& other);
+  template<typename TT, typename CC, typename AA>
+  req_compactor(const req_compactor<TT, CC, AA>& other, const Allocator& allocator);
   bool is_sorted() const;
   uint32_t get_num_items() const;
   uint32_t get_nom_capacity() const;
@@ -128,6 +131,9 @@ private:
   template<typename S>
   static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
+  // for type converting constructor
+  template<typename TT, typename CC, typename AA>
+  friend class req_compactor;
 };
 } /* namespace datasketches */

data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp CHANGED Viewed

@@ -132,6 +132,33 @@ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other)
   return *this;
 }
+template<typename T, typename C, typename A>
+template<typename TT, typename CC, typename AA>
+req_compactor<T, C, A>::req_compactor(const req_compactor<TT, CC, AA>& other, const A& allocator):
+allocator_(allocator),
+lg_weight_(other.lg_weight_),
+hra_(other.hra_),
+coin_(other.coin_),
+sorted_(other.sorted_),
+section_size_raw_(other.section_size_raw_),
+section_size_(other.section_size_),
+num_sections_(other.num_sections_),
+state_(other.state_),
+num_items_(other.num_items_),
+capacity_(other.capacity_),
+items_(nullptr)
+{
+  if (other.items_ != nullptr) {
+    items_ = allocator_.allocate(capacity_);
+    const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
+    const uint32_t to = hra_ ? capacity_ : num_items_;
+    for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
+    if (sorted_ && !std::is_sorted(items_ + from, items_ + to, C())) {
+      throw std::logic_error("items must be sorted");
+    }
+  }
+}
 template<typename T, typename C, typename A>
 bool req_compactor<T, C, A>::is_sorted() const {
   return sorted_;

data/vendor/datasketches-cpp/req/include/req_sketch.hpp CHANGED Viewed

@@ -58,6 +58,14 @@ public:
   req_sketch& operator=(const req_sketch& other);
   req_sketch& operator=(req_sketch&& other);
+  /*
+   * Type converting constructor.
+   * @param other sketch of a different type
+   * @param allocator instance of an Allocator
+   */
+  template<typename TT, typename CC, typename SS, typename AA>
+  explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
   /**
    * Returns configured parameter K
    * @return parameter K
@@ -408,6 +416,9 @@ private:
     }
   }
+  // for type converting constructor
+  template<typename TT, typename CC, typename SS, typename AA>
+  friend class req_sketch;
 };
 template<typename T, typename C, typename S, typename A>

data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp CHANGED Viewed

@@ -64,8 +64,8 @@ compactors_(other.compactors_),
 min_value_(nullptr),
 max_value_(nullptr)
 {
-  if (other.min_value_ != nullptr) min_value_ = new (A().allocate(1)) T(*other.min_value_);
-  if (other.max_value_ != nullptr) max_value_ = new (A().allocate(1)) T(*other.max_value_);
+  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
 }
 template<typename T, typename C, typename S, typename A>
@@ -113,6 +113,33 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
   return *this;
 }
+template<typename T, typename C, typename S, typename A>
+template<typename TT, typename CC, typename SS, typename AA>
+req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
+allocator_(allocator),
+k_(other.k_),
+hra_(other.hra_),
+max_nom_size_(other.max_nom_size_),
+num_retained_(other.num_retained_),
+n_(other.n_),
+compactors_(allocator),
+min_value_(nullptr),
+max_value_(nullptr)
+{
+  static_assert(
+    std::is_constructible<T, TT>::value,
+    "Type converting constructor requires new type to be constructible from existing type"
+  );
+  compactors_.reserve(other.compactors_.size());
+  for (const auto& compactor: other.compactors_) {
+    compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
+  }
+  if (!other.is_empty()) {
+    min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
+    max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
+  }
+}
 template<typename T, typename C, typename S, typename A>
 uint16_t req_sketch<T, C, S, A>::get_k() const {
   return k_;

data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp CHANGED Viewed

@@ -35,7 +35,7 @@ const std::string input_path = "test/";
 #endif
 TEST_CASE("req sketch: empty", "[req_sketch]") {
-  std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
+  //std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
   req_sketch<float> sketch(12);
   REQUIRE(sketch.get_k() == 12);
   REQUIRE(sketch.is_HRA());
@@ -245,7 +245,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
   auto bytes = sketch.serialize();
   REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
   auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
-  std::cout << sketch2.to_string(true);
+  //std::cout << sketch2.to_string(true);
   REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
   REQUIRE(sketch2.is_empty() == sketch.is_empty());
   REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -282,7 +282,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
   auto bytes = sketch.serialize();
   REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
   auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
-  std::cout << sketch2.to_string(true);
+  //std::cout << sketch2.to_string(true);
   REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
   REQUIRE(sketch2.is_empty() == sketch.is_empty());
   REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -485,6 +485,72 @@ TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
   REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
 }
+TEST_CASE("req sketch: type conversion - empty", "[req_sketch]") {
+  req_sketch<double> req_double(12);
+  req_sketch<float> req_float(req_double);
+  REQUIRE(req_float.is_empty());
+  REQUIRE(req_float.get_k() == req_double.get_k());
+  REQUIRE(req_float.get_n() == 0);
+  REQUIRE(req_float.get_num_retained() == 0);
+}
+TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
+  req_sketch<double> req_double(12);
+  for (int i = 0; i < 1000; ++i) req_double.update(static_cast<double>(i));
+  req_sketch<float> req_float(req_double);
+  REQUIRE(!req_float.is_empty());
+  REQUIRE(req_float.get_k() == req_double.get_k());
+  REQUIRE(req_float.get_n() == req_double.get_n());
+  REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
+  auto sv_float = req_float.get_sorted_view(false);
+  auto sv_double = req_double.get_sorted_view(false);
+  auto sv_float_it = sv_float.begin();
+  auto sv_double_it = sv_double.begin();
+  while (sv_float_it != sv_float.end()) {
+    REQUIRE(sv_double_it != sv_double.end());
+    auto float_pair = *sv_float_it;
+    auto double_pair = *sv_double_it;
+    REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
+    REQUIRE(float_pair.second == double_pair.second);
+    ++sv_float_it;
+    ++sv_double_it;
+  }
+  REQUIRE(sv_double_it == sv_double.end());
+}
+class A {
+    int val;
+  public:
+    A(int val): val(val) {}
+    int get_val() const { return val; }
+  };
+  struct less_A {
+    bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
+  };
+  class B {
+    int val;
+  public:
+    explicit B(const A& a): val(a.get_val()) {}
+    int get_val() const { return val; }
+  };
+  struct less_B {
+    bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
+  };
+TEST_CASE("req sketch: type conversion - custom types") {
+  req_sketch<A, less_A> sa(4);
+  sa.update(1);
+  sa.update(2);
+  sa.update(3);
+  req_sketch<B, less_B> sb(sa);
+  REQUIRE(sb.get_n() == 3);
+}
 //TEST_CASE("for manual comparison with Java") {
 //  req_sketch<float> sketch(12, false);
 //  for (size_t i = 0; i < 100000; ++i) sketch.update(i);

data/vendor/datasketches-cpp/setup.py CHANGED Viewed

@@ -81,7 +81,7 @@ class CMakeBuild(build_ext):
 setup(
     name='datasketches',
-    version='3.4.0',
+    version='3.5.0',
     author='Apache Software Foundation',
     author_email='dev@datasketches.apache.org',
     description='The Apache DataSketches Library for Python',

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: datasketches
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 0.2.6
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-05-21 00:00:00.000000000 Z
+date: 2022-07-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice