RubyGems - datasketches - Versions diffs - 0.3.1 → 0.3.2 - Mend

datasketches 0.3.1 → 0.3.2

Files changed (113) hide show

data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp ADDED Viewed

@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <numeric>
+#include <sstream>
+#include <stdexcept>
+#include <vector>
+#include <catch2/catch.hpp>
+#include <density_sketch.hpp>
+namespace datasketches {
+// A freshly constructed sketch reports itself as empty and refuses to estimate.
+TEST_CASE("density sketch: empty", "[density_sketch]") {
+  density_sketch<float> empty_sketch(10, 3);
+  REQUIRE(empty_sketch.is_empty());
+  // querying a sketch that holds no points must throw
+  REQUIRE_THROWS_AS(empty_sketch.get_estimate({0, 0, 0}), std::runtime_error);
+}
+// Single-point sketch: rejects a point of the wrong dimension, stays in exact mode,
+// and its estimate falls off with distance from the stored point.
+TEST_CASE("density sketch: one item", "[density_sketch]") {
+  density_sketch<float> one_point(10, 3);
+  // a 2-dimensional point must be rejected by a 3-dimensional sketch
+  REQUIRE_THROWS_AS(one_point.update(std::vector<float>(2, 0.0f)), std::invalid_argument);
+  // insert the origin as the only point
+  one_point.update(std::vector<float>(3, 0.0f));
+  REQUIRE_FALSE(one_point.is_empty());
+  REQUIRE_FALSE(one_point.is_estimation_mode());
+  // exactly 1 at the stored point, close to 1 nearby, close to 0 farther away
+  REQUIRE(one_point.get_estimate({0, 0, 0}) == 1);
+  REQUIRE(one_point.get_estimate({0.01, 0.01, 0.01}) > 0.95);
+  REQUIRE(one_point.get_estimate({1, 1, 1}) < 0.05);
+}
+// Merging a one-point sketch into a two-point sketch yields three points seen and retained.
+TEST_CASE("density sketch: merge", "[density_sketch]") {
+  // target sketch: two 4-dimensional points
+  density_sketch<float> target(10, 4);
+  target.update(std::vector<float>({0, 0, 0, 0}));
+  target.update(std::vector<float>({1, 2, 3, 4}));
+  // source sketch: one 4-dimensional point
+  density_sketch<float> source(10, 4);
+  source.update(std::vector<float>({5, 6, 7, 8}));
+  target.merge(source);
+  REQUIRE(target.get_n() == 3);
+  REQUIRE(target.get_num_retained() == 3);
+}
+// After 1000 updates the sketch is in estimation mode; iterating it visits exactly
+// get_num_retained() entries, each holding a point of the sketch's dimension.
+TEST_CASE("density sketch: iterator", "[density_sketch]") {
+  const unsigned num_points = 1000;
+  density_sketch<float> stream_sketch(10, 3);
+  // feed the points (1,1,1), (2,2,2), ..., (1000,1000,1000)
+  for (unsigned value = 1; value <= num_points; ++value) {
+    stream_sketch.update(std::vector<float>(3, value));
+  }
+  REQUIRE(stream_sketch.get_n() == num_points);
+  REQUIRE(stream_sketch.is_estimation_mode());
+  //std::cout << stream_sketch.to_string(true, true);
+  unsigned visited = 0;
+  for (auto entry: stream_sketch) {
+    ++visited;
+    // just to assert something about the output
+    REQUIRE(entry.first.size() == stream_sketch.get_dim());
+  }
+  REQUIRE(visited == stream_sketch.get_num_retained());
+}
+// Spherical kernel for testing: evaluates to 1 when two vectors lie within the given
+// radius of each other (boundary included) and to 0 otherwise.
+// T is the coordinate type of the vectors and of the returned value.
+template<typename T>
+struct spherical_kernel {
+  // radius: largest Euclidean distance at which two vectors still evaluate to 1.
+  // Only the squared radius is stored, so evaluation never needs a square root.
+  spherical_kernel(T radius = 1.0) : _radius_squared(radius * radius) {}
+  // Returns 1 if the squared Euclidean distance between v1 and v2 is at most
+  // radius^2, and 0 otherwise.
+  // Precondition: v2 has at least as many elements as v1, because
+  // std::inner_product walks all of v1 and reads v2 in lockstep.
+  T operator()(const std::vector<T>& v1, const std::vector<T>& v2) const {
+    // accumulate in T rather than a hard-coded double, so the running sum keeps
+    // the precision of the coordinate type
+    const T distance_squared = std::inner_product(
+        v1.begin(), v1.end(), v2.begin(), T(0), std::plus<T>(),
+        [](T a, T b) { const T diff = a - b; return diff * diff; });
+    return distance_squared <= _radius_squared ? T(1) : T(0);
+  }
+  private:
+    T _radius_squared;
+};
+// Exercises a sketch parameterized with spherical_kernel instead of the default kernel.
+TEST_CASE("custom kernel", "[density_sketch]") {
+  density_sketch<float, spherical_kernel<float>> kernel_sketch(10, 3, spherical_kernel<float>(0.5));
+  // store (1,1,1), then probe one point inside and one outside the kernel radius
+  kernel_sketch.update(std::vector<float>(3, 1.0));
+  REQUIRE(kernel_sketch.get_estimate(std::vector<float>(3, 1.001)) == 1.0);
+  REQUIRE(kernel_sketch.get_estimate(std::vector<float>(3, 2.0)) == 0.0);
+  // the remainder mirrors the iterator test: fill up to 1000 points, then walk the sketch
+  const unsigned num_points = 1000;
+  for (unsigned value = 2; value <= num_points; ++value) {
+    kernel_sketch.update(std::vector<float>(3, value));
+  }
+  REQUIRE(kernel_sketch.get_n() == num_points);
+  REQUIRE(kernel_sketch.is_estimation_mode());
+  unsigned visited = 0;
+  for (auto entry: kernel_sketch) {
+    ++visited;
+    // just to assert something about the output
+    REQUIRE(entry.first.size() == kernel_sketch.get_dim());
+  }
+  REQUIRE(visited == kernel_sketch.get_num_retained());
+}
+// An empty sketch survives a round trip through both the byte-buffer and the
+// stream serialization paths with its parameters intact.
+TEST_CASE("serialize empty", "[density_sketch]") {
+  density_sketch<double> original(10, 2);
+  // round trip through a byte buffer
+  auto bytes = original.serialize();
+  auto from_bytes = density_sketch<double>::deserialize(bytes.data(), bytes.size());
+  REQUIRE(from_bytes.is_empty());
+  REQUIRE(!from_bytes.is_estimation_mode());
+  REQUIRE(original.get_k() == from_bytes.get_k());
+  REQUIRE(original.get_dim() == from_bytes.get_dim());
+  REQUIRE(original.get_n() == from_bytes.get_n());
+  REQUIRE(original.get_num_retained() == from_bytes.get_num_retained());
+  // round trip through a binary stream
+  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
+  original.serialize(stream);
+  auto from_stream = density_sketch<double>::deserialize(stream);
+  REQUIRE(from_stream.is_empty());
+  REQUIRE(!from_stream.is_estimation_mode());
+  REQUIRE(original.get_k() == from_stream.get_k());
+  REQUIRE(original.get_dim() == from_stream.get_dim());
+  REQUIRE(original.get_n() == from_stream.get_n());
+  REQUIRE(original.get_num_retained() == from_stream.get_num_retained());
+}
+// Round-trips a density_sketch<double> through serialize() / deserialize(data, size),
+// first in exact mode and then in estimation mode. The restored sketch must report
+// the same parameters and yield the same sequence of (first, second) pairs.
+TEST_CASE("serialize bytes", "[density_sketch]") {
+  uint16_t k = 10;
+  uint32_t dim = 3;
+  density_sketch<double> sk(k, dim);
+  // k points leave the sketch in exact mode (asserted right below)
+  for (uint16_t i = 0; i < k; ++i) {
+    double val = static_cast<double>(i);
+    sk.update(std::vector<double>({val, std::sqrt(val), -val}));
+  }
+  REQUIRE(!sk.is_estimation_mode());
+  // exact mode
+  auto bytes = sk.serialize();
+  auto sk2 = density_sketch<double>::deserialize(bytes.data(), bytes.size());
+  REQUIRE(!sk2.is_empty());
+  REQUIRE(!sk2.is_estimation_mode());
+  REQUIRE(sk.get_k() == sk2.get_k());
+  REQUIRE(sk.get_dim() == sk2.get_dim());
+  REQUIRE(sk.get_n() == sk2.get_n());
+  REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
+  auto it1 = sk.begin();
+  auto it2 = sk2.begin();
+  while (it1 != sk.end()) {
+    // never dereference the restored sketch's iterator past its end
+    REQUIRE((it2 != sk2.end()));
+    // compare every coordinate of the point, not only the first one
+    REQUIRE(it1->first == it2->first);
+    REQUIRE(it1->second == it2->second);
+    ++it1;
+    ++it2;
+  }
+  // estimation mode
+  size_t n = 1031;
+  for (uint32_t i = k; i < n; ++i) {
+    double val = static_cast<double>(i);
+    sk.update(std::vector<double>({val, std::sqrt(val), -val}));
+  }
+  REQUIRE(sk.is_estimation_mode());
+  bytes = sk.serialize();
+  sk2 = density_sketch<double>::deserialize(bytes.data(), bytes.size());
+  REQUIRE(!sk2.is_empty());
+  REQUIRE(sk2.is_estimation_mode());
+  REQUIRE(sk.get_k() == sk2.get_k());
+  REQUIRE(sk.get_dim() == sk2.get_dim());
+  REQUIRE(sk.get_n() == sk2.get_n());
+  REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
+  it1 = sk.begin();
+  it2 = sk2.begin();
+  while (it1 != sk.end()) {
+    REQUIRE((it2 != sk2.end()));
+    REQUIRE(it1->first == it2->first);
+    REQUIRE(it1->second == it2->second);
+    ++it1;
+    ++it2;
+  }
+}
+// Round-trips a density_sketch<float> through the stream overloads of serialize() /
+// deserialize(), first in exact mode and then in estimation mode. The restored sketch
+// must report the same parameters and yield the same sequence of (first, second) pairs.
+TEST_CASE("serialize stream", "[density_sketch]") {
+  uint16_t k = 10;
+  uint32_t dim = 3;
+  density_sketch<float> sk(k, dim);
+  // k points leave the sketch in exact mode (asserted right below)
+  for (uint16_t i = 0; i < k; ++i) {
+    float val = static_cast<float>(i);
+    sk.update(std::vector<float>({val, std::sin(val), std::cos(val)}));
+  }
+  REQUIRE(!sk.is_estimation_mode());
+  // exact mode
+  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+  sk.serialize(s);
+  auto sk2 = density_sketch<float>::deserialize(s);
+  REQUIRE(!sk2.is_empty());
+  REQUIRE(!sk2.is_estimation_mode());
+  REQUIRE(sk.get_k() == sk2.get_k());
+  REQUIRE(sk.get_dim() == sk2.get_dim());
+  REQUIRE(sk.get_n() == sk2.get_n());
+  REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
+  auto it1 = sk.begin();
+  auto it2 = sk2.begin();
+  while (it1 != sk.end()) {
+    // never dereference the restored sketch's iterator past its end
+    REQUIRE((it2 != sk2.end()));
+    // compare every coordinate of the point, not only the first one
+    REQUIRE(it1->first == it2->first);
+    REQUIRE(it1->second == it2->second);
+    ++it1;
+    ++it2;
+  }
+  // estimation mode
+  size_t n = 1031;
+  for (uint32_t i = k; i < n; ++i) {
+    float val = static_cast<float>(i);
+    sk.update(std::vector<float>({val, std::sqrt(val), -val}));
+  }
+  REQUIRE(sk.is_estimation_mode());
+  // a fresh stream is used for the second round trip
+  std::stringstream s2(std::ios::in | std::ios::out | std::ios::binary);
+  sk.serialize(s2);
+  sk2 = density_sketch<float>::deserialize(s2);
+  REQUIRE(!sk2.is_empty());
+  REQUIRE(sk2.is_estimation_mode());
+  REQUIRE(sk.get_k() == sk2.get_k());
+  REQUIRE(sk.get_dim() == sk2.get_dim());
+  REQUIRE(sk.get_n() == sk2.get_n());
+  REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
+  it1 = sk.begin();
+  it2 = sk2.begin();
+  while (it1 != sk.end()) {
+    REQUIRE((it2 != sk2.end()));
+    REQUIRE(it1->first == it2->first);
+    REQUIRE(it1->second == it2->second);
+    ++it1;
+    ++it2;
+  }
+}
+} /* namespace datasketches */

data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp CHANGED Viewed

@@ -91,8 +91,14 @@ private:
 // This iterator uses strides based on golden ratio to avoid clustering during merge
 template<typename K, typename V, typename H, typename E, typename A>
-class reverse_purge_hash_map<K, V, H, E, A>::iterator: public std::iterator<std::input_iterator_tag, K> {
+class reverse_purge_hash_map<K, V, H, E, A>::iterator {
 public:
+  using iterator_category = std::input_iterator_tag;
+  using value_type = std::pair<K&, V>;
+  using difference_type = void;
+  using pointer = void;
+  using reference = const value_type;
   friend class reverse_purge_hash_map<K, V, H, E, A>;
   iterator& operator++() {
     ++count;
@@ -107,8 +113,8 @@ public:
   iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
   bool operator==(const iterator& rhs) const { return count == rhs.count; }
   bool operator!=(const iterator& rhs) const { return count != rhs.count; }
-  const std::pair<K&, V> operator*() const {
-    return std::pair<K&, V>(map->keys_[index], map->values_[index]);
+  reference operator*() const {
+    return value_type(map->keys_[index], map->values_[index]);
   }
 private:
   static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2

data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp CHANGED Viewed

@@ -51,6 +51,22 @@ Hll4Array<A>::Hll4Array(const Hll4Array<A>& that) :
   }
 }
+// Converting constructor: builds an HLL_4 array from any HllArray. The base is
+// initialized with the source's lgConfigK, start-full-size setting and allocator,
+// then every coupon produced by iterating over the source is replayed into this array.
+template<typename A>
+Hll4Array<A>::Hll4Array(const HllArray<A>& other) :
+  HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_4, other.isStartFullSize(), other.getAllocator()),
+  auxHashMap_(nullptr)
+{
+  const int numBytes = this->hll4ArrBytes(this->lgConfigK_);
+  this->hllByteArr_.resize(numBytes, 0); // zero-filled storage before any coupon is applied
+  this->oooFlag_ = other.isOutOfOrderFlag();
+  for (const auto coupon : other) { // all = false, so skip empty values
+    internalCouponUpdate(coupon); // updates KxQ registers
+  }
+  // assigned after the replay loop, so the source's accumulator value is what remains
+  // (presumably internalCouponUpdate() modifies hipAccum_ - TODO confirm)
+  this->hipAccum_ = other.getHipAccum();
+  this->rebuild_kxq_curmin_ = false; // NOTE(review): name suggests KxQ/curMin need no rebuild here - confirm
+}
 template<typename A>
 Hll4Array<A>::~Hll4Array() {
   // hllByteArr deleted in parent
@@ -114,10 +130,9 @@ uint8_t Hll4Array<A>::getSlot(uint32_t slotNo) const {
 }
 template<typename A>
-uint8_t Hll4Array<A>::get_value(uint32_t index) const {
-  const uint8_t value = getSlot(index);
+uint8_t Hll4Array<A>::adjustRawValue(uint32_t slot, uint8_t value) const {
   if (value != hll_constants::AUX_TOKEN) return value + this->curMin_;
-  return auxHashMap_->mustFindValueFor(index);
+  return auxHashMap_->mustFindValueFor(slot);
 }
 template<typename A>
@@ -210,7 +225,7 @@ void Hll4Array<A>::internalHll4Update(uint32_t slotNo, uint8_t newVal) {
       // we just increased a pair value, so it might be time to change curMin
       if (actualOldValue == this->curMin_) { // 908
-        this->decNumAtCurMin();
+        --(this->numAtCurMin_);
         while (this->numAtCurMin_ == 0) {
           shiftToBiggerCurMin(); // increases curMin by 1, builds a new aux table
           // shifts values in 4-bit table and recounts curMin
@@ -328,13 +343,6 @@ typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
       this->tgtHllType_, auxHashMap_, this->curMin_, false);
 }
-template<typename A>
-void Hll4Array<A>::mergeHll(const HllArray<A>& src) {
-  for (const auto coupon: src) {
-    internalCouponUpdate(coupon);
-  }
-}
 }
 #endif // _HLL4ARRAY_INTERNAL_HPP_

data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp CHANGED Viewed

@@ -25,14 +25,12 @@
 namespace datasketches {
-template<typename A>
-class Hll4Iterator;
 template<typename A>
 class Hll4Array final : public HllArray<A> {
   public:
     explicit Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
     explicit Hll4Array(const Hll4Array<A>& that);
+    explicit Hll4Array(const HllArray<A>& that);
     virtual ~Hll4Array();
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
@@ -41,13 +39,12 @@ class Hll4Array final : public HllArray<A> {
     inline uint8_t getSlot(uint32_t slotNo) const;
     inline void putSlot(uint32_t slotNo, uint8_t value);
-    inline uint8_t get_value(uint32_t index) const;
+    inline uint8_t adjustRawValue(uint32_t index, uint8_t value) const;
     virtual uint32_t getUpdatableSerializationBytes() const;
     virtual uint32_t getHllByteArrBytes() const;
     virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) final;
-    void mergeHll(const HllArray<A>& src);
     virtual AuxHashMap<A>* getAuxHashMap() const;
     // does *not* delete old map if overwriting

data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp CHANGED Viewed

@@ -34,6 +34,25 @@ HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize, allocator)
   this->hllByteArr_.resize(numBytes, 0);
 }
+// Converting constructor: builds an HLL_6 array from any HllArray. The base is
+// initialized with the source's lgConfigK, start-full-size setting and allocator,
+// then every coupon produced by iterating over the source is replayed into this array.
+template<typename A>
+Hll6Array<A>::Hll6Array(const HllArray<A>& other) :
+  HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_6, other.isStartFullSize(), other.getAllocator())
+{
+  const int numBytes = this->hll6ArrBytes(this->lgConfigK_);
+  this->hllByteArr_.resize(numBytes, 0); // zero-filled storage before any coupon is applied
+  this->oooFlag_ = other.isOutOfOrderFlag();
+  uint32_t num_zeros = 1 << this->lgConfigK_; // start at 2^lgConfigK and count down once per coupon
+  for (const auto coupon : other) { // all = false, so skip empty values
+    num_zeros--;
+    internalCouponUpdate(coupon); // updates KxQ registers
+  }
+  // whatever was not decremented away is stored as numAtCurMin_
+  this->numAtCurMin_ = num_zeros;
+  // assigned after the replay loop, so the source's accumulator value is what remains
+  // (presumably internalCouponUpdate() modifies hipAccum_ - TODO confirm)
+  this->hipAccum_ = other.getHipAccum();
+  this->rebuild_kxq_curmin_ = false; // NOTE(review): name suggests KxQ/curMin need no rebuild here - confirm
+}
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
@@ -101,13 +120,6 @@ void Hll6Array<A>::internalCouponUpdate(uint32_t coupon) {
   }
 }
-template<typename A>
-void Hll6Array<A>::mergeHll(const HllArray<A>& src) {
-  for (const auto coupon: src) {
-    internalCouponUpdate(coupon);
-  }
-}
 }
 #endif // _HLL6ARRAY_INTERNAL_HPP_

data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp CHANGED Viewed

@@ -31,6 +31,7 @@ template<typename A>
 class Hll6Array final : public HllArray<A> {
   public:
     Hll6Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
+    explicit Hll6Array(const HllArray<A>& that);
     virtual ~Hll6Array() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
@@ -41,7 +42,6 @@ class Hll6Array final : public HllArray<A> {
     inline void putSlot(uint32_t slotNo, uint8_t value);
     virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) final;
-    void mergeHll(const HllArray<A>& src);
     virtual uint32_t getHllByteArrBytes() const;

data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp CHANGED Viewed

@@ -32,6 +32,25 @@ HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize, allocator)
   this->hllByteArr_.resize(numBytes, 0);
 }
+// Converting constructor: builds an HLL_8 array from any HllArray. The base is
+// initialized with the source's lgConfigK, start-full-size setting and allocator,
+// then every coupon produced by iterating over the source is replayed into this array.
+template<typename A>
+Hll8Array<A>::Hll8Array(const HllArray<A>& other):
+  HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_8, other.isStartFullSize(), other.getAllocator())
+{
+  const int numBytes = this->hll8ArrBytes(this->lgConfigK_);
+  this->hllByteArr_.resize(numBytes, 0); // zero-filled storage before any coupon is applied
+  this->oooFlag_ = other.isOutOfOrderFlag();
+  uint32_t num_zeros = 1 << this->lgConfigK_; // start at 2^lgConfigK and count down once per coupon
+  for (const auto coupon : other) { // all = false, so skip empty values
+    num_zeros--;
+    internalCouponUpdate(coupon); // updates KxQ registers
+  }
+  // internalCouponUpdate() also adjusts numAtCurMin_ (interpreted as the number of
+  // zero slots); the count taken in the loop above replaces that value
+  this->numAtCurMin_ = num_zeros;
+  // internalCouponUpdate() calls hipAndKxQIncrementalUpdate(); assigning afterwards
+  // leaves the source's accumulator value in hipAccum_
+  this->hipAccum_ = other.getHipAccum();
+  this->rebuild_kxq_curmin_ = false; // NOTE(review): name suggests KxQ/curMin need no rebuild here - confirm
+}
 template<typename A>
 std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
   return [](HllSketchImpl<A>* ptr) {
@@ -77,13 +96,11 @@ void Hll8Array<A>::internalCouponUpdate(uint32_t coupon) {
   const uint32_t slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
   const uint8_t newVal = HllUtil<A>::getValue(coupon);
-  const uint8_t curVal = getSlot(slotNo);
+  const uint8_t curVal = this->hllByteArr_[slotNo];
   if (newVal > curVal) {
-    putSlot(slotNo, newVal);
+    this->hllByteArr_[slotNo] = newVal;
     this->hipAndKxQIncrementalUpdate(curVal, newVal);
-    if (curVal == 0) {
-      this->numAtCurMin_--; // interpret numAtCurMin as num zeros
-    }
+    this->numAtCurMin_ -= curVal == 0; // interpret numAtCurMin as num zeros
   }
 }
@@ -97,49 +114,88 @@ void Hll8Array<A>::mergeList(const CouponList<A>& src) {
 template<typename A>
 void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
   // at this point src_k >= dst_k
-  const uint32_t src_k = 1 << src.getLgConfigK();
-  const uint32_t dst_mask = (1 << this->getLgConfigK()) - 1;
-  // duplication below is to avoid a virtual method call in a loop
-  if (src.getTgtHllType() == target_hll_type::HLL_8) {
-    for (uint32_t i = 0; i < src_k; i++) {
-      const uint8_t new_v = static_cast<const Hll8Array<A>&>(src).getSlot(i);
-      const uint32_t j = i & dst_mask;
-      const uint8_t old_v = this->hllByteArr_[j];
-      if (new_v > old_v) {
-        this->hllByteArr_[j] = new_v;
-        this->hipAndKxQIncrementalUpdate(old_v, new_v);
-        if (old_v == 0) {
-          this->numAtCurMin_--;
-        }
+  // we can optimize further when the k values are equal
+  if (this->getLgConfigK() == src.getLgConfigK()) {
+    if (src.getTgtHllType() == target_hll_type::HLL_8) {
+      uint32_t i = 0;
+      for (const auto value: src.getHllArray()) {
+        this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
+        ++i;
       }
-    }
-  } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
-    for (uint32_t i = 0; i < src_k; i++) {
-      const uint8_t new_v = static_cast<const Hll6Array<A>&>(src).getSlot(i);
-      const uint32_t j = i & dst_mask;
-      const uint8_t old_v = this->hllByteArr_[j];
-      if (new_v > old_v) {
-        this->hllByteArr_[j] = new_v;
-        this->hipAndKxQIncrementalUpdate(old_v, new_v);
-        if (old_v == 0) {
-          this->numAtCurMin_--;
-        }
+    } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
+      const uint32_t src_k = 1 << src.getLgConfigK();
+      uint32_t i = 0;
+      const uint8_t* ptr = src.getHllArray().data();
+      while (i < src_k) {
+        uint8_t value = *ptr & 0x3f;
+        this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
+        ++i;
+        value = *ptr++ >> 6;
+        value |= (*ptr & 0x0f) << 2;
+        this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
+        ++i;
+        value = *ptr++ >> 4;
+        value |= (*ptr & 3) << 4;
+        this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
+        ++i;
+        value = *ptr++ >> 2;
+        this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
+        ++i;
+      }
+    } else { // HLL_4
+      const auto& src4 = static_cast<const Hll4Array<A>&>(src);
+      uint32_t i = 0;
+      for (const auto byte: src.getHllArray()) {
+        this->hllByteArr_[i] = std::max(this->hllByteArr_[i], src4.adjustRawValue(i, byte & hll_constants::loNibbleMask));
+        ++i;
+        this->hllByteArr_[i] = std::max(this->hllByteArr_[i], src4.adjustRawValue(i, byte >> 4));
+        ++i;
       }
     }
-  } else { // HLL_4
-    for (uint32_t i = 0; i < src_k; i++) {
-      const uint8_t new_v = static_cast<const Hll4Array<A>&>(src).get_value(i);
-      const uint32_t j = i & dst_mask;
-      const uint8_t old_v = this->hllByteArr_[j];
-      if (new_v > old_v) {
-        this->hllByteArr_[j] = new_v;
-        this->hipAndKxQIncrementalUpdate(old_v, new_v);
-        if (old_v == 0) {
-          this->numAtCurMin_--;
-        }
+  } else {
+    // src_k > dst_k
+    const uint32_t dst_mask = (1 << this->getLgConfigK()) - 1;
+    // special treatment below to optimize performance
+    if (src.getTgtHllType() == target_hll_type::HLL_8) {
+      uint32_t i = 0;
+      for (const auto value: src.getHllArray()) {
+        processValue(i++, dst_mask, value);
+      }
+    } else if (src.getTgtHllType() == target_hll_type::HLL_6) {
+      const uint32_t src_k = 1 << src.getLgConfigK();
+      uint32_t i = 0;
+      const uint8_t* ptr = src.getHllArray().data();
+      while (i < src_k) {
+        uint8_t value = *ptr & 0x3f;
+        processValue(i++, dst_mask, value);
+        value = *ptr++ >> 6;
+        value |= (*ptr & 0x0f) << 2;
+        processValue(i++, dst_mask, value);
+        value = *ptr++ >> 4;
+        value |= (*ptr & 3) << 4;
+        processValue(i++, dst_mask, value);
+        value = *ptr++ >> 2;
+        processValue(i++, dst_mask, value);
+      }
+    } else { // HLL_4
+      const auto& src4 = static_cast<const Hll4Array<A>&>(src);
+      uint32_t i = 0;
+      for (const auto byte: src.getHllArray()) {
+        processValue(i, dst_mask, src4.adjustRawValue(i, byte & hll_constants::loNibbleMask));
+        ++i;
+        processValue(i, dst_mask, src4.adjustRawValue(i, byte >> 4));
+        ++i;
       }
     }
   }
+  this->setRebuildKxqCurminFlag(true);
+}
+// Raises the byte at position (slot & mask) of hllByteArr_ to new_val when new_val
+// is larger; otherwise the stored value is left as it is.
+template<typename A>
+void Hll8Array<A>::processValue(uint32_t slot, uint32_t mask, uint8_t new_val) {
+  uint8_t& target = this->hllByteArr_[slot & mask];
+  if (target < new_val) target = new_val;
+}
 }

data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp CHANGED Viewed

@@ -31,6 +31,7 @@ template<typename A>
 class Hll8Array final : public HllArray<A> {
   public:
     Hll8Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
+    explicit Hll8Array(const HllArray<A>& that);
     virtual ~Hll8Array() = default;
     virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
@@ -48,6 +49,7 @@ class Hll8Array final : public HllArray<A> {
   private:
     inline void internalCouponUpdate(uint32_t coupon);
+    inline void processValue(uint32_t slot, uint32_t mask, uint8_t new_val);
 };
 }