RubyGems - datasketches - Versions diffs - 0.3.1 → 0.3.2 - Mend

datasketches 0.3.1 → 0.3.2

Files changed (113) hide show

data/vendor/datasketches-cpp/python/tests/kll_test.py CHANGED Viewed

@@ -16,11 +16,12 @@
 # under the License.
 import unittest
-from datasketches import kll_ints_sketch, kll_floats_sketch, kll_doubles_sketch, ks_test
+from datasketches import kll_ints_sketch, kll_floats_sketch, kll_doubles_sketch
+from datasketches import kll_items_sketch, ks_test, PyStringsSerDe
 import numpy as np
 class KllTest(unittest.TestCase):
-    def test_kll_example(self):
+    def test_kll_floats_example(self):
       k = 160
       n = 2 ** 20
@@ -61,12 +62,14 @@ class KllTest(unittest.TestCase):
       self.assertLess(kll.get_num_retained(), n)
       # merging itself will double the number of items the sketch has seen
-      kll.merge(kll)
+      # but need to do that with a copy
+      kll_copy = kll_floats_sketch(kll)
+      kll.merge(kll_copy)
       self.assertEqual(kll.get_n(), 2*n)
       # we can then serialize and reconstruct the sketch
       kll_bytes = kll.serialize()
-      new_kll = kll.deserialize(kll_bytes)
+      new_kll = kll_floats_sketch.deserialize(kll_bytes)
       self.assertEqual(kll.get_num_retained(), new_kll.get_num_retained())
       self.assertEqual(kll.get_min_value(), new_kll.get_min_value())
       self.assertEqual(kll.get_max_value(), new_kll.get_max_value())
@@ -78,6 +81,12 @@ class KllTest(unittest.TestCase):
       # they come from the same distribution (since they do)
       self.assertFalse(ks_test(kll, new_kll, 0.001))
+      total_weight = 0
+      for tuple in kll:
+        item = tuple[0]
+        weight = tuple[1]
+        total_weight = total_weight + weight
+      self.assertEqual(total_weight, kll.get_n())
     def test_kll_ints_sketch(self):
         k = 100
@@ -108,8 +117,9 @@ class KllTest(unittest.TestCase):
         self.assertEqual(kll.get_rank(round(n/2)), 0.5)
-        # merge self
-        kll.merge(kll)
+        # merge copy of self
+        kll_copy = kll_ints_sketch(kll)
+        kll.merge(kll_copy)
         self.assertEqual(kll.get_n(), 2 * n)
         sk_bytes = kll.serialize()
@@ -121,5 +131,29 @@ class KllTest(unittest.TestCase):
       kll = kll_doubles_sketch(k)
       self.assertTrue(kll.is_empty())
+    def test_kll_items_sketch(self):
+      # most functionality has been tested, but we need to ensure objects and sorting work
+      # as well as serialization
+      k = 100
+      n = 2 ** 16
+      # create a sketch and inject enough points to force compaction
+      kll = kll_items_sketch(k)
+      for i in range(0, n):
+        kll.update(str(i))
+      kll_copy = kll_items_sketch(kll)
+      kll.merge(kll_copy)
+      self.assertEqual(kll.get_n(), 2 * n)
+      kll_bytes = kll.serialize(PyStringsSerDe())
+      new_kll = kll_items_sketch.deserialize(kll_bytes, PyStringsSerDe())
+      self.assertEqual(kll.get_num_retained(), new_kll.get_num_retained())
+      self.assertEqual(kll.get_min_value(), new_kll.get_min_value())
+      self.assertEqual(kll.get_max_value(), new_kll.get_max_value())
+      self.assertEqual(kll.get_quantile(0.7), new_kll.get_quantile(0.7))
+      self.assertEqual(kll.get_rank(str(n/4)), new_kll.get_rank(str(n/4)))
 if __name__ == '__main__':
     unittest.main()

data/vendor/datasketches-cpp/python/tests/quantiles_test.py CHANGED Viewed

@@ -16,11 +16,12 @@
 # under the License.
 import unittest
-from datasketches import quantiles_ints_sketch, quantiles_floats_sketch, quantiles_doubles_sketch, ks_test
+from datasketches import quantiles_ints_sketch, quantiles_floats_sketch, quantiles_doubles_sketch
+from datasketches import quantiles_items_sketch, ks_test, PyStringsSerDe
 import numpy as np
 class QuantilesTest(unittest.TestCase):
-    def test_quantiles_example(self):
+    def test_quantiles_floats_example(self):
       k = 128
       n = 2 ** 20
@@ -61,12 +62,13 @@ class QuantilesTest(unittest.TestCase):
       self.assertLess(quantiles.get_num_retained(), n)
       # merging itself will double the number of items the sketch has seen
-      quantiles.merge(quantiles)
+      quantiles_copy = quantiles_floats_sketch(quantiles)
+      quantiles.merge(quantiles_copy)
       self.assertEqual(quantiles.get_n(), 2*n)
       # we can then serialize and reconstruct the sketch
       quantiles_bytes = quantiles.serialize()
-      new_quantiles = quantiles.deserialize(quantiles_bytes)
+      new_quantiles = quantiles_floats_sketch.deserialize(quantiles_bytes)
       self.assertEqual(quantiles.get_num_retained(), new_quantiles.get_num_retained())
       self.assertEqual(quantiles.get_min_value(), new_quantiles.get_min_value())
       self.assertEqual(quantiles.get_max_value(), new_quantiles.get_max_value())
@@ -80,6 +82,13 @@ class QuantilesTest(unittest.TestCase):
       unif_quantiles.update(np.random.uniform(10, 20, size=n-1))
       self.assertTrue(ks_test(quantiles, unif_quantiles, 0.001))
+      total_weight = 0
+      for tuple in quantiles:
+        item = tuple[0]
+        weight = tuple[1]
+        total_weight = total_weight + weight
+      self.assertEqual(total_weight, quantiles.get_n())
     def test_quantiles_ints_sketch(self):
         k = 128
         n = 10
@@ -110,7 +119,8 @@ class QuantilesTest(unittest.TestCase):
         self.assertEqual(quantiles.get_rank(round(n/2)), 0.5)
         # merge self
-        quantiles.merge(quantiles)
+        quantiles_copy = quantiles_ints_sketch(quantiles)
+        quantiles.merge(quantiles_copy)
         self.assertEqual(quantiles.get_n(), 2 * n)
         sk_bytes = quantiles.serialize()
@@ -122,5 +132,29 @@ class QuantilesTest(unittest.TestCase):
       quantiles = quantiles_doubles_sketch(k)
       self.assertTrue(quantiles.is_empty())
+    def test_quantiles_items_sketch(self):
+      # most functionality has been tested, but we need to ensure objects and sorting work
+      # as well as serialization
+      k = 128
+      n = 2 ** 16
+      # create a sketch and inject enough points to force compaction
+      quantiles = quantiles_items_sketch(k)
+      for i in range(0, n):
+        quantiles.update(str(i))
+      quantiles_copy = quantiles_items_sketch(quantiles)
+      quantiles.merge(quantiles_copy)
+      self.assertEqual(quantiles.get_n(), 2 * n)
+      quantiles_bytes = quantiles.serialize(PyStringsSerDe())
+      new_quantiles = quantiles_items_sketch.deserialize(quantiles_bytes, PyStringsSerDe())
+      self.assertEqual(quantiles.get_num_retained(), new_quantiles.get_num_retained())
+      self.assertEqual(quantiles.get_min_value(), new_quantiles.get_min_value())
+      self.assertEqual(quantiles.get_max_value(), new_quantiles.get_max_value())
+      self.assertEqual(quantiles.get_quantile(0.7), new_quantiles.get_quantile(0.7))
+      self.assertEqual(quantiles.get_rank(str(n/4)), new_quantiles.get_rank(str(n/4)))
 if __name__ == '__main__':
     unittest.main()

data/vendor/datasketches-cpp/python/tests/req_test.py CHANGED Viewed

@@ -16,7 +16,7 @@
 # under the License.
 import unittest
-from datasketches import req_ints_sketch, req_floats_sketch
+from datasketches import req_ints_sketch, req_floats_sketch, req_items_sketch, PyStringsSerDe
 import numpy as np
 class reqTest(unittest.TestCase):
@@ -67,18 +67,26 @@ class reqTest(unittest.TestCase):
       self.assertEqual(req.get_k(), k)
       # merging itself will double the number of items the sketch has seen
-      req.merge(req)
+      req_copy = req_floats_sketch(req)
+      req.merge(req_copy)
       self.assertEqual(req.get_n(), 2*n)
       # we can then serialize and reconstruct the sketch
       req_bytes = req.serialize()
-      new_req = req.deserialize(req_bytes)
+      new_req = req_floats_sketch.deserialize(req_bytes)
       self.assertEqual(req.get_num_retained(), new_req.get_num_retained())
       self.assertEqual(req.get_min_value(), new_req.get_min_value())
       self.assertEqual(req.get_max_value(), new_req.get_max_value())
       self.assertEqual(req.get_quantile(0.7), new_req.get_quantile(0.7))
       self.assertEqual(req.get_rank(0.0), new_req.get_rank(0.0))
+      total_weight = 0
+      for tuple in req:
+        item = tuple[0]
+        weight = tuple[1]
+        total_weight = total_weight + weight
+      self.assertEqual(total_weight, req.get_n())
     def test_req_ints_sketch(self):
         k = 100
         n = 10
@@ -109,18 +117,43 @@ class reqTest(unittest.TestCase):
         self.assertEqual(req.get_rank(round(n/2)), 0.5)
         # merge self
-        req.merge(req)
+        req_copy = req_ints_sketch(req)
+        req.merge(req_copy)
         self.assertEqual(req.get_n(), 2 * n)
         sk_bytes = req.serialize()
         self.assertTrue(isinstance(req_ints_sketch.deserialize(sk_bytes), req_ints_sketch))
     def test_req_floats_sketch(self):
-      # already tested ints and it's templatized, so just make sure it instantiates properly
+      # floats are already tested above, so just check that low rank accuracy (LRA, non-HRA) mode works
       k = 75
       req = req_floats_sketch(k, False) # low rank accuracy
       self.assertTrue(req.is_empty())
       self.assertFalse(req.is_hra())
+    def test_req_items_sketch(self):
+      # most functionality has been tested, but we need to ensure objects and sorting work
+      # as well as serialization
+      k = 100
+      n = 2 ** 16
+      # create a sketch and inject enough points to force compaction
+      req = req_items_sketch(k)
+      for i in range(0, n):
+        req.update(str(i))
+      req_copy = req_items_sketch(req)
+      req.merge(req_copy)
+      self.assertEqual(req.get_n(), 2 * n)
+      req_bytes = req.serialize(PyStringsSerDe())
+      new_req = req_items_sketch.deserialize(req_bytes, PyStringsSerDe())
+      self.assertEqual(req.get_num_retained(), new_req.get_num_retained())
+      self.assertEqual(req.get_min_value(), new_req.get_min_value())
+      self.assertEqual(req.get_max_value(), new_req.get_max_value())
+      self.assertEqual(req.get_quantile(0.7), new_req.get_quantile(0.7))
+      self.assertEqual(req.get_rank(str(n/4)), new_req.get_rank(str(n/4)))
 if __name__ == '__main__':
     unittest.main()

data/vendor/datasketches-cpp/python/tests/theta_test.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 import unittest
 from datasketches import theta_sketch, update_theta_sketch
@@ -24,11 +24,11 @@ from datasketches import theta_jaccard_similarity
 class ThetaTest(unittest.TestCase):
     def test_theta_basic_example(self):
-        k = 12      # 2^k = 4096 rows in the table
+        lgk = 12    # 2^lgk = 4096 rows in the table
         n = 1 << 18 # ~256k unique values
         # create a sketch and inject some values
-        sk = self.generate_theta_sketch(n, k)
+        sk = self.generate_theta_sketch(n, lgk)
         # we can check that the upper and lower bounds bracket the
         # estimate, without needing to know the exact value.
@@ -48,20 +48,26 @@ class ThetaTest(unittest.TestCase):
         self.assertFalse(sk.is_empty())
         self.assertEqual(sk.get_estimate(), new_sk.get_estimate())
+        count = 0
+        for hash in new_sk:
+          self.assertLess(hash, new_sk.get_theta64())
+          count = count + 1
+        self.assertEqual(count, new_sk.get_num_retained())
     def test_theta_set_operations(self):
-        k = 12      # 2^k = 4096 rows in the table
+        lgk = 12    # 2^lgk = 4096 rows in the table
         n = 1 << 18 # ~256k unique values
         # we'll have 1/4 of the values overlap
         offset = int(3 * n / 4) # it's a float w/o cast
         # create a couple sketches and inject some values
-        sk1 = self.generate_theta_sketch(n, k)
-        sk2 = self.generate_theta_sketch(n, k, offset)
+        sk1 = self.generate_theta_sketch(n, lgk)
+        sk2 = self.generate_theta_sketch(n, lgk, offset)
         # UNIONS
         # create a union object
-        union = theta_union(k)
+        union = theta_union(lgk)
         union.update(sk1)
         union.update(sk2)
@@ -77,7 +83,6 @@ class ThetaTest(unittest.TestCase):
         self.assertLessEqual(result.get_lower_bound(1), 7 * n / 4)
         self.assertGreaterEqual(result.get_upper_bound(1), 7 * n / 4)
         # INTERSECTIONS
         # create an intersection object
         intersect = theta_intersection() # no lg_k
@@ -96,7 +101,6 @@ class ThetaTest(unittest.TestCase):
         self.assertLessEqual(result.get_lower_bound(1), n / 4)
         self.assertGreaterEqual(result.get_upper_bound(1), n / 4)
         # A NOT B
         # create an a_not_b object
         anb = theta_a_not_b() # no lg_k
@@ -134,13 +138,11 @@ class ThetaTest(unittest.TestCase):
         self.assertTrue(theta_jaccard_similarity.similarity_test(sk1, result, 0.7))
-    def generate_theta_sketch(self, n, k, offset=0):
-      sk = update_theta_sketch(k)
+    def generate_theta_sketch(self, n, lgk, offset=0):
+      sk = update_theta_sketch(lgk)
       for i in range(0, n):
         sk.update(i + offset)
       return sk
 if __name__ == '__main__':
     unittest.main()

data/vendor/datasketches-cpp/python/tests/tuple_test.py ADDED Viewed

@@ -0,0 +1,206 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import unittest
+from datasketches import update_tuple_sketch
+from datasketches import compact_tuple_sketch, tuple_union
+from datasketches import tuple_intersection, tuple_a_not_b
+from datasketches import tuple_jaccard_similarity
+from datasketches import tuple_jaccard_similarity, PyIntsSerDe
+from datasketches import AccumulatorPolicy, MaxIntPolicy, MinIntPolicy
+from datasketches import update_theta_sketch
+class TupleTest(unittest.TestCase):
+    def test_tuple_basic_example(self):
+        lgk = 12    # 2^lgk = 4096 rows in the table
+        n = 1 << 18 # ~256k unique values
+        # create a sketch and inject some values -- summary is 2 so we can sum them
+        # and know the result
+        sk = self.generate_tuple_sketch(AccumulatorPolicy(), n, lgk, value=2)
+        # we can check that the upper and lower bounds bracket the
+        # estimate, without needing to know the exact value.
+        self.assertLessEqual(sk.get_lower_bound(1), sk.get_estimate())
+        self.assertGreaterEqual(sk.get_upper_bound(1), sk.get_estimate())
+        # because this sketch is deterministically generated, we can
+        # also compare against the exact value
+        self.assertLessEqual(sk.get_lower_bound(1), n)
+        self.assertGreaterEqual(sk.get_upper_bound(1), n)
+        # compact and serialize for storage, then reconstruct
+        sk_bytes = sk.compact().serialize(PyIntsSerDe())
+        new_sk = compact_tuple_sketch.deserialize(sk_bytes, serde=PyIntsSerDe())
+        # estimate remains unchanged
+        self.assertFalse(sk.is_empty())
+        self.assertEqual(sk.get_estimate(), new_sk.get_estimate())
+        # we can also iterate over the sketch entries
+        # the iterator provides a (hashkey, summary) pair where the
+        # first value is the raw hash value and the second the summary
+        count = 0
+        cumSum = 0
+        for pair in new_sk:
+          self.assertLess(pair[0], new_sk.get_theta64())
+          count += 1
+          cumSum += pair[1]
+        self.assertEqual(count, new_sk.get_num_retained())
+        self.assertEqual(cumSum, 2 * new_sk.get_num_retained())
+        # we can even create a tuple sketch from an existing theta sketch
+        # as long as we provide a summary to use
+        theta_sk = update_theta_sketch(lgk)
+        for i in range(n, 2*n):
+          theta_sk.update(i)
+        cts = compact_tuple_sketch(theta_sk, 5)
+        cumSum = 0
+        for pair in cts:
+          cumSum += pair[1]
+        self.assertEqual(cumSum, 5 * cts.get_num_retained())
+    def test_tuple_set_operations(self):
+        lgk = 12    # 2^lgk = 4096 rows in the table
+        n = 1 << 18 # ~256k unique values
+        # we'll have 1/4 of the values overlap
+        offset = int(3 * n / 4) # it's a float w/o cast
+        # create a couple sketches and inject some values, with different summaries
+        sk1 = self.generate_tuple_sketch(AccumulatorPolicy(), n, lgk, value=5)
+        sk2 = self.generate_tuple_sketch(AccumulatorPolicy(), n, lgk, value=7, offset=offset)
+        # UNIONS
+        # create a union object
+        union = tuple_union(MaxIntPolicy(), lgk)
+        union.update(sk1)
+        union.update(sk2)
+        # getting result from union returns a compact_tuple_sketch
+        # compact tuple sketches can be used in additional unions
+        # or set operations but cannot accept further item updates
+        result = union.get_result()
+        self.assertTrue(isinstance(result, compact_tuple_sketch))
+        # since our process here is deterministic, we have
+        # checked and know the exact answer is within one
+        # standard deviation of the estimate
+        self.assertLessEqual(result.get_lower_bound(1), 7 * n / 4)
+        self.assertGreaterEqual(result.get_upper_bound(1), 7 * n / 4)
+        # we unioned two equal-sized sketches with overlap and used
+        # the max value as the resulting summary, meaning we should
+        # have more summaries with value 7 than value 5 in the result
+        count5 = 0
+        count7 = 0
+        for pair in result:
+          if pair[1] == 5:
+            count5 += 1
+          elif pair[1] == 7:
+            count7 += 1
+          else:
+            self.fail()
+        self.assertLess(count5, count7)
+        # INTERSECTIONS
+        # create an intersection object
+        intersect = tuple_intersection(MinIntPolicy()) # no lg_k
+        intersect.update(sk1)
+        intersect.update(sk2)
+        # has_result() indicates the intersection has been used,
+        # although the result may be the empty set
+        self.assertTrue(intersect.has_result())
+        # as with unions, the result is a compact sketch
+        result = intersect.get_result()
+        self.assertTrue(isinstance(result, compact_tuple_sketch))
+        # we know the sets overlap by 1/4
+        self.assertLessEqual(result.get_lower_bound(1), n / 4)
+        self.assertGreaterEqual(result.get_upper_bound(1), n / 4)
+        # in this example, we intersected the sketches and took the
+        # min value as the resulting summary, so all summaries
+        # must be exactly equal to that value
+        count5 = 0
+        for pair in result:
+          if pair[1] == 5:
+            count5 += 1
+          else:
+            self.fail()
+        self.assertEqual(count5, result.get_num_retained())
+        # A NOT B
+        # create an a_not_b object
+        anb = tuple_a_not_b() # no lg_k or policy
+        result = anb.compute(sk1, sk2)
+        # as with unions, the result is a compact sketch
+        self.assertTrue(isinstance(result, compact_tuple_sketch))
+        # we know the sets overlap by 1/4, so the remainder is 3/4
+        self.assertLessEqual(result.get_lower_bound(1), 3 * n / 4)
+        self.assertGreaterEqual(result.get_upper_bound(1), 3 * n / 4)
+        # here, we have only values with a summary of 5 as any keys that
+        # existed in both sketches were removed
+        count5 = 0
+        for pair in result:
+          if pair[1] == 5:
+            count5 += 1
+          else:
+            self.fail()
+        self.assertEqual(count5, result.get_num_retained())
+        # JACCARD SIMILARITY
+        # Jaccard Similarity measure returns (lower_bound, estimate, upper_bound)
+        # and does not examine summaries, even for (dis)similarity tests.
+        jac = tuple_jaccard_similarity.jaccard(sk1, sk2)
+        # we can check that results are in the expected order
+        self.assertLess(jac[0], jac[1])
+        self.assertLess(jac[1], jac[2])
+        # checks for sketch equivalence
+        self.assertTrue(tuple_jaccard_similarity.exactly_equal(sk1, sk1))
+        self.assertFalse(tuple_jaccard_similarity.exactly_equal(sk1, sk2))
+        # we can apply a check for similarity or dissimilarity at a
+        # given threshold, at 97.7% confidence.
+        # check that the Jaccard Index is at most (upper bound) 0.2.
+        # exact result would be 1/7
+        self.assertTrue(tuple_jaccard_similarity.dissimilarity_test(sk1, sk2, 0.2))
+        # check that the Jaccard Index is at least (lower bound) 0.7
+        # exact result would be 3/4, using result from A NOT B test
+        self.assertTrue(tuple_jaccard_similarity.similarity_test(sk1, result, 0.7))
+    # Generates a basic tuple sketch with a fixed value for each update
+    def generate_tuple_sketch(self, policy, n, lgk, value, offset=0):
+      sk = update_tuple_sketch(policy, lgk)
+      for i in range(0, n):
+        sk.update(i + offset, value)
+      return sk
+if __name__ == '__main__':
+    unittest.main()

data/vendor/datasketches-cpp/python/tests/vo_test.py CHANGED Viewed

@@ -45,6 +45,13 @@ class VoTest(unittest.TestCase):
     items = vo.get_samples()
     self.assertEqual(len(items), k)
+    count = 0
+    for tuple in vo:
+      sample = tuple[0]
+      weight = tuple[1]
+      count = count + 1
+    self.assertEqual(count, vo.num_samples)
     # we can also apply a predicate to the sketch to get an estimate
     # (with optimally minimal variance) of the subset sum of items
     # matching that predicate among the entire population

data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp CHANGED Viewed

@@ -580,15 +580,20 @@ private:
 template<typename T, typename C, typename A>
-class quantiles_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
+class quantiles_sketch<T, C, A>::const_iterator {
 public:
+  using iterator_category = std::input_iterator_tag;
   using value_type = std::pair<const T&, const uint64_t>;
+  using difference_type = void;
+  using pointer = const return_value_holder<value_type>;
+  using reference = const value_type;
   const_iterator& operator++();
   const_iterator& operator++(int);
   bool operator==(const const_iterator& other) const;
   bool operator!=(const const_iterator& other) const;
-  const value_type operator*() const;
-  const return_value_holder<value_type> operator->() const;
+  reference operator*() const;
+  pointer operator->() const;
 private:
   friend class quantiles_sketch<T, C, A>;
   using Level = std::vector<T, A>;

data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp CHANGED Viewed

@@ -645,12 +645,12 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
     uint8_t level = 0;
     os << " BB:" << std::endl;
     for (const T& item : base_buffer_) {
-      os << "    " << std::to_string(item) << std::endl;
+      os << "    " << item << std::endl;
     }
     for (uint8_t i = 0; i < levels_.size(); ++i) {
       os << " level " << static_cast<unsigned int>(level) << ":" << std::endl;
       for (const T& item : levels_[i]) {
-        os << "   " << std::to_string(item) << std::endl;
+        os << "   " << item << std::endl;
       }
     }
     os << "### End sketch data" << std::endl;
@@ -1354,12 +1354,12 @@ bool quantiles_sketch<T, C, A>::const_iterator::operator!=(const const_iterator&
 }
 template<typename T, typename C, typename A>
-auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
+auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> reference {
   return value_type(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
 }
 template<typename T, typename C, typename A>
-auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
+auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> pointer {
   return **this;
 }

data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp CHANGED Viewed

@@ -260,7 +260,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
         REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
       }
       subtotal_pmf += pmf[i];
-      if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
+      if (std::abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
         std::cerr << "CDF vs PMF for value " << i << std::endl;
         REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
       }

data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp CHANGED Viewed

@@ -28,8 +28,6 @@
 #include "conditional_forward.hpp"
 #include "common_defs.hpp"
-#include <iomanip>
 namespace datasketches {
 template<typename T, typename C, typename A>

data/vendor/datasketches-cpp/req/include/req_sketch.hpp CHANGED Viewed

@@ -399,15 +399,20 @@ private:
 };
 template<typename T, typename C, typename A>
-class req_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
+class req_sketch<T, C, A>::const_iterator {
 public:
+  using iterator_category = std::input_iterator_tag;
   using value_type = std::pair<const T&, const uint64_t>;
+  using difference_type = void;
+  using pointer = const return_value_holder<value_type>;
+  using reference = const value_type;
   const_iterator& operator++();
   const_iterator& operator++(int);
   bool operator==(const const_iterator& other) const;
   bool operator!=(const const_iterator& other) const;
-  const value_type operator*() const;
-  const return_value_holder<value_type> operator->() const;
+  reference operator*() const;
+  pointer operator->() const;
 private:
   using LevelsIterator = typename std::vector<Compactor, AllocCompactor>::const_iterator;
   LevelsIterator levels_it_;

data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp CHANGED Viewed

@@ -848,12 +848,12 @@ bool req_sketch<T, C, A>::const_iterator::operator!=(const const_iterator& other
 }
 template<typename T, typename C, typename A>
-auto req_sketch<T, C, A>::const_iterator::operator*() const -> const value_type {
+auto req_sketch<T, C, A>::const_iterator::operator*() const -> reference {
   return value_type(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
 }
 template<typename T, typename C, typename A>
-auto req_sketch<T, C, A>::const_iterator::operator->() const -> const return_value_holder<value_type> {
+auto req_sketch<T, C, A>::const_iterator::operator->() const -> pointer {
   return **this;
 }