RubyGems - datasketches - Versions diffs - 0.1.2 → 0.2.3 - Mend

datasketches 0.1.2 → 0.2.3

Files changed (205) hide show

data/vendor/datasketches-cpp/CMakeLists.txt CHANGED Viewed

@@ -35,6 +35,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 #set(CMAKE_VERBOSE_MAKEFILE ON)
 set(CMAKE_MACOSX_RPATH ON)
+set(CMAKE_CXX_STANDARD 11)
 # enable compiler warnings globally
 # derived from https://foonathan.net/blog/2018/10/17/cmake-warnings.html
 # and https://arne-mertz.de/2018/07/cmake-properties-options/
@@ -70,6 +72,13 @@ if(COVERAGE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
   add_link_options(--coverage)
 endif()
+# SANITIZE selects the sanitizer(s) to build with (g++/clang only). The value is
+# passed verbatim to -fsanitize=, e.g. -DSANITIZE=address or
+# -DSANITIZE=address,undefined. Empty or OFF (the default) disables it.
+# It is a STRING cache entry rather than an option(): option() is for booleans
+# only, and a boolean ON would expand to the invalid flag -fsanitize=ON.
+set(SANITIZE "" CACHE STRING "Sanitizer(s) to pass to -fsanitize= (g++/clang only); empty disables")
+if(SANITIZE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
+  # Fail early with a clear message instead of a confusing compiler error later.
+  string(TOUPPER "${SANITIZE}" sanitize_upper)
+  if(sanitize_upper MATCHES "^(1|ON|YES|TRUE|Y)$")
+    message(FATAL_ERROR "SANITIZE must name a sanitizer (e.g. -DSANITIZE=address), not a boolean: '${SANITIZE}'")
+  endif()
+  # The flag is needed both when compiling and when linking.
+  add_compile_options("-fsanitize=${SANITIZE}")
+  add_link_options("-fsanitize=${SANITIZE}")
+endif()
 # set default build type to Release
 # Derived from: https://blog.kitware.com/cmake-and-the-default-build-type/
 set(default_build_type "Release")
@@ -96,6 +105,7 @@ add_subdirectory(fi)
 add_subdirectory(theta)
 add_subdirectory(sampling)
 add_subdirectory(tuple)
+add_subdirectory(req)
 if (WITH_PYTHON)
   add_subdirectory(python)

data/vendor/datasketches-cpp/LICENSE CHANGED Viewed

@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
     ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     DEALINGS IN THE SOFTWARE.
     -------------------------------------------------------------
-    Code Locations
+    Code Locations:
       * https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
     that is adapted from the above.
+    =============================================================
+    BSD License
+    =============================================================
+    Original source code:
+       https://github.com/pybind/pybind11/blob/master/LICENSE
+    Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+    1. Redistributions of source code must retain the above copyright notice, this
+       list of conditions and the following disclaimer.
+    2. Redistributions in binary form must reproduce the above copyright notice,
+       this list of conditions and the following disclaimer in the documentation
+       and/or other materials provided with the distribution.
+    3. Neither the name of the copyright holder nor the names of its contributors
+       may be used to endorse or promote products derived from this software
+       without specific prior written permission.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+    ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+    OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+    -------------------------------------------------------------
+    Code Locations:
+    Found only in the convenience binaries distributed from PyPI, which rely
+    on pybind11 code during compilation.
     =============================================================
     Public Domain
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
        https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
        Placed in the Public Domain by Austin Appleby
-    Code Locations
+    Code Locations:
       common/include/MurmurHash3.h
     that is adapted from the above.
     -------------------------------------------------------------
@@ -305,7 +342,7 @@ APPENDIX B: Additional licenses relevant to this product.
       * https://graphics.stanford.edu/~seander/bithacks.html
       * Placed in the Public Domain by Sean Eron Anderson
-    Code Locations
+    Code Locations:
       * common/include/ceiling_power_of_2.hpp
     that is adapted from the above.

data/vendor/datasketches-cpp/NOTICE CHANGED Viewed

@@ -1,5 +1,5 @@
 Apache DataSketches-cpp
-Copyright 2020 The Apache Software Foundation
+Copyright 2020-2021 The Apache Software Foundation
 Copyright 2015-2018 Yahoo
 Copyright 2019 Verizon Media

data/vendor/datasketches-cpp/README.md CHANGED Viewed

@@ -1,18 +1,18 @@
-# DataSketches Core C++ Library Component
-This is the core C++ component of the DataSketches library.  It contains all of the key sketching algorithms that are in the Java component and can be accessed directly from user applications.
+# Apache DataSketches Core C++ Library Component
+This is the core C++ component of the Apache DataSketches library.  It contains all of the key sketching algorithms that are in the Java component and can be accessed directly from user applications.
 This component is also a dependency of other components of the library that create adaptors for target systems, such as PostgreSQL.
 Note that we have a parallel core component for Java implementations of the same sketch algorithms,
 [datasketches-java](https://github.com/apache/datasketches-java).
-Please visit the main [DataSketches website](https://datasketches.apache.org) for more information.
+Please visit the main [Apache DataSketches website](https://datasketches.apache.org) for more information.
 If you are interested in making contributions to this site please see our [Community](https://datasketches.apache.org/docs/Community/) page for how to contact us.
 ---
-This code requires C++11. It was tested with GCC 4.8.5 (standard in RedHat at the time of this writing), GCC 8.2.0 and Apple LLVM version 10.0.1 (clang-1001.0.46.4)
+This code requires C++11.
 This includes Python bindings. For the Python interface, see the README notes in [the python subdirectory](https://github.com/apache/datasketches-cpp/tree/master/python).

data/vendor/datasketches-cpp/common/include/MurmurHash3.h CHANGED Viewed

@@ -3,6 +3,7 @@
 //  * Changed input seed in MurmurHash3_x64_128 to uint64_t
 //  * Define and use HashState reference to return result
 //  * Made entire hash function defined inline
+//  * Added compute_seed_hash
 //-----------------------------------------------------------------------------
 // MurmurHash3 was written by Austin Appleby, and is placed in the public
 // domain. The author hereby disclaims copyright to this source code.
@@ -15,6 +16,8 @@
 #ifndef _MURMURHASH3_H_
 #define _MURMURHASH3_H_
+#include <cstring>
 //-----------------------------------------------------------------------------
 // Platform-specific functions and macros
@@ -75,9 +78,11 @@ typedef struct {
 // Block read - if your platform needs to do endian-swapping or can only
 // handle aligned reads, do the conversion here
-FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
+FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
 {
-  return p[i];
+  uint64_t res;
+  memcpy(&res, p + i, sizeof(res));
+  return res;
 }
 //-----------------------------------------------------------------------------
@@ -94,7 +99,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
   return k;
 }
-FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
+FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
   static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
   static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
@@ -105,13 +110,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
   // Number of full 128-bit blocks of 16 bytes.
   // Possible exclusion of a remainder of up to 15 bytes.
-  const int nblocks = lenBytes >> 4; // bytes / 16
+  const size_t nblocks = lenBytes >> 4; // bytes / 16
   // Process the 128-bit blocks (the body) into the hash
   const uint64_t* blocks = (const uint64_t*)(data);
-  for (int i = 0; i < nblocks; ++i) { // 16 bytes per block
-    uint64_t k1 = getblock64(blocks,i*2+0);
-    uint64_t k2 = getblock64(blocks,i*2+1);
+  for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
+    uint64_t k1 = getblock64(blocks, i * 2 + 0);
+    uint64_t k2 = getblock64(blocks, i * 2 + 1);
     k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
     out.h1 = ROTL64(out.h1,27);
@@ -170,4 +175,10 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
 //-----------------------------------------------------------------------------
+FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) { // condenses a 64-bit seed into a 16-bit hash value
+  HashState hashes;
+  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes); // hash the bytes of the seed itself, with a fixed hash seed of 0
+  return static_cast<uint16_t>(hashes.h1 & 0xffff); // keep only the low 16 bits of h1
+}
 #endif // _MURMURHASH3_H_

data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp CHANGED Viewed

@@ -381,7 +381,7 @@ private:
   // The following computes an approximation to the lower bound of a Frequentist
   // confidence interval based on the tails of the Binomial distribution.
   static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
-    if (theta == 1) return num_samples;
+    if (theta == 1) return static_cast<double>(num_samples);
     if (num_samples == 0) return 0;
     if (num_samples == 1) {
       const double delta = delta_of_num_std_devs[num_std_devs];
@@ -395,24 +395,24 @@ private:
     }
     // at this point we know 2 <= num_samples <= 120
     if (theta > (1 - 1e-5)) { // empirically-determined threshold
-      return num_samples;
+      return static_cast<double>(num_samples);
     }
     if (theta < (num_samples / 360.0)) { // empirically-determined threshold
       // here we use the Gaussian approximation, but with a modified num_std_devs
-      const unsigned index = 3 * num_samples + (num_std_devs - 1);
+      const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
       const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
       return raw_lb - 0.5; // fake round down
     }
     // This is the most difficult range to approximate; we will compute an "exact" LB.
     // We know that est <= 360, so specialNStar() shouldn't be ridiculously slow.
     const double delta = delta_of_num_std_devs[num_std_devs];
-    return special_n_star(num_samples, theta, delta); // no need to round
+    return static_cast<double>(special_n_star(num_samples, theta, delta)); // no need to round
   }
   // The following computes an approximation to the upper bound of a Frequentist
   // confidence interval based on the tails of the Binomial distribution.
   static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
-    if (theta == 1) return num_samples;
+    if (theta == 1) return static_cast<double>(num_samples);
     if (num_samples == 0) {
       const double delta = delta_of_num_std_devs[num_std_devs];
       const double raw_ub = std::log(delta) / std::log(1 - theta);
@@ -425,18 +425,18 @@ private:
     }
     // at this point we know 2 <= num_samples <= 120
     if (theta > (1 - 1e-5)) { // empirically-determined threshold
-      return num_samples + 1;
+      return static_cast<double>(num_samples + 1);
     }
     if (theta < (num_samples / 360.0)) { // empirically-determined threshold
       // here we use the Gaussian approximation, but with a modified num_std_devs
-      const unsigned index = 3 * num_samples + (num_std_devs - 1);
+      const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
       const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
       return raw_ub + 0.5; // fake round up
     }
     // This is the most difficult range to approximate; we will compute an "exact" UB.
     // We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow.
     const double delta = delta_of_num_std_devs[num_std_devs];
-    return special_n_prime_f(num_samples, theta, delta); // no need to round
+    return static_cast<double>(special_n_prime_f(num_samples, theta, delta)); // no need to round
   }
   static void check_theta(double theta) {

data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp CHANGED Viewed

@@ -110,14 +110,14 @@ public:
    * @return the lower bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
     else if (k == 0) { return 0.0; }
     else if (k == 1) { return (exact_lower_bound_on_p_k_eq_1(n, delta_of_num_stdevs(num_std_devs))); }
     else if (k == n) { return (exact_lower_bound_on_p_k_eq_n(n, delta_of_num_stdevs(num_std_devs))); }
     else {
-      double x = abramowitz_stegun_formula_26p5p22((n - k) + 1, k, (-1.0 * num_std_devs));
+      double x = abramowitz_stegun_formula_26p5p22((n - k) + 1.0, static_cast<double>(k), (-1.0 * num_std_devs));
       return (1.0 - x); // which is p
     }
   }
@@ -145,18 +145,18 @@ public:
    * @return the upper bound of the approximate Clopper-Pearson confidence interval for the
    * unknown success probability.
    */
-  static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
+  static inline double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
     check_inputs(n, k);
     if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
     else if (k == n) { return 1.0; }
     else if (k == (n - 1)) {
-      return (exactU_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
+      return (exact_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
     }
     else if (k == 0) {
       return (exact_upper_bound_on_p_k_eq_zero(n, delta_of_num_stdevs(num_std_devs)));
     }
     else {
-      double x = abramowitz_stegun_formula_26p5p22(n - k, k + 1, num_std_devs);
+      double x = abramowitz_stegun_formula_26p5p22(static_cast<double>(n - k), k + 1.0, num_std_devs);
       return (1.0 - x); // which is p
     }
   }
@@ -167,7 +167,7 @@ public:
    * @param k is the number of successes. Must be non-negative, and cannot exceed n.
    * @return the estimate of the unknown binomial proportion.
    */
-  static inline double estimate_unknown_p(long n, long k) {
+  static inline double estimate_unknown_p(uint64_t n, uint64_t k) {
     check_inputs(n, k);
     if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
     else { return ((double) k / (double) n); }
@@ -193,9 +193,7 @@ public:
   }
 private:
-  static inline void check_inputs(long n, long k) {
-    if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
-    if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
+  static inline void check_inputs(uint64_t n, uint64_t k) {
     if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
   }
@@ -251,8 +249,7 @@ private:
   // and it is worth keeping it that way so that it will always be easy to verify
   // that the formula was typed in correctly.
-  static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
-      double yp) {
+  static inline double abramowitz_stegun_formula_26p5p22(double a, double b, double yp) {
     const double b2m1 = (2.0 * b) - 1.0;
     const double a2m1 = (2.0 * a) - 1.0;
     const double lambda = ((yp * yp) - 3.0) / 6.0;
@@ -268,19 +265,19 @@ private:
   // Formulas for some special cases.
-  static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
+  static inline double exact_upper_bound_on_p_k_eq_zero(uint64_t n, double delta) {
     return (1.0 - pow(delta, (1.0 / n)));
   }
-  static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_n(uint64_t n, double delta) {
     return (pow(delta, (1.0 / n)));
   }
-  static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
+  static inline double exact_lower_bound_on_p_k_eq_1(uint64_t n, double delta) {
     return (1.0 - pow((1.0 - delta), (1.0 / n)));
   }
-  static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
+  static inline double exact_upper_bound_on_p_k_eq_minusone(uint64_t n, double delta) {
     return (pow((1.0 - delta), (1.0 / n)));
   }

data/vendor/datasketches-cpp/common/include/common_defs.hpp CHANGED Viewed

@@ -23,11 +23,14 @@
 #include <cstdint>
 #include <string>
 #include <memory>
+#include <iostream>
 namespace datasketches {
 static const uint64_t DEFAULT_SEED = 9001;
+enum resize_factor { X1 = 0, X2, X4, X8 };
 template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
 template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
@@ -46,6 +49,29 @@ constexpr uint8_t lg_size_from_count(uint32_t n, double load_factor) {
   return log2(n) + ((n > static_cast<uint32_t>((1 << (log2(n) + 1)) * load_factor)) ? 2 : 1);
 }
+// stream helpers to hide casts
+template<typename T>
+static inline T read(std::istream& is) { // reads sizeof(T) raw bytes from the stream and returns them as a T
+  T value; // not initialized here; filled by the read below
+  is.read(reinterpret_cast<char*>(&value), sizeof(T)); // stream state is not checked here; NOTE(review): after a failed read the returned value is not meaningful - presumably callers check the stream
+  return value;
+}
+template<typename T>
+static inline void read(std::istream& is, T* ptr, size_t size_bytes) { // reads size_bytes raw bytes from the stream into the memory at ptr
+  is.read(reinterpret_cast<char*>(ptr), size_bytes); // the size is in bytes, not a count of T; stream state is not checked here
+}
+// Writes the sizeof(T) raw bytes of value to the stream.
+// The value is taken by const reference because it is never modified; this
+// lets temporaries and literals be written as well as named variables.
+// The stream state is not checked here.
+template<typename T>
+static inline void write(std::ostream& os, const T& value) {
+  os.write(reinterpret_cast<const char*>(&value), sizeof(T));
+}
+template<typename T>
+static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) { // writes size_bytes raw bytes starting at ptr to the stream
+  os.write(reinterpret_cast<const char*>(ptr), size_bytes); // the size is in bytes, not a count of T; stream state is not checked here
+}
 } // namespace
 #endif // _COMMON_DEFS_HPP_

data/vendor/datasketches-cpp/common/include/conditional_forward.hpp CHANGED Viewed

@@ -38,29 +38,41 @@ fwd_type<T1, T2> conditional_forward(T2&& value) {
 // Forward container as iterators
 template<typename Container>
-auto forward_begin(Container&& c) ->
-typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.begin())>::type
+auto forward_begin(Container&& c) -> typename std::enable_if<
+  std::is_lvalue_reference<Container>::value ||
+  std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
+  decltype(c.begin())
+>::type
 {
   return c.begin();
 }
 template<typename Container>
-auto forward_begin(Container&& c) ->
-typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.begin()))>::type
+auto forward_begin(Container&& c) -> typename std::enable_if<
+  !std::is_lvalue_reference<Container>::value &&
+  !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
+  decltype(std::make_move_iterator(c.begin()))
+>::type
 {
   return std::make_move_iterator(c.begin());
 }
 template<typename Container>
-auto forward_end(Container&& c) ->
-typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.end())>::type
+auto forward_end(Container&& c) -> typename std::enable_if<
+  std::is_lvalue_reference<Container>::value ||
+  std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
+  decltype(c.end())
+>::type
 {
   return c.end();
 }
 template<typename Container>
-auto forward_end(Container&& c) ->
-typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.end()))>::type
+auto forward_end(Container&& c) -> typename std::enable_if<
+  !std::is_lvalue_reference<Container>::value &&
+  !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
+  decltype(std::make_move_iterator(c.end()))
+>::type
 {
   return std::make_move_iterator(c.end());
 }

data/vendor/datasketches-cpp/common/include/count_zeros.hpp CHANGED Viewed

@@ -94,7 +94,7 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
 static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
   for (int i = 0; i < 4; i++) {
     const int byte = input & 0xff;
-    if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
+    if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
     input >>= 8;
   }
   return 32;
@@ -103,7 +103,7 @@ static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
 static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) {
   for (int i = 0; i < 8; i++) {
     const int byte = input & 0xff;
-    if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
+    if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
     input >>= 8;
   }
   return 64;

data/vendor/datasketches-cpp/common/include/memory_operations.hpp CHANGED Viewed

@@ -52,6 +52,18 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
   return size;
 }
+template<typename T>
+static inline size_t copy_to_mem(const T& item, void* dst) { // copies the raw bytes of item to dst
+  memcpy(dst, &item, sizeof(T)); // no bounds check here: dst must have room for sizeof(T) bytes
+  return sizeof(T); // number of bytes copied
+}
+template<typename T>
+static inline size_t copy_from_mem(const void* src, T& item) { // overwrites item with sizeof(T) raw bytes read from src
+  memcpy(&item, src, sizeof(T)); // no bounds check here: src must provide at least sizeof(T) bytes
+  return sizeof(T); // number of bytes copied
+}
 } // namespace
 #endif // _MEMORY_OPERATIONS_HPP_

data/vendor/datasketches-cpp/common/include/serde.hpp CHANGED Viewed

@@ -51,7 +51,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
     bool failure = false;
     try {
       os.write(reinterpret_cast<const char*>(items), sizeof(T) * num);
-    } catch (std::ostream::failure& e) {
+    } catch (std::ostream::failure&) {
       failure = true;
     }
     if (failure || !os.good()) {
@@ -62,7 +62,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
     bool failure = false;
     try {
       is.read((char*)items, sizeof(T) * num);
-    } catch (std::istream::failure& e) {
+    } catch (std::istream::failure&) {
       failure = true;
     }
     if (failure || !is.good()) {
@@ -99,11 +99,11 @@ struct serde<std::string> {
     bool failure = false;
     try {
       for (; i < num && os.good(); i++) {
-        uint32_t length = items[i].size();
+        uint32_t length = static_cast<uint32_t>(items[i].size());
         os.write((char*)&length, sizeof(length));
         os.write(items[i].c_str(), length);
       }
-    } catch (std::ostream::failure& e) {
+    } catch (std::ostream::failure&) {
       failure = true;
     }
     if (failure || !os.good()) {
@@ -121,12 +121,12 @@ struct serde<std::string> {
         std::string str;
         str.reserve(length);
         for (uint32_t j = 0; j < length; j++) {
-          str.push_back(is.get());
+          str.push_back(static_cast<char>(is.get()));
         }
         if (!is.good()) { break; }
         new (&items[i]) std::string(std::move(str));
       }
-    } catch (std::istream::failure& e) {
+    } catch (std::istream::failure&) {
       failure = true;
     }
     if (failure || !is.good()) {
@@ -143,7 +143,7 @@ struct serde<std::string> {
   size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) const {
     size_t bytes_written = 0;
     for (unsigned i = 0; i < num; ++i) {
-      const uint32_t length = items[i].size();
+      const uint32_t length = static_cast<uint32_t>(items[i].size());
       const size_t new_bytes = length + sizeof(length);
       check_memory_size(bytes_written + new_bytes, capacity);
       memcpy(ptr, &length, sizeof(length));

data/vendor/datasketches-cpp/common/test/CMakeLists.txt CHANGED Viewed

@@ -15,6 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
+# two parts here, the common test code for other parts to use,
+# and an integration test using the other parts of the library.
+# common dependencies for tests
 add_library(common_test OBJECT "")
 set_target_properties(common_test PROPERTIES
@@ -36,3 +40,23 @@ target_sources(common_test
     ${CMAKE_CURRENT_SOURCE_DIR}/catch_runner.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.cpp
 )
+# now the integration test part
+# Integration test: one executable that links every sketch component together
+# with the shared test code from common_test. Its source file is attached
+# below with target_sources().
+add_executable(integration_test)
+# An executable has no consumers, so its dependencies are PRIVATE; the keyword
+# also avoids the legacy keyword-less signature of target_link_libraries().
+target_link_libraries(integration_test
+  PRIVATE
+    cpc fi hll kll req sampling theta tuple common_test
+)
+set_target_properties(integration_test PROPERTIES
+  CXX_STANDARD 11
+  CXX_STANDARD_REQUIRED YES
+)
+# Register the executable with CTest; the target name is resolved to its path.
+add_test(
+  NAME integration_test
+  COMMAND integration_test
+)
+# Absolute path, matching how the common_test sources are listed in this file.
+target_sources(integration_test
+  PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/integration_test.cpp
+)

data/vendor/datasketches-cpp/common/test/integration_test.cpp ADDED Viewed

@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <catch.hpp>
+#include "cpc_sketch.hpp"
+#include "cpc_union.hpp"
+#include "frequent_items_sketch.hpp"
+#include "hll.hpp"
+#include "kll_sketch.hpp"
+#include "req_sketch.hpp"
+#include "var_opt_sketch.hpp"
+#include "var_opt_union.hpp"
+#include "theta_sketch.hpp"
+#include "theta_union.hpp"
+#include "theta_intersection.hpp"
+#include "theta_a_not_b.hpp"
+#include "tuple_sketch.hpp"
+#include "tuple_union.hpp"
+#include "tuple_intersection.hpp"
+#include "tuple_a_not_b.hpp"
+namespace datasketches {
+template<typename Summary>
+struct subtracting_intersection_policy { // policy functor; in this file it only serves to instantiate tuple_intersection below
+  void operator()(Summary& summary, const Summary& other) const {
+    summary -= other; // updates summary in place by subtracting the other summary
+  }
+};
+using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
+TEST_CASE("integration: declare all sketches", "[integration]") { // constructs one object of each sketch/set-operation type; makes no assertions
+  cpc_sketch cpc(12);
+  cpc_union cpc_u(12);
+  frequent_items_sketch<std::string> fi(100);
+  hll_sketch hll(13);
+  hll_union hll_u(13);
+  kll_sketch<double> kll(200);
+  req_sketch<double> req(12);
+  var_opt_sketch<std::string> vo(100);
+  var_opt_union<std::string> vo_u(100);
+  update_theta_sketch theta = update_theta_sketch::builder().build(); // theta types built here come from their builder()
+  theta_union theta_u = theta_union::builder().build();
+  theta_intersection theta_i; // default-constructed
+  theta_a_not_b theta_anb;
+  auto tuple = update_tuple_sketch<float>::builder().build();
+  auto tuple_u = tuple_union<float>::builder().build();
+  tuple_intersection_float tuple_i; // uses the subtracting policy defined above
+  tuple_a_not_b<float> tuple_anb;
+}
+} /* namespace datasketches */