RubyGems - datasketches - Versions diffs - 0.1.2 → 0.2.0 - Mend

datasketches 0.1.2 → 0.2.0

Files changed (160) hide show

data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp CHANGED Viewed

@@ -41,13 +41,13 @@ void cpc_init() {
 }
 template<typename A>
-cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed):
+cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 was_merged(false),
 num_coupons(0),
-surprising_value_table(2, 6 + lg_k),
-sliding_window(),
+surprising_value_table(2, 6 + lg_k, allocator),
+sliding_window(allocator),
 window_offset(0),
 first_interesting_column(0),
 kxp(1 << lg_k),
@@ -58,6 +58,11 @@ hip_est_accum(0)
   }
 }
+template<typename A>
+A cpc_sketch_alloc<A>::get_allocator() const {
+  return sliding_window.get_allocator();
+}
 template<typename A>
 uint8_t cpc_sketch_alloc<A>::get_lg_k() const {
   return lg_k;
@@ -277,7 +282,7 @@ void cpc_sketch_alloc<A>::promote_sparse_to_windowed() {
   sliding_window.resize(k, 0); // zero the memory (because we will be OR'ing into it)
-  u32_table<A> new_table(2, 6 + lg_k);
+  u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
   const uint32_t* old_slots = surprising_value_table.get_slots();
   const size_t old_num_slots = 1 << surprising_value_table.get_lg_size();
@@ -401,7 +406,7 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
 template<typename A>
 void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(A(sliding_window.get_allocator()));
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -454,7 +459,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
 template<typename A>
 vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(sliding_window.get_allocator());
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -464,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
   const bool has_window = compressed.window_data.size() > 0;
   const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
   const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
-  vector_u8<A> bytes(size);
+  vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
   const uint8_t serial_version = SERIAL_VERSION;
@@ -511,7 +516,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
 }
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   uint8_t preamble_ints;
   is.read((char*)&preamble_ints, sizeof(preamble_ints));
   uint8_t serial_version;
@@ -529,7 +534,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
   const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -583,7 +588,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   if (!is.good())
     throw std::runtime_error("error reading from std::istream");
@@ -592,7 +597,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
 }
 template<typename A>
-cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
+cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   const char* base = static_cast<const char*>(bytes);
@@ -614,7 +619,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
   const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
   const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
   ensure_minimum_memory(size, preamble_ints << 2);
-  compressed_state<A> compressed;
+  compressed_state<A> compressed(allocator);
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
   compressed.window_data_words = 0;
@@ -677,7 +682,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
     throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
         + std::to_string(compute_seed_hash(seed)));
   }
-  uncompressed_state<A> uncompressed;
+  uncompressed_state<A> uncompressed(allocator);
   get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
   return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
       std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
@@ -766,7 +771,7 @@ vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
   // Fill the matrix with default rows in which the "early zone" is filled with ones.
   // This is essential for the routine's O(k) time cost (as opposed to O(C)).
   const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
-  vector_u64<A> matrix(k, default_row);
+  vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
   if (num_coupons == 0) return matrix;

data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp CHANGED Viewed

@@ -35,7 +35,7 @@ namespace datasketches {
  */
 // alias with default allocator for convenience
-typedef cpc_union_alloc<std::allocator<void>> cpc_union;
+using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
 template<typename A>
 class cpc_union_alloc {
@@ -45,7 +45,7 @@ public:
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
    */
-  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
+  explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
   cpc_union_alloc(const cpc_union_alloc<A>& other);
   cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;

data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp CHANGED Viewed

@@ -25,16 +25,16 @@
 namespace datasketches {
 template<typename A>
-cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed):
+cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
 lg_k(lg_k),
 seed(seed),
 accumulator(nullptr),
-bit_matrix()
+bit_matrix(allocator)
 {
   if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
     throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
   }
-  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed);
+  accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
 }
 template<typename A>
@@ -200,13 +200,13 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
   const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
-  vector_u8<A> sliding_window(k);
+  vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
   // don't need to zero the window's memory
   // dynamically growing caused snowplow effect
   uint8_t table_lg_size = lg_k - 4; // K/16; in some cases this will end up being oversized
   if (table_lg_size < 2) table_lg_size = 2;
-  u32_table<A> table(table_lg_size, 6 + lg_k);
+  u32_table<A> table(table_lg_size, 6 + lg_k, bit_matrix.get_allocator());
   // the following should work even when the offset is zero
   const uint64_t mask_for_clearing_window = (static_cast<uint64_t>(0xff) << offset) ^ UINT64_MAX;
@@ -314,7 +314,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
     vector_u64<A> old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
     const size_t new_k = 1 << new_lg_k;
-    bit_matrix = vector_u64<A>(new_k, 0);
+    bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);
     return;

data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp CHANGED Viewed

@@ -24,12 +24,6 @@
 namespace datasketches {
-static inline uint16_t compute_seed_hash(uint64_t seed) {
-  HashState hashes;
-  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
-  return hashes.h1 & 0xffff;
-}
 static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
   if (y == 0) throw std::invalid_argument("divide_longs_rounding_up: bad argument");
   const uint64_t quotient = x / y;

data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp CHANGED Viewed

@@ -231,7 +231,7 @@ static const double ICON_POLYNOMIAL_COEFFICIENTS[ICON_TABLE_SIZE] = {
 #endif
 };
-static double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
+static inline double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
   const int final = start + num - 1;
   double total = coefficients[final];
   for (int j = final - 1; j >= start; j--) {
@@ -241,11 +241,11 @@ static double evaluate_polynomial(const double* coefficients, int start, int num
   return total;
 }
-static double icon_exponential_approximation(double k, double c) {
+static inline double icon_exponential_approximation(double k, double c) {
   return (0.7940236163830469 * k * pow(2.0, c / k));
 }
-static double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
+static inline double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
   if (lg_k < ICON_MIN_LOG_K || lg_k > ICON_MAX_LOG_K) throw std::out_of_range("lg_k out of range");
   if (c < 2) return ((c == 0) ? 0.0 : 1.0);
   const size_t k = 1 << lg_k;

data/vendor/datasketches-cpp/cpc/include/u32_table.hpp CHANGED Viewed

@@ -39,8 +39,8 @@ template<typename A>
 class u32_table {
 public:
-  u32_table();
-  u32_table(uint8_t lg_size, uint8_t num_valid_bits);
+  u32_table(const A& allocator);
+  u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
   inline size_t get_num_items() const;
   inline const uint32_t* get_slots() const;
@@ -52,7 +52,7 @@ public:
   // returns true iff the item was present and was therefore removed from the table
   inline bool maybe_delete(uint32_t item);
-  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
+  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
   vector_u32<A> unwrapping_get_items() const;

data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp CHANGED Viewed

@@ -29,19 +29,19 @@
 namespace datasketches {
 template<typename A>
-u32_table<A>::u32_table():
+u32_table<A>::u32_table(const A& allocator):
 lg_size(0),
 num_valid_bits(0),
 num_items(0),
-slots()
+slots(allocator)
 {}
 template<typename A>
-u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
+u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator):
 lg_size(lg_size),
 num_valid_bits(num_valid_bits),
 num_items(0),
-slots(1 << lg_size, UINT32_MAX)
+slots(1 << lg_size, UINT32_MAX, allocator)
 {
   if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
   if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
@@ -110,10 +110,10 @@ bool u32_table<A>::maybe_delete(uint32_t item) {
 // this one is specifically tailored to be a part of fm85 decompression scheme
 template<typename A>
-u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
+u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
   uint8_t lg_num_slots = 2;
   while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
-  u32_table<A> table(lg_num_slots, 6 + lg_k);
+  u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
   // Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
   // However, we are starting out with the correct final table size, so the problem might not occur
   for (size_t i = 0; i < num_pairs; i++) {
@@ -152,7 +152,7 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
   const size_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
   vector_u32<A> old_slots = std::move(slots);
-  slots = vector_u32<A>(new_size, UINT32_MAX);
+  slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
   lg_size = new_lg_size;
   for (size_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
@@ -169,9 +169,9 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
 // The result is nearly sorted, so make sure to use an efficient sort for that case
 template<typename A>
 vector_u32<A> u32_table<A>::unwrapping_get_items() const {
-  if (num_items == 0) return vector_u32<A>();
+  if (num_items == 0) return vector_u32<A>(slots.get_allocator());
   const size_t table_size = 1 << lg_size;
-  vector_u32<A> result(num_items);
+  vector_u32<A> result(num_items, 0, slots.get_allocator());
   size_t i = 0;
   size_t l = 0;
   size_t r = num_items - 1;

data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt CHANGED Viewed

@@ -41,4 +41,5 @@ target_sources(cpc_test
     cpc_sketch_test.cpp
     cpc_union_test.cpp
     compression_test.cpp
+    cpc_sketch_allocation_test.cpp
 )

data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp ADDED Viewed

@@ -0,0 +1,237 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <cstring>
+#include <sstream>
+#include <fstream>
+#include <catch.hpp>
+#include "cpc_sketch.hpp"
+#include "test_allocator.hpp"
+namespace datasketches {
+using cpc_sketch_test_alloc = cpc_sketch_alloc<test_allocator<uint8_t>>;
+using alloc = test_allocator<uint8_t>;
+TEST_CASE("cpc sketch allocation: serialize deserialize empty", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+    sketch.serialize(s);
+    auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize sparse", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(100);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+    sketch.serialize(s);
+    auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize hybrid", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(200);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+    sketch.serialize(s);
+    auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize pinned", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(2000);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+    sketch.serialize(s);
+    auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize sliding", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(20000);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+    sketch.serialize(s);
+    auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serializing deserialize sliding large", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(3000000);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
+    sketch.serialize(s);
+    auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize empty, bytes", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    auto bytes = sketch.serialize();
+    auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize sparse, bytes", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(100);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    auto bytes = sketch.serialize();
+    auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize hybrid, bytes", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(200);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    auto bytes = sketch.serialize();
+    auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize pinned, bytes", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(2000);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    auto bytes = sketch.serialize();
+    auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+TEST_CASE("cpc sketch allocation: serialize deserialize sliding, bytes", "[cpc_sketch]") {
+  test_allocator_total_bytes = 0;
+  test_allocator_net_allocations = 0;
+  {
+    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
+    const int n(20000);
+    for (int i = 0; i < n; i++) sketch.update(i);
+    auto bytes = sketch.serialize();
+    auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
+    REQUIRE(deserialized.is_empty() == sketch.is_empty());
+    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+    REQUIRE(deserialized.validate());
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
+  }
+  REQUIRE(test_allocator_total_bytes == 0);
+  REQUIRE(test_allocator_net_allocations == 0);
+}
+} /* namespace datasketches */