RubyGems - datasketches - Versions diffs - 0.3.2 → 0.4.1 - Mend

datasketches 0.3.2 → 0.4.1

Files changed (237)

data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp CHANGED

@@ -26,45 +26,39 @@
 namespace datasketches {
+/// CPC constants
 namespace cpc_constants {
-    const uint8_t MIN_LG_K = 4;
-    const uint8_t MAX_LG_K = 26;
-    const uint8_t DEFAULT_LG_K = 11;
+  /// min log2 of K
+  const uint8_t MIN_LG_K = 4;
+  /// max log2 of K
+  const uint8_t MAX_LG_K = 26;
+  /// default log2 of K
+  const uint8_t DEFAULT_LG_K = 11;
 }
-// TODO: Redundant and deprecated. Will be removed in next major version release.
-static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
-static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
-static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
-template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
-template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
-template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
-template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
 // forward declaration
 template<typename A> class u32_table;
 template<typename A>
 struct compressed_state {
+  using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
   explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0),
       window_data(allocator), window_data_words(0) {}
-  vector_u32<A> table_data;
+  vector_u32 table_data;
   uint32_t table_data_words;
   uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
-  vector_u32<A> window_data;
+  vector_u32 window_data;
   uint32_t window_data_words;
 };
 template<typename A>
 struct uncompressed_state {
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
   explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {}
   u32_table<A> table;
-  vector_u8<A> window;
+  vector_bytes window;
 };
 } /* namespace datasketches */

data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp CHANGED

@@ -47,6 +47,9 @@ inline cpc_compressor<A>& get_compressor();
 template<typename A>
 class cpc_compressor {
 public:
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+  using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
   void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
   void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
@@ -126,17 +129,17 @@ private:
   uint16_t* make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values);
   void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const;
-  void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
+  void compress_surprising_values(const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result) const;
   void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
-  vector_u32<A> uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
-  void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
+  vector_u32 uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
+  void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_bytes& window, uint8_t lg_k, uint32_t num_coupons) const;
   static size_t safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits);
   static size_t safe_length_for_compressed_window_buf(uint32_t k);
   static uint8_t determine_pseudo_phase(uint8_t lg_k, uint32_t c);
-  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
+  static inline vector_u32 tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
   static inline uint8_t golomb_choose_number_of_base_bits(uint32_t k, uint64_t count);
 };

data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp CHANGED

@@ -183,7 +183,7 @@ void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompress
 template<typename A>
 void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
   if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
-  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
   u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
   compress_surprising_values(pairs, source.get_lg_k(), result);
 }
@@ -192,7 +192,7 @@ template<typename A>
 void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
       lg_k, source.table_data.get_allocator());
   target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
 }
@@ -204,12 +204,12 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
   if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
   if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
   const uint32_t k = 1 << source.get_lg_k();
-  vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs_from_table = source.surprising_value_table.unwrapping_get_items();
   const uint32_t num_pairs_from_table = static_cast<uint32_t>(pairs_from_table.size());
   if (num_pairs_from_table > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, num_pairs_from_table);
   const uint32_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
-  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
+  vector_u32 all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
   u32_table<A>::merge(
       pairs_from_table.data(), 0, pairs_from_table.size(),
@@ -224,7 +224,7 @@ template<typename A>
 void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
       lg_k, source.table_data.get_allocator());
   // In the hybrid flavor, some of these pairs actually
@@ -250,7 +250,7 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& sour
 template<typename A>
 void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
-  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
   if (pairs.size() > 0) {
     // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
     // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
@@ -277,7 +277,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& sour
     target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+    vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
         lg_k, source.table_data.get_allocator());
     // undo the compressor's 8-column shift
     for (uint32_t i = 0; i < num_pairs; i++) {
@@ -291,7 +291,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& sour
 template<typename A>
 void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
-  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
   if (pairs.size() > 0) {
     // Here we apply a complicated transformation to the column indices, which
     // changes the implied ordering of the pairs, so we must do it before sorting.
@@ -330,7 +330,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
     target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+    vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
         lg_k, source.table_data.get_allocator());
     const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -356,7 +356,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
 }
 template<typename A>
-void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const {
+void cpc_compressor<A>::compress_surprising_values(const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result) const {
   const uint32_t k = 1 << lg_k;
   const uint32_t num_pairs = static_cast<uint32_t>(pairs.size());
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
@@ -374,10 +374,10 @@ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, u
 }
 template<typename A>
-vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
-    uint8_t lg_k, const A& allocator) const {
+auto cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
+    uint8_t lg_k, const A& allocator) const -> vector_u32 {
   const uint32_t k = 1 << lg_k;
-  vector_u32<A> pairs(num_pairs, 0, allocator);
+  vector_u32 pairs(num_pairs, 0, allocator);
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
   low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
   return pairs;
@@ -399,7 +399,7 @@ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t l
 }
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window,
+void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_bytes& window,
     uint8_t lg_k, uint32_t num_coupons) const {
   const uint32_t k = 1 << lg_k;
   window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
@@ -722,10 +722,10 @@ void write_unary(
 // The empty space that this leaves at the beginning of the output array
 // will be filled in later by the caller.
 template<typename A>
-vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
-    uint32_t empty_space, const A& allocator) {
+auto cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
+    uint32_t empty_space, const A& allocator) -> vector_u32 {
   const size_t output_length = empty_space + num_pairs_to_get;
-  vector_u32<A> pairs(output_length, 0, allocator);
+  vector_u32 pairs(output_length, 0, allocator);
   size_t pair_index = empty_space;
   for (unsigned row_index = 0; row_index < k; row_index++) {
     uint8_t byte = window[row_index];

data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp CHANGED

@@ -33,58 +33,58 @@
 namespace datasketches {
-/*
- * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch
- *
- * This is a very compact (in serialized form) distinct counting sketch.
- * The theory is described in the following paper:
- * https://arxiv.org/abs/1708.06839
- *
- * author Kevin Lang
- * author Alexander Saydakov
- */
-// forward-declarations
+// forward declarations
 template<typename A> class cpc_sketch_alloc;
 template<typename A> class cpc_union_alloc;
-// alias with default allocator for convenience
+/// CPC sketch alias with default allocator
 using cpc_sketch = cpc_sketch_alloc<std::allocator<uint8_t>>;
-// allocation and initialization of global decompression (decoding) tables
-// call this before anything else if you want to control the initialization time
-// for instance, to have this happen outside of a transaction context
-// otherwise initialization happens on the first use (serialization or deserialization)
-// it is safe to call more than once assuming no race conditions
-// this is not thread safe! neither is the rest of the library
+/**
+ * Allocation and initialization of global decompression (decoding) tables.
+ * Call this before anything else if you want to control the initialization time.
+ * For instance, to have this happen outside of a transaction context.
+ * Otherwise initialization happens on the first use (serialization or deserialization).
+ * It is safe to call more than once assuming no race conditions.
+ * This is not thread safe! Neither is the rest of the library.
+ */
 template<typename A> void cpc_init();
+/**
+ * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch
+ *
+ * This is a very compact (in serialized form) distinct counting sketch.
+ * The theory is described in the following paper:
+ * https://arxiv.org/abs/1708.06839
+ *
+ * @author Kevin Lang
+ * @author Alexander Saydakov
+ */
 template<typename A>
 class cpc_sketch_alloc {
 public:
+  using allocator_type = A;
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+  using vector_u64 = std::vector<uint64_t, typename std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
   /**
    * Creates an instance of the sketch given the lg_k parameter and hash seed.
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
+   * @param allocator instance of an allocator
    */
   explicit cpc_sketch_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
-  using allocator_type = A;
+  /// @return allocator
   A get_allocator() const;
-  /**
-   * @return configured lg_k of this sketch
-   */
+  /// @return configured lg_k of this sketch
   uint8_t get_lg_k() const;
-  /**
-   * @return true if this sketch represents an empty set
-   */
+  /// @return true if this sketch represents an empty set
   bool is_empty() const;
-  /**
-   * @return estimate of the distinct count of the input stream
-   */
+  /// @return estimate of the distinct count of the input stream
   double get_estimate() const;
   /**
@@ -189,13 +189,14 @@ public:
    * Otherwise two sketches that should represent overlapping sets will be disjoint
    * For instance, for signed 32-bit values call update(int32_t) method above,
    * which does widening conversion to int64_t, if compatibility with Java is expected
-   * @param data pointer to the data
-   * @param length of the data in bytes
+   * @param value pointer to the data
+   * @param size of the data in bytes
    */
   void update(const void* value, size_t size);
   /**
    * Returns a human-readable summary of this sketch
+   * @return a human-readable summary of this sketch
    */
   string<A> to_string() const;
@@ -205,16 +206,13 @@ public:
    */
   void serialize(std::ostream& os) const;
-  // This is a convenience alias for users
-  // The type returned by the following serialize method
-  using vector_bytes = vector_u8<A>;
   /**
    * This method serializes the sketch as a vector of bytes.
    * An optional header can be reserved in front of the sketch.
    * It is an uninitialized space of a given size.
    * This header is used in Datasketches PostgreSQL extension.
    * @param header_size_bytes space to reserve in front of the sketch
+   * @return serialized sketch as a vector of bytes
    */
   vector_bytes serialize(unsigned header_size_bytes = 0) const;
@@ -222,6 +220,7 @@ public:
    * This method deserializes a sketch from a given stream.
    * @param is input stream
    * @param seed the seed for the hash function that was used to create the sketch
+   * @param allocator instance of an Allocator
    * @return an instance of a sketch
    */
   static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
@@ -231,6 +230,7 @@ public:
    * @param bytes pointer to the array of bytes
    * @param size the size of the array
    * @param seed the seed for the hash function that was used to create the sketch
+   * @param allocator instance of an Allocator
    * @return an instance of the sketch
    */
   static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
@@ -246,10 +246,10 @@ public:
    */
   static size_t get_max_serialized_size_bytes(uint8_t lg_k);
-  // for internal use
+  /// @private for internal use
   uint32_t get_num_coupons() const;
-  // for debugging
+  /// @private for debugging
   // this should catch some forms of corruption during serialization-deserialization
   bool validate() const;
@@ -276,7 +276,7 @@ private:
   uint32_t num_coupons; // the number of coupons collected so far
   u32_table<A> surprising_value_table;
-  vector_u8<A> sliding_window;
+  vector_bytes sliding_window;
   uint8_t window_offset; // derivable from num_coupons, but made explicit for speed
   uint8_t first_interesting_column; // This is part of a speed optimization
@@ -285,7 +285,7 @@ private:
   // for deserialization and cpc_union::get_result()
   cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t first_interesting_column, u32_table<A>&& table,
-      vector_u8<A>&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed);
+      vector_bytes&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed);
   inline void row_col_update(uint32_t row_col);
   inline void update_sparse(uint32_t row_col);
@@ -308,7 +308,7 @@ private:
   static inline uint8_t determine_correct_offset(uint8_t lg_k, uint64_t c);
   // this produces a full-size k-by-64 bit matrix
-  vector_u64<A> build_bit_matrix() const;
+  vector_u64 build_bit_matrix() const;
   static uint8_t get_preamble_ints(uint32_t num_coupons, bool has_hip, bool has_table, bool has_window);
   inline void write_hip(std::ostream& os) const;

data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp CHANGED

@@ -315,7 +315,7 @@ void cpc_sketch_alloc<A>::move_window() {
   const uint32_t k = 1 << lg_k;
   // Construct the full-sized bit matrix that corresponds to the sketch
-  vector_u64<A> bit_matrix = build_bit_matrix();
+  vector_u64 bit_matrix = build_bit_matrix();
   // refresh the KXP register on every 8th window shift.
   if ((new_offset & 0x7) == 0) refresh_kxp(bit_matrix.data());
@@ -458,7 +458,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
 }
 template<typename A>
-vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
+auto cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
   compressed_state<A> compressed(sliding_window.get_allocator());
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
@@ -469,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
   const bool has_window = compressed.window_data.size() > 0;
   const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
   const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
-  vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
+  vector_bytes bytes(size, 0, sliding_window.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   ptr += copy_to_mem(preamble_ints, ptr);
   const uint8_t serial_version = SERIAL_VERSION;
@@ -712,15 +712,18 @@ static const size_t CPC_MAX_PREAMBLE_SIZE_BYTES = 40;
 template<typename A>
 size_t cpc_sketch_alloc<A>::get_max_serialized_size_bytes(uint8_t lg_k) {
   check_lg_k(lg_k);
-  if (lg_k <= CPC_EMPIRICAL_SIZE_MAX_LGK) return CPC_EMPIRICAL_MAX_SIZE_BYTES[lg_k - CPC_MIN_LG_K] + CPC_MAX_PREAMBLE_SIZE_BYTES;
+  if (lg_k <= CPC_EMPIRICAL_SIZE_MAX_LGK) {
+    return CPC_EMPIRICAL_MAX_SIZE_BYTES[lg_k - cpc_constants::MIN_LG_K] + CPC_MAX_PREAMBLE_SIZE_BYTES;
+  }
   const uint32_t k = 1 << lg_k;
   return (int) (CPC_EMPIRICAL_MAX_SIZE_FACTOR * k) + CPC_MAX_PREAMBLE_SIZE_BYTES;
 }
 template<typename A>
 void cpc_sketch_alloc<A>::check_lg_k(uint8_t lg_k) {
-  if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
-    throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
+  if (lg_k < cpc_constants::MIN_LG_K || lg_k > cpc_constants::MAX_LG_K) {
+    throw std::invalid_argument("lg_k must be >= " + std::to_string(cpc_constants::MIN_LG_K) + " and <= "
+        + std::to_string(cpc_constants::MAX_LG_K) + ": " + std::to_string(lg_k));
   }
 }
@@ -731,14 +734,14 @@ uint32_t cpc_sketch_alloc<A>::get_num_coupons() const {
 template<typename A>
 bool cpc_sketch_alloc<A>::validate() const {
-  vector_u64<A> bit_matrix = build_bit_matrix();
+  vector_u64 bit_matrix = build_bit_matrix();
   const uint64_t num_bits_set = count_bits_set_in_matrix(bit_matrix.data(), 1ULL << lg_k);
   return num_bits_set == num_coupons;
 }
 template<typename A>
 cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t first_interesting_column,
-    u32_table<A>&& table, vector_u8<A>&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed):
+    u32_table<A>&& table, vector_bytes&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed):
 lg_k(lg_k),
 seed(seed),
 was_merged(!has_hip),
@@ -800,14 +803,14 @@ uint8_t cpc_sketch_alloc<A>::determine_correct_offset(uint8_t lg_k, uint64_t c)
 }
 template<typename A>
-vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
+auto cpc_sketch_alloc<A>::build_bit_matrix() const -> vector_u64 {
   const uint32_t k = 1 << lg_k;
   if (window_offset > 56) throw std::logic_error("offset > 56");
   // Fill the matrix with default rows in which the "early zone" is filled with ones.
   // This is essential for the routine's O(k) time cost (as opposed to O(C)).
   const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
-  vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
+  vector_u64 matrix(k, default_row, sliding_window.get_allocator());
   if (num_coupons == 0) return matrix;

data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp CHANGED

@@ -27,31 +27,55 @@
 namespace datasketches {
-/*
+/// CPC union alias with default allocator
+using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
+/**
  * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Union
  *
  * author Kevin Lang
  * author Alexander Saydakov
  */
-// alias with default allocator for convenience
-using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
 template<typename A>
 class cpc_union_alloc {
 public:
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+  using vector_u64 = std::vector<uint64_t, typename std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
   /**
    * Creates an instance of the union given the lg_k parameter and hash seed.
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
+   * @param allocator instance of an allocator
    */
   explicit cpc_union_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+  /**
+   * Copy constructor
+   * @param other union to be copied
+   */
   cpc_union_alloc(const cpc_union_alloc<A>& other);
+  /**
+   * Move constructor
+   * @param other union to be moved
+   */
   cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
   ~cpc_union_alloc();
+  /**
+   * Copy assignment
+   * @param other union to be copied
+   * @return reference to this union
+   */
   cpc_union_alloc<A>& operator=(const cpc_union_alloc<A>& other);
+  /**
+   * Move assignment
+   * @param other union to be moved
+   * @return reference to this union
+   */
   cpc_union_alloc<A>& operator=(cpc_union_alloc<A>&& other) noexcept;
   /**
@@ -73,14 +97,14 @@ public:
   cpc_sketch_alloc<A> get_result() const;
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> AllocU8;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<cpc_sketch_alloc<A>> AllocCpc;
+  using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
+  using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
+  using AllocCpc = typename std::allocator_traits<A>::template rebind_alloc<cpc_sketch_alloc<A>>;
   uint8_t lg_k;
   uint64_t seed;
   cpc_sketch_alloc<A>* accumulator;
-  vector_u64<A> bit_matrix;
+  vector_u64 bit_matrix;
   template<typename S> void internal_update(S&& sketch); // to support both rvalue and lvalue
@@ -90,8 +114,8 @@ private:
   void switch_to_bit_matrix();
   void walk_table_updating_sketch(const u32_table<A>& table);
   void or_table_into_matrix(const u32_table<A>& table);
-  void or_window_into_matrix(const vector_u8<A>& sliding_window, uint8_t offset, uint8_t src_lg_k);
-  void or_matrix_into_matrix(const vector_u64<A>& src_matrix, uint8_t src_lg_k);
+  void or_window_into_matrix(const vector_bytes& sliding_window, uint8_t offset, uint8_t src_lg_k);
+  void or_matrix_into_matrix(const vector_u64& src_matrix, uint8_t src_lg_k);
   void reduce_k(uint8_t new_lg_k);
 };

data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp CHANGED

@@ -33,8 +33,8 @@ seed(seed),
 accumulator(nullptr),
 bit_matrix(allocator)
 {
-  if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
-    throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
+  if (lg_k < cpc_constants::MIN_LG_K || lg_k > cpc_constants::MAX_LG_K) {
+    throw std::invalid_argument("lg_k must be >= " + std::to_string(cpc_constants::MIN_LG_K) + " and <= " + std::to_string(cpc_constants::MAX_LG_K) + ": " + std::to_string(lg_k));
   }
   accumulator = new (AllocCpc(allocator).allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
 }
@@ -166,7 +166,7 @@ void cpc_union_alloc<A>::internal_update(S&& sketch) {
   // SLIDING mode involves inverted logic, so we can't just walk the source sketch.
   // Instead, we convert it to a bitMatrix that can be OR'ed into the destination.
   if (cpc_sketch_alloc<A>::flavor::SLIDING != src_flavor) throw std::logic_error("wrong flavor"); // Case D
-  vector_u64<A> src_matrix = sketch.build_bit_matrix();
+  vector_u64 src_matrix = sketch.build_bit_matrix();
   or_matrix_into_matrix(src_matrix, sketch.get_lg_k());
 }
@@ -203,7 +203,7 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
   const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
-  vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
+  vector_bytes sliding_window(k, 0, bit_matrix.get_allocator());
   // don't need to zero the window's memory
   // dynamically growing caused snowplow effect
@@ -289,7 +289,7 @@ void cpc_union_alloc<A>::or_table_into_matrix(const u32_table<A>& table) {
 }
 template<typename A>
-void cpc_union_alloc<A>::or_window_into_matrix(const vector_u8<A>& sliding_window, uint8_t offset, uint8_t src_lg_k) {
+void cpc_union_alloc<A>::or_window_into_matrix(const vector_bytes& sliding_window, uint8_t offset, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
   const uint32_t src_k = 1 << src_lg_k;
@@ -299,7 +299,7 @@ void cpc_union_alloc<A>::or_window_into_matrix(const vector_u8<A>& sliding_windo
 }
 template<typename A>
-void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64<A>& src_matrix, uint8_t src_lg_k) {
+void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64& src_matrix, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
   const uint32_t src_k = 1 << src_lg_k;
@@ -315,10 +315,10 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
   if (bit_matrix.size() > 0) { // downsample the unioner's bit matrix
     if (accumulator != nullptr) throw std::logic_error("accumulator is not null");
-    vector_u64<A> old_matrix = std::move(bit_matrix);
+    vector_u64 old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
     const uint32_t new_k = 1 << new_lg_k;
-    bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
+    bit_matrix = vector_u64(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);
     return;

data/vendor/datasketches-cpp/cpc/include/u32_table.hpp CHANGED

@@ -38,6 +38,7 @@ static const uint32_t U32_TABLE_DOWNSIZE_DENOM = 4LL;
 template<typename A>
 class u32_table {
 public:
+  using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
   u32_table(const A& allocator);
   u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
@@ -54,7 +55,7 @@ public:
   static u32_table make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator);
-  vector_u32<A> unwrapping_get_items() const;
+  vector_u32 unwrapping_get_items() const;
   static void merge(
     const uint32_t* arr_a, size_t start_a, size_t length_a, // input
@@ -70,7 +71,7 @@ private:
   uint8_t lg_size; // log2 of number of slots
   uint8_t num_valid_bits;
   uint32_t num_items;
-  vector_u32<A> slots;
+  vector_u32 slots;
   inline uint32_t lookup(uint32_t item) const;
   inline void must_insert(uint32_t item);