RubyGems - datasketches - Versions diffs - 0.3.2 → 0.4.0 - Mend

datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (237)

data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp CHANGED Viewed

@@ -26,45 +26,39 @@
 namespace datasketches {
+/// CPC constants
 namespace cpc_constants {
-    const uint8_t MIN_LG_K = 4;
-    const uint8_t MAX_LG_K = 26;
-    const uint8_t DEFAULT_LG_K = 11;
+  /// min log2 of K
+  const uint8_t MIN_LG_K = 4;
+  /// max log2 of K
+  const uint8_t MAX_LG_K = 26;
+  /// default log2 of K
+  const uint8_t DEFAULT_LG_K = 11;
 }
-// TODO: Redundant and deprecated. Will be removed in next major version release.
-static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
-static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
-static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
-template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
-template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
-template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
-template<typename A> using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
-template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
-template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
-template<typename A> using vector_u64 = std::vector<uint64_t, AllocU64<A>>;
 // forward declaration
 template<typename A> class u32_table;
 template<typename A>
 struct compressed_state {
+  using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
   explicit compressed_state(const A& allocator): table_data(allocator), table_data_words(0), table_num_entries(0),
       window_data(allocator), window_data_words(0) {}
-  vector_u32<A> table_data;
+  vector_u32 table_data;
   uint32_t table_data_words;
   uint32_t table_num_entries; // can be different from the number of entries in the sketch in hybrid mode
-  vector_u32<A> window_data;
+  vector_u32 window_data;
   uint32_t window_data_words;
 };
 template<typename A>
 struct uncompressed_state {
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
   explicit uncompressed_state(const A& allocator): table(allocator), window(allocator) {}
   u32_table<A> table;
-  vector_u8<A> window;
+  vector_bytes window;
 };
 } /* namespace datasketches */

data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp CHANGED Viewed

@@ -47,6 +47,9 @@ inline cpc_compressor<A>& get_compressor();
 template<typename A>
 class cpc_compressor {
 public:
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+  using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
   void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
   void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
@@ -126,17 +129,17 @@ private:
   uint16_t* make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values);
   void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const;
-  void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
+  void compress_surprising_values(const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result) const;
   void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
-  vector_u32<A> uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
-  void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
+  vector_u32 uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
+  void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_bytes& window, uint8_t lg_k, uint32_t num_coupons) const;
   static size_t safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits);
   static size_t safe_length_for_compressed_window_buf(uint32_t k);
   static uint8_t determine_pseudo_phase(uint8_t lg_k, uint32_t c);
-  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
+  static inline vector_u32 tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
   static inline uint8_t golomb_choose_number_of_base_bits(uint32_t k, uint64_t count);
 };

data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp CHANGED Viewed

@@ -183,7 +183,7 @@ void cpc_compressor<A>::uncompress(const compressed_state<A>& source, uncompress
 template<typename A>
 void cpc_compressor<A>::compress_sparse_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
   if (source.sliding_window.size() > 0) throw std::logic_error("unexpected sliding window");
-  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
   u32_table<A>::introspective_insertion_sort(pairs.data(), 0, pairs.size());
   compress_surprising_values(pairs, source.get_lg_k(), result);
 }
@@ -192,7 +192,7 @@ template<typename A>
 void cpc_compressor<A>::uncompress_sparse_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("unexpected sliding window");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
       lg_k, source.table_data.get_allocator());
   target.table = u32_table<A>::make_from_pairs(pairs.data(), source.table_num_entries, lg_k, pairs.get_allocator());
 }
@@ -204,12 +204,12 @@ void cpc_compressor<A>::compress_hybrid_flavor(const cpc_sketch_alloc<A>& source
   if (source.sliding_window.size() == 0) throw std::logic_error("no sliding window");
   if (source.window_offset != 0) throw std::logic_error("window_offset != 0");
   const uint32_t k = 1 << source.get_lg_k();
-  vector_u32<A> pairs_from_table = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs_from_table = source.surprising_value_table.unwrapping_get_items();
   const uint32_t num_pairs_from_table = static_cast<uint32_t>(pairs_from_table.size());
   if (num_pairs_from_table > 0) u32_table<A>::introspective_insertion_sort(pairs_from_table.data(), 0, num_pairs_from_table);
   const uint32_t num_pairs_from_window = source.get_num_coupons() - num_pairs_from_table; // because the window offset is zero
-  vector_u32<A> all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
+  vector_u32 all_pairs = tricky_get_pairs_from_window(source.sliding_window.data(), k, num_pairs_from_window, num_pairs_from_table, source.get_allocator());
   u32_table<A>::merge(
       pairs_from_table.data(), 0, pairs_from_table.size(),
@@ -224,7 +224,7 @@ template<typename A>
 void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k) const {
   if (source.window_data.size() > 0) throw std::logic_error("window is not expected");
   if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-  vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
+  vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, source.table_num_entries,
       lg_k, source.table_data.get_allocator());
   // In the hybrid flavor, some of these pairs actually
@@ -250,7 +250,7 @@ void cpc_compressor<A>::uncompress_hybrid_flavor(const compressed_state<A>& sour
 template<typename A>
 void cpc_compressor<A>::compress_pinned_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
-  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
   if (pairs.size() > 0) {
     // Here we subtract 8 from the column indices. Because they are stored in the low 6 bits
     // of each row_col pair, and because no column index is less than 8 for a "Pinned" sketch,
@@ -277,7 +277,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& sour
     target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+    vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
         lg_k, source.table_data.get_allocator());
     // undo the compressor's 8-column shift
     for (uint32_t i = 0; i < num_pairs; i++) {
@@ -291,7 +291,7 @@ void cpc_compressor<A>::uncompress_pinned_flavor(const compressed_state<A>& sour
 template<typename A>
 void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& source, compressed_state<A>& result) const {
   compress_sliding_window(source.sliding_window.data(), source.get_lg_k(), source.get_num_coupons(), result);
-  vector_u32<A> pairs = source.surprising_value_table.unwrapping_get_items();
+  vector_u32 pairs = source.surprising_value_table.unwrapping_get_items();
   if (pairs.size() > 0) {
     // Here we apply a complicated transformation to the column indices, which
     // changes the implied ordering of the pairs, so we must do it before sorting.
@@ -330,7 +330,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
     target.table = u32_table<A>(2, 6 + lg_k, source.table_data.get_allocator());
   } else {
     if (source.table_data.size() == 0) throw std::logic_error("table is expected");
-    vector_u32<A> pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
+    vector_u32 pairs = uncompress_surprising_values(source.table_data.data(), source.table_data_words, num_pairs,
         lg_k, source.table_data.get_allocator());
     const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
@@ -356,7 +356,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
 }
 template<typename A>
-void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const {
+void cpc_compressor<A>::compress_surprising_values(const vector_u32& pairs, uint8_t lg_k, compressed_state<A>& result) const {
   const uint32_t k = 1 << lg_k;
   const uint32_t num_pairs = static_cast<uint32_t>(pairs.size());
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
@@ -374,10 +374,10 @@ void cpc_compressor<A>::compress_surprising_values(const vector_u32<A>& pairs, u
 }
 template<typename A>
-vector_u32<A> cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
-    uint8_t lg_k, const A& allocator) const {
+auto cpc_compressor<A>::uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs,
+    uint8_t lg_k, const A& allocator) const -> vector_u32 {
   const uint32_t k = 1 << lg_k;
-  vector_u32<A> pairs(num_pairs, 0, allocator);
+  vector_u32 pairs(num_pairs, 0, allocator);
   const uint8_t num_base_bits = golomb_choose_number_of_base_bits(k + num_pairs, num_pairs);
   low_level_uncompress_pairs(pairs.data(), num_pairs, num_base_bits, data, data_words);
   return pairs;
@@ -399,7 +399,7 @@ void cpc_compressor<A>::compress_sliding_window(const uint8_t* window, uint8_t l
 }
 template<typename A>
-void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window,
+void cpc_compressor<A>::uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_bytes& window,
     uint8_t lg_k, uint32_t num_coupons) const {
   const uint32_t k = 1 << lg_k;
   window.resize(k); // zeroing not needed here (unlike the Hybrid Flavor)
@@ -722,10 +722,10 @@ void write_unary(
 // The empty space that this leaves at the beginning of the output array
 // will be filled in later by the caller.
 template<typename A>
-vector_u32<A> cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
-    uint32_t empty_space, const A& allocator) {
+auto cpc_compressor<A>::tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get,
+    uint32_t empty_space, const A& allocator) -> vector_u32 {
   const size_t output_length = empty_space + num_pairs_to_get;
-  vector_u32<A> pairs(output_length, 0, allocator);
+  vector_u32 pairs(output_length, 0, allocator);
   size_t pair_index = empty_space;
   for (unsigned row_index = 0; row_index < k; row_index++) {
     uint8_t byte = window[row_index];

data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp CHANGED Viewed

@@ -33,58 +33,58 @@
 namespace datasketches {
-/*
- * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch
- *
- * This is a very compact (in serialized form) distinct counting sketch.
- * The theory is described in the following paper:
- * https://arxiv.org/abs/1708.06839
- *
- * author Kevin Lang
- * author Alexander Saydakov
- */
-// forward-declarations
+// forward declarations
 template<typename A> class cpc_sketch_alloc;
 template<typename A> class cpc_union_alloc;
-// alias with default allocator for convenience
+/// CPC sketch alias with default allocator
 using cpc_sketch = cpc_sketch_alloc<std::allocator<uint8_t>>;
-// allocation and initialization of global decompression (decoding) tables
-// call this before anything else if you want to control the initialization time
-// for instance, to have this happen outside of a transaction context
-// otherwise initialization happens on the first use (serialization or deserialization)
-// it is safe to call more than once assuming no race conditions
-// this is not thread safe! neither is the rest of the library
+/**
+ * Allocation and initialization of global decompression (decoding) tables.
+ * Call this before anything else if you want to control the initialization time.
+ * For instance, to have this happen outside of a transaction context.
+ * Otherwise initialization happens on the first use (serialization or deserialization).
+ * It is safe to call more than once assuming no race conditions.
+ * This is not thread safe! Neither is the rest of the library.
+ */
 template<typename A> void cpc_init();
+/**
+ * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch
+ *
+ * This is a very compact (in serialized form) distinct counting sketch.
+ * The theory is described in the following paper:
+ * https://arxiv.org/abs/1708.06839
+ *
+ * @author Kevin Lang
+ * @author Alexander Saydakov
+ */
 template<typename A>
 class cpc_sketch_alloc {
 public:
+  using allocator_type = A;
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+  using vector_u64 = std::vector<uint64_t, typename std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
   /**
    * Creates an instance of the sketch given the lg_k parameter and hash seed.
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
+   * @param allocator instance of an allocator
    */
   explicit cpc_sketch_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
-  using allocator_type = A;
+  /// @return allocator
   A get_allocator() const;
-  /**
-   * @return configured lg_k of this sketch
-   */
+  /// @return configured lg_k of this sketch
   uint8_t get_lg_k() const;
-  /**
-   * @return true if this sketch represents an empty set
-   */
+  /// @return true if this sketch represents an empty set
   bool is_empty() const;
-  /**
-   * @return estimate of the distinct count of the input stream
-   */
+  /// @return estimate of the distinct count of the input stream
   double get_estimate() const;
   /**
@@ -189,13 +189,14 @@ public:
    * Otherwise two sketches that should represent overlapping sets will be disjoint
    * For instance, for signed 32-bit values call update(int32_t) method above,
    * which does widening conversion to int64_t, if compatibility with Java is expected
-   * @param data pointer to the data
-   * @param length of the data in bytes
+   * @param value pointer to the data
+   * @param size of the data in bytes
    */
   void update(const void* value, size_t size);
   /**
    * Returns a human-readable summary of this sketch
+   * @return a human-readable summary of this sketch
    */
   string<A> to_string() const;
@@ -205,16 +206,13 @@ public:
    */
   void serialize(std::ostream& os) const;
-  // This is a convenience alias for users
-  // The type returned by the following serialize method
-  using vector_bytes = vector_u8<A>;
   /**
    * This method serializes the sketch as a vector of bytes.
    * An optional header can be reserved in front of the sketch.
    * It is an uninitialized space of a given size.
    * This header is used in Datasketches PostgreSQL extension.
    * @param header_size_bytes space to reserve in front of the sketch
+   * @return serialized sketch as a vector of bytes
    */
   vector_bytes serialize(unsigned header_size_bytes = 0) const;
@@ -222,6 +220,7 @@ public:
    * This method deserializes a sketch from a given stream.
    * @param is input stream
    * @param seed the seed for the hash function that was used to create the sketch
+   * @param allocator instance of an Allocator
    * @return an instance of a sketch
    */
   static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
@@ -231,6 +230,7 @@ public:
    * @param bytes pointer to the array of bytes
    * @param size the size of the array
    * @param seed the seed for the hash function that was used to create the sketch
+   * @param allocator instance of an Allocator
    * @return an instance of the sketch
    */
   static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
@@ -246,10 +246,10 @@ public:
    */
   static size_t get_max_serialized_size_bytes(uint8_t lg_k);
-  // for internal use
+  /// @private for internal use
   uint32_t get_num_coupons() const;
-  // for debugging
+  /// @private for debugging
   // this should catch some forms of corruption during serialization-deserialization
   bool validate() const;
@@ -276,7 +276,7 @@ private:
   uint32_t num_coupons; // the number of coupons collected so far
   u32_table<A> surprising_value_table;
-  vector_u8<A> sliding_window;
+  vector_bytes sliding_window;
   uint8_t window_offset; // derivable from num_coupons, but made explicit for speed
   uint8_t first_interesting_column; // This is part of a speed optimization
@@ -285,7 +285,7 @@ private:
   // for deserialization and cpc_union::get_result()
   cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t first_interesting_column, u32_table<A>&& table,
-      vector_u8<A>&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed);
+      vector_bytes&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed);
   inline void row_col_update(uint32_t row_col);
   inline void update_sparse(uint32_t row_col);
@@ -308,7 +308,7 @@ private:
   static inline uint8_t determine_correct_offset(uint8_t lg_k, uint64_t c);
   // this produces a full-size k-by-64 bit matrix
-  vector_u64<A> build_bit_matrix() const;
+  vector_u64 build_bit_matrix() const;
   static uint8_t get_preamble_ints(uint32_t num_coupons, bool has_hip, bool has_table, bool has_window);
   inline void write_hip(std::ostream& os) const;

data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp CHANGED Viewed

@@ -315,7 +315,7 @@ void cpc_sketch_alloc<A>::move_window() {
   const uint32_t k = 1 << lg_k;
   // Construct the full-sized bit matrix that corresponds to the sketch
-  vector_u64<A> bit_matrix = build_bit_matrix();
+  vector_u64 bit_matrix = build_bit_matrix();
   // refresh the KXP register on every 8th window shift.
   if ((new_offset & 0x7) == 0) refresh_kxp(bit_matrix.data());
@@ -458,7 +458,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
 }
 template<typename A>
-vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
+auto cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const -> vector_bytes {
   compressed_state<A> compressed(sliding_window.get_allocator());
   compressed.table_data_words = 0;
   compressed.table_num_entries = 0;
@@ -469,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
   const bool has_window = compressed.window_data.size() > 0;
   const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
   const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
-  vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
+  vector_bytes bytes(size, 0, sliding_window.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
   ptr += copy_to_mem(preamble_ints, ptr);
   const uint8_t serial_version = SERIAL_VERSION;
@@ -712,15 +712,18 @@ static const size_t CPC_MAX_PREAMBLE_SIZE_BYTES = 40;
 template<typename A>
 size_t cpc_sketch_alloc<A>::get_max_serialized_size_bytes(uint8_t lg_k) {
   check_lg_k(lg_k);
-  if (lg_k <= CPC_EMPIRICAL_SIZE_MAX_LGK) return CPC_EMPIRICAL_MAX_SIZE_BYTES[lg_k - CPC_MIN_LG_K] + CPC_MAX_PREAMBLE_SIZE_BYTES;
+  if (lg_k <= CPC_EMPIRICAL_SIZE_MAX_LGK) {
+    return CPC_EMPIRICAL_MAX_SIZE_BYTES[lg_k - cpc_constants::MIN_LG_K] + CPC_MAX_PREAMBLE_SIZE_BYTES;
+  }
   const uint32_t k = 1 << lg_k;
   return (int) (CPC_EMPIRICAL_MAX_SIZE_FACTOR * k) + CPC_MAX_PREAMBLE_SIZE_BYTES;
 }
 template<typename A>
 void cpc_sketch_alloc<A>::check_lg_k(uint8_t lg_k) {
-  if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
-    throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
+  if (lg_k < cpc_constants::MIN_LG_K || lg_k > cpc_constants::MAX_LG_K) {
+    throw std::invalid_argument("lg_k must be >= " + std::to_string(cpc_constants::MIN_LG_K) + " and <= "
+        + std::to_string(cpc_constants::MAX_LG_K) + ": " + std::to_string(lg_k));
   }
 }
@@ -731,14 +734,14 @@ uint32_t cpc_sketch_alloc<A>::get_num_coupons() const {
 template<typename A>
 bool cpc_sketch_alloc<A>::validate() const {
-  vector_u64<A> bit_matrix = build_bit_matrix();
+  vector_u64 bit_matrix = build_bit_matrix();
   const uint64_t num_bits_set = count_bits_set_in_matrix(bit_matrix.data(), 1ULL << lg_k);
   return num_bits_set == num_coupons;
 }
 template<typename A>
 cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t first_interesting_column,
-    u32_table<A>&& table, vector_u8<A>&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed):
+    u32_table<A>&& table, vector_bytes&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed):
 lg_k(lg_k),
 seed(seed),
 was_merged(!has_hip),
@@ -800,14 +803,14 @@ uint8_t cpc_sketch_alloc<A>::determine_correct_offset(uint8_t lg_k, uint64_t c)
 }
 template<typename A>
-vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
+auto cpc_sketch_alloc<A>::build_bit_matrix() const -> vector_u64 {
   const uint32_t k = 1 << lg_k;
   if (window_offset > 56) throw std::logic_error("offset > 56");
   // Fill the matrix with default rows in which the "early zone" is filled with ones.
   // This is essential for the routine's O(k) time cost (as opposed to O(C)).
   const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
-  vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
+  vector_u64 matrix(k, default_row, sliding_window.get_allocator());
   if (num_coupons == 0) return matrix;

data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp CHANGED Viewed

@@ -27,31 +27,55 @@
 namespace datasketches {
-/*
+/// CPC union alias with default allocator
+using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
+/**
  * High performance C++ implementation of Compressed Probabilistic Counting (CPC) Union
  *
  * author Kevin Lang
  * author Alexander Saydakov
  */
-// alias with default allocator for convenience
-using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
 template<typename A>
 class cpc_union_alloc {
 public:
+  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
+  using vector_u64 = std::vector<uint64_t, typename std::allocator_traits<A>::template rebind_alloc<uint64_t>>;
   /**
    * Creates an instance of the union given the lg_k parameter and hash seed.
    * @param lg_k base 2 logarithm of the number of bins in the sketch
    * @param seed for hash function
+   * @param allocator instance of an allocator
    */
   explicit cpc_union_alloc(uint8_t lg_k = cpc_constants::DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
+  /**
+   * Copy constructor
+   * @param other union to be copied
+   */
   cpc_union_alloc(const cpc_union_alloc<A>& other);
+  /**
+   * Move constructor
+   * @param other union to be moved
+   */
   cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
   ~cpc_union_alloc();
+  /**
+   * Copy assignment
+   * @param other union to be copied
+   * @return reference to this union
+   */
   cpc_union_alloc<A>& operator=(const cpc_union_alloc<A>& other);
+  /**
+   * Move assignment
+   * @param other union to be moved
+   * @return reference to this union
+   */
   cpc_union_alloc<A>& operator=(cpc_union_alloc<A>&& other) noexcept;
   /**
@@ -73,14 +97,14 @@ public:
   cpc_sketch_alloc<A> get_result() const;
 private:
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint8_t> AllocU8;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<uint64_t> AllocU64;
-  typedef typename std::allocator_traits<A>::template rebind_alloc<cpc_sketch_alloc<A>> AllocCpc;
+  using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
+  using AllocU64 = typename std::allocator_traits<A>::template rebind_alloc<uint64_t>;
+  using AllocCpc = typename std::allocator_traits<A>::template rebind_alloc<cpc_sketch_alloc<A>>;
   uint8_t lg_k;
   uint64_t seed;
   cpc_sketch_alloc<A>* accumulator;
-  vector_u64<A> bit_matrix;
+  vector_u64 bit_matrix;
   template<typename S> void internal_update(S&& sketch); // to support both rvalue and lvalue
@@ -90,8 +114,8 @@ private:
   void switch_to_bit_matrix();
   void walk_table_updating_sketch(const u32_table<A>& table);
   void or_table_into_matrix(const u32_table<A>& table);
-  void or_window_into_matrix(const vector_u8<A>& sliding_window, uint8_t offset, uint8_t src_lg_k);
-  void or_matrix_into_matrix(const vector_u64<A>& src_matrix, uint8_t src_lg_k);
+  void or_window_into_matrix(const vector_bytes& sliding_window, uint8_t offset, uint8_t src_lg_k);
+  void or_matrix_into_matrix(const vector_u64& src_matrix, uint8_t src_lg_k);
   void reduce_k(uint8_t new_lg_k);
 };

data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp CHANGED Viewed

@@ -33,8 +33,8 @@ seed(seed),
 accumulator(nullptr),
 bit_matrix(allocator)
 {
-  if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
-    throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
+  if (lg_k < cpc_constants::MIN_LG_K || lg_k > cpc_constants::MAX_LG_K) {
+    throw std::invalid_argument("lg_k must be >= " + std::to_string(cpc_constants::MIN_LG_K) + " and <= " + std::to_string(cpc_constants::MAX_LG_K) + ": " + std::to_string(lg_k));
   }
   accumulator = new (AllocCpc(allocator).allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
 }
@@ -166,7 +166,7 @@ void cpc_union_alloc<A>::internal_update(S&& sketch) {
   // SLIDING mode involves inverted logic, so we can't just walk the source sketch.
   // Instead, we convert it to a bitMatrix that can be OR'ed into the destination.
   if (cpc_sketch_alloc<A>::flavor::SLIDING != src_flavor) throw std::logic_error("wrong flavor"); // Case D
-  vector_u64<A> src_matrix = sketch.build_bit_matrix();
+  vector_u64 src_matrix = sketch.build_bit_matrix();
   or_matrix_into_matrix(src_matrix, sketch.get_lg_k());
 }
@@ -203,7 +203,7 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
   const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
-  vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
+  vector_bytes sliding_window(k, 0, bit_matrix.get_allocator());
   // don't need to zero the window's memory
   // dynamically growing caused snowplow effect
@@ -289,7 +289,7 @@ void cpc_union_alloc<A>::or_table_into_matrix(const u32_table<A>& table) {
 }
 template<typename A>
-void cpc_union_alloc<A>::or_window_into_matrix(const vector_u8<A>& sliding_window, uint8_t offset, uint8_t src_lg_k) {
+void cpc_union_alloc<A>::or_window_into_matrix(const vector_bytes& sliding_window, uint8_t offset, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
   const uint32_t src_k = 1 << src_lg_k;
@@ -299,7 +299,7 @@ void cpc_union_alloc<A>::or_window_into_matrix(const vector_u8<A>& sliding_windo
 }
 template<typename A>
-void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64<A>& src_matrix, uint8_t src_lg_k) {
+void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64& src_matrix, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
   const uint32_t src_k = 1 << src_lg_k;
@@ -315,10 +315,10 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
   if (bit_matrix.size() > 0) { // downsample the unioner's bit matrix
     if (accumulator != nullptr) throw std::logic_error("accumulator is not null");
-    vector_u64<A> old_matrix = std::move(bit_matrix);
+    vector_u64 old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
     const uint32_t new_k = 1 << new_lg_k;
-    bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
+    bit_matrix = vector_u64(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);
     return;

data/vendor/datasketches-cpp/cpc/include/u32_table.hpp CHANGED Viewed

@@ -38,6 +38,7 @@ static const uint32_t U32_TABLE_DOWNSIZE_DENOM = 4LL;
 template<typename A>
 class u32_table {
 public:
+  using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
   u32_table(const A& allocator);
   u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
@@ -54,7 +55,7 @@ public:
   static u32_table make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator);
-  vector_u32<A> unwrapping_get_items() const;
+  vector_u32 unwrapping_get_items() const;
   static void merge(
     const uint32_t* arr_a, size_t start_a, size_t length_a, // input
@@ -70,7 +71,7 @@ private:
   uint8_t lg_size; // log2 of number of slots
   uint8_t num_valid_bits;
   uint32_t num_items;
-  vector_u32<A> slots;
+  vector_u32 slots;
   inline uint32_t lookup(uint32_t item) const;
   inline void must_insert(uint32_t item);