RubyGems - datasketches - Versions diffs - 0.2.1 → 0.2.2 - Mend

datasketches 0.2.1 → 0.2.2

Files changed (117)

data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp CHANGED

@@ -24,19 +24,27 @@
 namespace datasketches {
+// forward declaration
+template<typename T, typename C, typename S, typename A> class kll_sketch;
 template <typename T, typename C, typename A>
 class kll_quantile_calculator {
   public:
-    // assumes that all levels are sorted including level 0
-    kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator);
+    using Entry = std::pair<T, uint64_t>;
+    using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
+    using Container = std::vector<Entry, AllocEntry>;
+    using const_iterator = typename Container::const_iterator;
+    template<typename S>
+    kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
     T get_quantile(double fraction) const;
+    const_iterator begin() const;
+    const_iterator end() const;
   private:
     using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
     using vector_u32 = std::vector<uint32_t, AllocU32>;
-    using Entry = std::pair<T, uint64_t>;
-    using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
-    using Container = std::vector<Entry, AllocEntry>;
     uint64_t n_;
     vector_u32 levels_;
     Container entries_;
@@ -45,7 +53,7 @@ class kll_quantile_calculator {
     T approximately_answer_positional_query(uint64_t pos) const;
     void convert_to_preceding_cummulative();
     uint32_t chunk_containing_pos(uint64_t pos) const;
-    uint32_t search_for_chunk_containing_pos(uint64_t pos, uint32_t l, uint32_t r) const;
+    uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
     static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
     static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
     static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);

data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp CHANGED

@@ -28,24 +28,38 @@
 namespace datasketches {
-template <typename T, typename C, typename A>
-kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n, const A& allocator):
-n_(n), levels_(num_levels + 1, 0, allocator), entries_(allocator)
+template<typename T, typename C, typename A>
+template<typename S>
+kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
+n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
 {
-  const uint32_t num_items = levels[num_levels] - levels[0];
-  entries_.reserve(num_items);
-  populate_from_sketch(items, levels, num_levels);
-  merge_sorted_blocks(entries_, levels_.data(), levels_.size() - 1, num_items);
-  if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
-  convert_to_preceding_cummulative();
+  const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
+  if (num_items > 0) {
+    entries_.reserve(num_items);
+    populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
+    if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
+    merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
+    if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
+    convert_to_preceding_cummulative();
+  }
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
   return approximately_answer_positional_query(pos_of_phi(fraction, n_));
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
+auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
+  return entries_.begin();
+}
+template<typename T, typename C, typename A>
+auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
+  return entries_.end();
+}
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
   size_t src_level = 0;
   size_t dst_level = 0;
@@ -68,7 +82,7 @@ void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, cons
   if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
   if (pos >= n_) throw std::logic_error("position out of range");
   const uint32_t num_items = levels_[levels_.size() - 1];
@@ -77,7 +91,7 @@ T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64
   return entries_[index].first;
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
   uint64_t subtotal = 0;
   for (auto& entry: entries_) {
@@ -87,13 +101,13 @@ void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
   }
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
-  const uint64_t pos = std::floor(phi * n);
+  const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
   return (pos == n) ? n - 1 : pos;
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
   if (entries_.size() < 1) throw std::logic_error("array too short");
   if (pos < entries_[0].second) throw std::logic_error("position too small");
@@ -101,19 +115,19 @@ uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) co
   return search_for_chunk_containing_pos(pos, 0, entries_.size());
 }
-template <typename T, typename C, typename A>
-uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint32_t l, uint32_t r) const {
+template<typename T, typename C, typename A>
+uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
   if (l + 1 == r) {
-    return l;
+    return static_cast<uint32_t>(l);
   }
-  const uint32_t m(l + (r - l) / 2);
+  const uint64_t m = l + (r - l) / 2;
   if (entries_[m].second <= pos) {
     return search_for_chunk_containing_pos(pos, m, r);
   }
   return search_for_chunk_containing_pos(pos, l, m);
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
   if (num_levels == 1) return;
   Container temporary(entries.get_allocator());
@@ -121,7 +135,7 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, c
   merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
     uint8_t starting_level, uint8_t num_levels) {
   if (num_levels == 1) return;
@@ -129,10 +143,11 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& ori
   const uint8_t num_levels_2 = num_levels - num_levels_1;
   const uint8_t starting_level_1 = starting_level;
   const uint8_t starting_level_2 = starting_level + num_levels_1;
-  const auto chunk_begin = temp.begin() + temp.size();
+  const auto initial_size = temp.size();
   merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
   merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
   const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
+  const auto chunk_begin = temp.begin() + initial_size;
   std::merge(
     std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
     std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
@@ -141,7 +156,7 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& ori
   temp.erase(chunk_begin, temp.end());
 }
-template <typename T, typename C, typename A>
+template<typename T, typename C, typename A>
 void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
     uint8_t starting_level, uint8_t num_levels) {
   if (num_levels == 1) {

data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp CHANGED

@@ -156,6 +156,9 @@ template<typename A> using vector_d = std::vector<double, AllocD<A>>;
 template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
 class kll_sketch {
   public:
+    using value_type = T;
+    using comparator = C;
     static const uint8_t DEFAULT_M = 8;
     static const uint16_t DEFAULT_K = 200;
     static const uint16_t MIN_K = DEFAULT_M;
@@ -296,7 +299,7 @@ class kll_sketch {
      *
      * @return array of approximations to the given number of evenly-spaced fractional ranks.
      */
-    std::vector<T, A> get_quantiles(size_t num) const;
+    std::vector<T, A> get_quantiles(uint32_t num) const;
     /**
      * Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
@@ -383,6 +386,33 @@ class kll_sketch {
     template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
     size_t get_serialized_size_bytes() const;
+    /**
+     * Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
+     * length. The resulting size is an overestimate to make sure actual sketches don't exceed it.
+     * This method can be used if allocation of storage is necessary beforehand, but it is not
+     * optimal.
+     * This method is for arithmetic types (integral and floating point)
+     * @param k parameter that controls size of the sketch and accuracy of estimates
+     * @param n stream length
+     * @return upper bound on the serialized size
+     */
+    template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
+    static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n);
+    /**
+     * Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
+     * length. The resulting size is an overestimate to make sure actual sketches don't exceed it.
+     * This method can be used if allocation of storage is necessary beforehand, but it is not
+     * optimal.
+     * This method is for all other non-arithmetic types, and it takes a max size of an item as input.
+     * @param k parameter that controls size of the sketch and accuracy of estimates
+     * @param n stream length
+     * @param max_item_size_bytes maximum size of an item in bytes
+     * @return upper bound on the serialized size
+     */
+    template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
+    static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes);
     /**
      * This method serializes the sketch into a given stream in a binary form
      * @param os output stream
@@ -391,7 +421,7 @@ class kll_sketch {
     // This is a convenience alias for users
     // The type returned by the following serialize method
-    typedef vector_u8<A> vector_bytes;
+    using vector_bytes = vector_u8<A>;
     /**
      * This method serializes the sketch as a vector of bytes.
@@ -480,6 +510,8 @@ class kll_sketch {
     T* max_value_;
     bool is_level_zero_sorted_;
+    friend class kll_quantile_calculator<T, C, A>;
     // for deserialization
     class item_deleter;
     class items_deleter;

data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp CHANGED

@@ -303,7 +303,7 @@ std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(const double* fractions,
 }
 template<typename T, typename C, typename S, typename A>
-std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(size_t num) const {
+std::vector<T, A> kll_sketch<T, C, S, A>::get_quantiles(uint32_t num) const {
   if (is_empty()) return std::vector<T, A>(allocator_);
   if (num == 0) {
     throw std::invalid_argument("num must be > 0");
@@ -380,36 +380,56 @@ size_t kll_sketch<T, C, S, A>::get_serialized_size_bytes() const {
   size_t size = DATA_START + num_levels_ * sizeof(uint32_t);
   size += S().size_of_item(*min_value_);
   size += S().size_of_item(*max_value_);
-  for (auto& it: *this) size += S().size_of_item(it.first);
+  for (auto it: *this) size += S().size_of_item(it.first);
   return size;
 }
+// implementation for fixed-size arithmetic types (integral and floating point)
+template<typename T, typename C, typename S, typename A>
+template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
+size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n) {
+  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
+  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
+  // the last integer in the levels_ array is not serialized because it can be derived
+  return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * sizeof(TT);
+}
+// implementation for all other types
+template<typename T, typename C, typename S, typename A>
+template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
+size_t kll_sketch<T, C, S, A>::get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes) {
+  const uint8_t num_levels = kll_helper::ub_on_num_levels(n);
+  const uint32_t max_num_retained = kll_helper::compute_total_capacity(k, DEFAULT_M, num_levels);
+  // the last integer in the levels_ array is not serialized because it can be derived
+  return DATA_START + num_levels * sizeof(uint32_t) + (max_num_retained + 2) * max_item_size_bytes;
+}
 template<typename T, typename C, typename S, typename A>
 void kll_sketch<T, C, S, A>::serialize(std::ostream& os) const {
   const bool is_single_item = n_ == 1;
   const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
-  os.write(reinterpret_cast<const char*>(&preamble_ints), sizeof(preamble_ints));
+  write(os, preamble_ints);
   const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
-  os.write(reinterpret_cast<const char*>(&serial_version), sizeof(serial_version));
+  write(os, serial_version);
   const uint8_t family(FAMILY);
-  os.write(reinterpret_cast<const char*>(&family), sizeof(family));
+  write(os, family);
   const uint8_t flags_byte(
       (is_empty() ? 1 << flags::IS_EMPTY : 0)
     | (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
     | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
   );
-  os.write(reinterpret_cast<const char*>(&flags_byte), sizeof(flags_byte));
-  os.write((char*)&k_, sizeof(k_));
-  os.write((char*)&m_, sizeof(m_));
+  write(os, flags_byte);
+  write(os, k_);
+  write(os, m_);
   const uint8_t unused = 0;
-  os.write(reinterpret_cast<const char*>(&unused), sizeof(unused));
+  write(os, unused);
   if (is_empty()) return;
   if (!is_single_item) {
-    os.write((char*)&n_, sizeof(n_));
-    os.write((char*)&min_k_, sizeof(min_k_));
-    os.write((char*)&num_levels_, sizeof(num_levels_));
-    os.write((char*)&unused, sizeof(unused));
-    os.write((char*)levels_.data(), sizeof(levels_[0]) * num_levels_);
+    write(os, n_);
+    write(os, min_k_);
+    write(os, num_levels_);
+    write(os, unused);
+    write(os, levels_.data(), sizeof(levels_[0]) * num_levels_);
     S().serialize(os, min_value_, 1);
     S().serialize(os, max_value_, 1);
   }
@@ -424,27 +444,26 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
   uint8_t* ptr = bytes.data() + header_size_bytes;
   const uint8_t* end_ptr = ptr + size;
   const uint8_t preamble_ints(is_empty() || is_single_item ? PREAMBLE_INTS_SHORT : PREAMBLE_INTS_FULL);
-  ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
+  ptr += copy_to_mem(preamble_ints, ptr);
   const uint8_t serial_version(is_single_item ? SERIAL_VERSION_2 : SERIAL_VERSION_1);
-  ptr += copy_to_mem(&serial_version, ptr, sizeof(serial_version));
+  ptr += copy_to_mem(serial_version, ptr);
   const uint8_t family(FAMILY);
-  ptr += copy_to_mem(&family, ptr, sizeof(family));
+  ptr += copy_to_mem(family, ptr);
   const uint8_t flags_byte(
       (is_empty() ? 1 << flags::IS_EMPTY : 0)
     | (is_level_zero_sorted_ ? 1 << flags::IS_LEVEL_ZERO_SORTED : 0)
     | (is_single_item ? 1 << flags::IS_SINGLE_ITEM : 0)
   );
-  ptr += copy_to_mem(&flags_byte, ptr, sizeof(flags_byte));
-  ptr += copy_to_mem(&k_, ptr, sizeof(k_));
-  ptr += copy_to_mem(&m_, ptr, sizeof(m_));
-  const uint8_t unused = 0;
-  ptr += copy_to_mem(&unused, ptr, sizeof(unused));
+  ptr += copy_to_mem(flags_byte, ptr);
+  ptr += copy_to_mem(k_, ptr);
+  ptr += copy_to_mem(m_, ptr);
+  ptr += sizeof(uint8_t); // unused
   if (!is_empty()) {
     if (!is_single_item) {
-      ptr += copy_to_mem(&n_, ptr, sizeof(n_));
-      ptr += copy_to_mem(&min_k_, ptr, sizeof(min_k_));
-      ptr += copy_to_mem(&num_levels_, ptr, sizeof(num_levels_));
-      ptr += copy_to_mem(&unused, ptr, sizeof(unused));
+      ptr += copy_to_mem(n_, ptr);
+      ptr += copy_to_mem(min_k_, ptr);
+      ptr += copy_to_mem(num_levels_, ptr);
+      ptr += sizeof(uint8_t); // unused
       ptr += copy_to_mem(levels_.data(), ptr, sizeof(levels_[0]) * num_levels_);
       ptr += S().serialize(ptr, end_ptr - ptr, min_value_, 1);
       ptr += S().serialize(ptr, end_ptr - ptr, max_value_, 1);
@@ -459,20 +478,13 @@ vector_u8<A> kll_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, const A& allocator) {
-  uint8_t preamble_ints;
-  is.read((char*)&preamble_ints, sizeof(preamble_ints));
-  uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
-  uint8_t family_id;
-  is.read((char*)&family_id, sizeof(family_id));
-  uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
-  uint16_t k;
-  is.read((char*)&k, sizeof(k));
-  uint8_t m;
-  is.read((char*)&m, sizeof(m));
-  uint8_t unused;
-  is.read((char*)&unused, sizeof(unused));
+  const auto preamble_ints = read<uint8_t>(is);
+  const auto serial_version = read<uint8_t>(is);
+  const auto family_id = read<uint8_t>(is);
+  const auto flags_byte = read<uint8_t>(is);
+  const auto k = read<uint16_t>(is);
+  const auto m = read<uint8_t>(is);
+  read<uint8_t>(is); // skip unused byte
   check_m(m);
   check_preamble_ints(preamble_ints, flags_byte);
@@ -492,10 +504,10 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
     min_k = k;
     num_levels = 1;
   } else {
-    is.read((char*)&n, sizeof(n_));
-    is.read((char*)&min_k, sizeof(min_k_));
-    is.read((char*)&num_levels, sizeof(num_levels));
-    is.read((char*)&unused, sizeof(unused));
+    n = read<uint64_t>(is);
+    min_k = read<uint16_t>(is);
+    num_levels = read<uint8_t>(is);
+    read<uint8_t>(is); // skip unused byte
   }
   vector_u32<A> levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
@@ -503,7 +515,7 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(std::istream& is, con
     levels[0] = capacity - 1;
   } else {
     // the last integer in levels_ is not serialized because it can be derived
-    is.read((char*)levels.data(), sizeof(levels[0]) * num_levels);
+    read(is, levels.data(), sizeof(levels[0]) * num_levels);
   }
   levels[num_levels] = capacity;
   A alloc(allocator);
@@ -546,24 +558,24 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
   ensure_minimum_memory(size, 8);
   const char* ptr = static_cast<const char*>(bytes);
   uint8_t preamble_ints;
-  ptr += copy_from_mem(ptr, &preamble_ints, sizeof(preamble_ints));
+  ptr += copy_from_mem(ptr, preamble_ints);
   uint8_t serial_version;
-  ptr += copy_from_mem(ptr, &serial_version, sizeof(serial_version));
+  ptr += copy_from_mem(ptr, serial_version);
   uint8_t family_id;
-  ptr += copy_from_mem(ptr, &family_id, sizeof(family_id));
+  ptr += copy_from_mem(ptr, family_id);
   uint8_t flags_byte;
-  ptr += copy_from_mem(ptr, &flags_byte, sizeof(flags_byte));
+  ptr += copy_from_mem(ptr, flags_byte);
   uint16_t k;
-  ptr += copy_from_mem(ptr, &k, sizeof(k));
+  ptr += copy_from_mem(ptr, k);
   uint8_t m;
-  ptr += copy_from_mem(ptr, &m, sizeof(m));
-  ptr++; // skip unused byte
+  ptr += copy_from_mem(ptr, m);
+  ptr += sizeof(uint8_t); // skip unused byte
   check_m(m);
   check_preamble_ints(preamble_ints, flags_byte);
   check_serial_version(serial_version);
   check_family_id(family_id);
-  ensure_minimum_memory(size, 1 << preamble_ints);
+  ensure_minimum_memory(size, 1ULL << preamble_ints);
   const bool is_empty(flags_byte & (1 << flags::IS_EMPTY));
   if (is_empty) return kll_sketch<T, C, S, A>(k, allocator);
@@ -578,10 +590,10 @@ kll_sketch<T, C, S, A> kll_sketch<T, C, S, A>::deserialize(const void* bytes, si
     min_k = k;
     num_levels = 1;
   } else {
-    ptr += copy_from_mem(ptr, &n, sizeof(n));
-    ptr += copy_from_mem(ptr, &min_k, sizeof(min_k));
-    ptr += copy_from_mem(ptr, &num_levels, sizeof(num_levels));
-    ptr++; // skip unused byte
+    ptr += copy_from_mem(ptr, n);
+    ptr += copy_from_mem(ptr, min_k);
+    ptr += copy_from_mem(ptr, num_levels);
+    ptr += sizeof(uint8_t); // skip unused byte
   }
   vector_u32<A> levels(num_levels + 1, 0, allocator);
   const uint32_t capacity(kll_helper::compute_total_capacity(k, m, num_levels));
@@ -779,7 +791,7 @@ std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantil
   using AllocCalc = typename std::allocator_traits<A>::template rebind_alloc<kll_quantile_calculator<T, C, A>>;
   AllocCalc alloc(allocator_);
   std::unique_ptr<kll_quantile_calculator<T, C, A>, std::function<void(kll_quantile_calculator<T, C, A>*)>> quantile_calculator(
-    new (alloc.allocate(1)) kll_quantile_calculator<T, C, A>(items_, levels_.data(), num_levels_, n_, allocator_),
+    new (alloc.allocate(1)) kll_quantile_calculator<T, C, A>(*this),
     [&alloc](kll_quantile_calculator<T, C, A>* ptr){ ptr->~kll_quantile_calculator<T, C, A>(); alloc.deallocate(ptr, 1); }
   );
   return quantile_calculator;
@@ -1067,14 +1079,14 @@ typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::begin()
 template <typename T, typename C, typename S, typename A>
 typename kll_sketch<T, C, S, A>::const_iterator kll_sketch<T, C, S, A>::end() const {
-  return kll_sketch<T, C, S, A>::const_iterator(nullptr, nullptr, num_levels_);
+  return kll_sketch<T, C, S, A>::const_iterator(nullptr, levels_.data(), num_levels_);
 }
 // kll_sketch::const_iterator implementation
 template<typename T, typename C, typename S, typename A>
 kll_sketch<T, C, S, A>::const_iterator::const_iterator(const T* items, const uint32_t* levels, const uint8_t num_levels):
-items(items), levels(levels), num_levels(num_levels), index(levels == nullptr ? 0 : levels[0]), level(levels == nullptr ? num_levels : 0), weight(1)
+items(items), levels(levels), num_levels(num_levels), index(items == nullptr ? levels[num_levels] : levels[0]), level(items == nullptr ? num_levels : 0), weight(1)
 {}
 template<typename T, typename C, typename S, typename A>
@@ -1098,8 +1110,6 @@ typename kll_sketch<T, C, S, A>::const_iterator& kll_sketch<T, C, S, A>::const_i
 template<typename T, typename C, typename S, typename A>
 bool kll_sketch<T, C, S, A>::const_iterator::operator==(const const_iterator& other) const {
-  if (level != other.level) return false;
-  if (level == num_levels) return true; // end
   return index == other.index;
 }