RubyGems - datasketches - Versions diffs - 0.3.1 → 0.3.2 - Mend

datasketches 0.3.1 → 0.3.2

Files changed (113)

data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp CHANGED

@@ -27,7 +27,8 @@
 #include "serde.hpp"
 #include "binomial_bounds.hpp"
 #include "theta_helpers.hpp"
-#include "compact_theta_sketch_parser.hpp"
+#include "count_zeros.hpp"
+#include "bit_packing.hpp"
 namespace datasketches {
@@ -38,7 +39,8 @@ bool base_theta_sketch_alloc<A>::is_estimation_mode() const {
 template<typename A>
 double base_theta_sketch_alloc<A>::get_theta() const {
-  return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
+  return static_cast<double>(get_theta64()) /
+         static_cast<double>(theta_constants::MAX_THETA);
 }
 template<typename A>
@@ -343,12 +345,9 @@ template<typename A>
 void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
   const uint8_t preamble_longs = this->is_estimation_mode() ? 3 : this->is_empty() || entries_.size() == 1 ? 1 : 2;
   write(os, preamble_longs);
-  const uint8_t serial_version = SERIAL_VERSION;
-  write(os, serial_version);
-  const uint8_t type = SKETCH_TYPE;
-  write(os, type);
-  const uint16_t unused16 = 0;
-  write(os, unused16);
+  write(os, UNCOMPRESSED_SERIAL_VERSION);
+  write(os, SKETCH_TYPE);
+  write<uint16_t>(os, 0); // unused
   const uint8_t flags_byte(
     (1 << flags::IS_COMPACT) |
     (1 << flags::IS_READ_ONLY) |
@@ -356,13 +355,10 @@ void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
     (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
   write(os, flags_byte);
-  const uint16_t seed_hash = get_seed_hash();
-  write(os, seed_hash);
+  write(os, get_seed_hash());
   if (preamble_longs > 1) {
-    const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
-    write(os, num_entries);
-    const uint32_t unused32 = 0;
-    write(os, unused32);
+    write<uint32_t>(os, entries_.size());
+    write<uint32_t>(os, 0); // unused
   }
   if (this->is_estimation_mode()) write(os, this->theta_);
   if (entries_.size() > 0) write(os, entries_.data(), entries_.size() * sizeof(uint64_t));
@@ -376,11 +372,9 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
   vector_bytes bytes(size, 0, entries_.get_allocator());
   uint8_t* ptr = bytes.data() + header_size_bytes;
-  ptr += copy_to_mem(preamble_longs, ptr);
-  const uint8_t serial_version = SERIAL_VERSION;
-  ptr += copy_to_mem(serial_version, ptr);
-  const uint8_t type = SKETCH_TYPE;
-  ptr += copy_to_mem(type, ptr);
+  *ptr++ = preamble_longs;
+  *ptr++ = UNCOMPRESSED_SERIAL_VERSION;
+  *ptr++ = SKETCH_TYPE;
   ptr += sizeof(uint16_t); // unused
   const uint8_t flags_byte(
     (1 << flags::IS_COMPACT) |
@@ -388,12 +382,10 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
     (this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
     (this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
   );
-  ptr += copy_to_mem(flags_byte, ptr);
-  const uint16_t seed_hash = get_seed_hash();
-  ptr += copy_to_mem(seed_hash, ptr);
+  *ptr++ = flags_byte;
+  ptr += copy_to_mem(get_seed_hash(), ptr);
   if (preamble_longs > 1) {
-    const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
-    ptr += copy_to_mem(num_entries, ptr);
+    ptr += copy_to_mem<uint32_t>(entries_.size(), ptr);
     ptr += sizeof(uint32_t); // unused
   }
   if (this->is_estimation_mode()) ptr += copy_to_mem(theta_, ptr);
@@ -401,131 +393,342 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
   return bytes;
 }
+template<typename A>
+bool compact_theta_sketch_alloc<A>::is_suitable_for_compression() const {
+  if (!this->is_ordered() || entries_.size() == 0 ||
+        (entries_.size() == 1 && !this->is_estimation_mode())) return false;
+  return true;
+}
+template<typename A>
+void compact_theta_sketch_alloc<A>::serialize_compressed(std::ostream& os) const {
+  if (is_suitable_for_compression()) return serialize_version_4(os);
+  return serialize(os);
+}
+template<typename A>
+auto compact_theta_sketch_alloc<A>::serialize_compressed(unsigned header_size_bytes) const -> vector_bytes {
+  if (is_suitable_for_compression()) return serialize_version_4(header_size_bytes);
+  return serialize(header_size_bytes);
+}
+template<typename A>
+uint8_t compact_theta_sketch_alloc<A>::compute_min_leading_zeros() const {
+  // compression is based on leading zeros in deltas between ordered hash values
+  // assumes ordered sketch
+  uint64_t previous = 0;
+  uint64_t ored = 0;
+  for (const uint64_t entry: entries_) {
+    const uint64_t delta = entry - previous;
+    ored |= delta;
+    previous = entry;
+  }
+  return count_leading_zeros_in_u64(ored);
+}
+template<typename A>
+void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const {
+  const uint8_t preamble_longs = this->is_estimation_mode() ? 2 : 1;
+  const uint8_t entry_bits = 64 - compute_min_leading_zeros();
+  // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
+  const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
+  write(os, preamble_longs);
+  write(os, COMPRESSED_SERIAL_VERSION);
+  write(os, SKETCH_TYPE);
+  write(os, entry_bits);
+  write(os, num_entries_bytes);
+  const uint8_t flags_byte(
+    (1 << flags::IS_COMPACT) |
+    (1 << flags::IS_READ_ONLY) |
+    (1 << flags::IS_ORDERED)
+  );
+  write(os, flags_byte);
+  write(os, get_seed_hash());
+  if (this->is_estimation_mode()) write(os, this->theta_);
+  uint32_t num_entries = entries_.size();
+  for (unsigned i = 0; i < num_entries_bytes; ++i) {
+    write<uint8_t>(os, num_entries & 0xff);
+    num_entries >>= 8;
+  }
+  uint64_t previous = 0;
+  uint64_t deltas[8];
+  vector_bytes buffer(entry_bits, 0, entries_.get_allocator()); // block of 8 entries takes entry_bits bytes
+  // pack blocks of 8 deltas
+  unsigned i;
+  for (i = 0; i + 7 < entries_.size(); i += 8) {
+    for (unsigned j = 0; j < 8; ++j) {
+      deltas[j] = entries_[i + j] - previous;
+      previous = entries_[i + j];
+    }
+    pack_bits_block8(deltas, buffer.data(), entry_bits);
+    write(os, buffer.data(), buffer.size());
+  }
+  // pack extra deltas if fewer than 8 of them left
+  if (i < entries_.size()) {
+    uint8_t offset = 0;
+    uint8_t* ptr = buffer.data();
+    for (; i < entries_.size(); ++i) {
+      const uint64_t delta = entries_[i] - previous;
+      previous = entries_[i];
+      offset = pack_bits(delta, entry_bits, ptr, offset);
+    }
+    write(os, buffer.data(), ptr - buffer.data());
+  }
+}
+template<typename A>
+auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_bytes) const -> vector_bytes {
+  const uint8_t preamble_longs = this->is_estimation_mode() ? 2 : 1;
+  const uint8_t entry_bits = 64 - compute_min_leading_zeros();
+  const size_t compressed_bits = entry_bits * entries_.size();
+  // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
+  const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
+  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + num_entries_bytes
+      + whole_bytes_to_hold_bits(compressed_bits);
+  vector_bytes bytes(size, 0, entries_.get_allocator());
+  uint8_t* ptr = bytes.data() + header_size_bytes;
+  *ptr++ = preamble_longs;
+  *ptr++ = COMPRESSED_SERIAL_VERSION;
+  *ptr++ = SKETCH_TYPE;
+  *ptr++ = entry_bits;
+  *ptr++ = num_entries_bytes;
+  const uint8_t flags_byte(
+    (1 << flags::IS_COMPACT) |
+    (1 << flags::IS_READ_ONLY) |
+    (1 << flags::IS_ORDERED)
+  );
+  *ptr++ = flags_byte;
+  ptr += copy_to_mem(get_seed_hash(), ptr);
+  if (this->is_estimation_mode()) {
+    ptr += copy_to_mem(theta_, ptr);
+  }
+  uint32_t num_entries = entries_.size();
+  for (unsigned i = 0; i < num_entries_bytes; ++i) {
+    *ptr++ = num_entries & 0xff;
+    num_entries >>= 8;
+  }
+  uint64_t previous = 0;
+  uint64_t deltas[8];
+  // pack blocks of 8 deltas
+  unsigned i;
+  for (i = 0; i + 7 < entries_.size(); i += 8) {
+    for (unsigned j = 0; j < 8; ++j) {
+      deltas[j] = entries_[i + j] - previous;
+      previous = entries_[i + j];
+    }
+    pack_bits_block8(deltas, ptr, entry_bits);
+    ptr += entry_bits;
+  }
+  // pack extra deltas if fewer than 8 of them left
+  uint8_t offset = 0;
+  for (; i < entries_.size(); ++i) {
+    const uint64_t delta = entries_[i] - previous;
+    previous = entries_[i];
+    offset = pack_bits(delta, entry_bits, ptr, offset);
+  }
+  return bytes;
+}
 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
   const auto preamble_longs = read<uint8_t>(is);
   const auto serial_version = read<uint8_t>(is);
   const auto type = read<uint8_t>(is);
+  checker<true>::check_sketch_type(type, SKETCH_TYPE);
   switch (serial_version) {
-  case SERIAL_VERSION: {
-      read<uint16_t>(is); // unused
-      const auto flags_byte = read<uint8_t>(is);
-      const auto seed_hash = read<uint16_t>(is);
-      checker<true>::check_sketch_type(type, SKETCH_TYPE);
-      checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
-      const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
-      if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
-      uint64_t theta = theta_constants::MAX_THETA;
-      uint32_t num_entries = 0;
-      if (!is_empty) {
-        if (preamble_longs == 1) {
-          num_entries = 1;
-        } else {
-          num_entries = read<uint32_t>(is);
-          read<uint32_t>(is); // unused
-          if (preamble_longs > 2) {
-            theta = read<uint64_t>(is);
-          }
-        }
-      }
-      std::vector<uint64_t, A> entries(num_entries, 0, allocator);
-      if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
+    case 4:
+      return deserialize_v4(preamble_longs, is, seed, allocator);
+    case 3:
+      return deserialize_v3(preamble_longs, is, seed, allocator);
+    case 1:
+      return deserialize_v1(preamble_longs, is, seed, allocator);
+    case 2:
+      return deserialize_v2(preamble_longs, is, seed, allocator);
+    default:
+      throw std::invalid_argument("unexpected sketch serialization version " + std::to_string(serial_version));
+  }
+}
-      const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
+template<typename A>
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v1(
+    uint8_t, std::istream& is, uint64_t seed, const A& allocator)
+{
+  const auto seed_hash = compute_seed_hash(seed);
+  read<uint8_t>(is); // unused
+  read<uint32_t>(is); // unused
+  const auto num_entries = read<uint32_t>(is);
+  read<uint32_t>(is); //unused
+  const auto theta = read<uint64_t>(is);
+  std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+  bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
+  if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
+  if (!is.good()) throw std::runtime_error("error reading from std::istream");
+  return compact_theta_sketch_alloc(is_empty, true, seed_hash, theta, std::move(entries));
+}
+template<typename A>
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v2(
+    uint8_t preamble_longs, std::istream& is, uint64_t seed, const A& allocator)
+{
+  read<uint8_t>(is); // unused
+  read<uint16_t>(is); // unused
+  const uint16_t seed_hash = read<uint16_t>(is);
+  checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
+  if (preamble_longs == 1) {
+    if (!is.good()) throw std::runtime_error("error reading from std::istream");
+    std::vector<uint64_t, A> entries(0, 0, allocator);
+    return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
+  } else if (preamble_longs == 2) {
+    const uint32_t num_entries = read<uint32_t>(is);
+    read<uint32_t>(is); // unused
+    std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+    if (num_entries == 0) {
+      return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
+    }
+    read(is, entries.data(), entries.size() * sizeof(uint64_t));
+    if (!is.good()) throw std::runtime_error("error reading from std::istream");
+    return compact_theta_sketch_alloc(false, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
+  } else if (preamble_longs == 3) {
+    const uint32_t num_entries = read<uint32_t>(is);
+    read<uint32_t>(is); // unused
+    const auto theta = read<uint64_t>(is);
+    bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
+    std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+    if (is_empty) {
       if (!is.good()) throw std::runtime_error("error reading from std::istream");
-      return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
+      return compact_theta_sketch_alloc(true, true, seed_hash, theta, std::move(entries));
+    } else {
+      read(is, entries.data(), sizeof(uint64_t) * entries.size());
+      if (!is.good()) throw std::runtime_error("error reading from std::istream");
+      return compact_theta_sketch_alloc(false, true, seed_hash, theta, std::move(entries));
+    }
+  } else {
+    throw std::invalid_argument(std::to_string(preamble_longs) + " longs of premable, but expected 1, 2, or 3");
   }
-  case 1: {
-      const auto seed_hash = compute_seed_hash(seed);
-      checker<true>::check_sketch_type(type, SKETCH_TYPE);
-      read<uint8_t>(is); // unused
+}
+template<typename A>
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v3(
+    uint8_t preamble_longs, std::istream& is, uint64_t seed, const A& allocator)
+{
+  read<uint16_t>(is); // unused
+  const auto flags_byte = read<uint8_t>(is);
+  const auto seed_hash = read<uint16_t>(is);
+  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
+  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
+  uint64_t theta = theta_constants::MAX_THETA;
+  uint32_t num_entries = 0;
+  if (!is_empty) {
+    if (preamble_longs == 1) {
+      num_entries = 1;
+    } else {
+      num_entries = read<uint32_t>(is);
       read<uint32_t>(is); // unused
-      const auto num_entries = read<uint32_t>(is);
-      read<uint32_t>(is); //unused
-      const auto theta = read<uint64_t>(is);
-      std::vector<uint64_t, A> entries(num_entries, 0, allocator);
-      bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
-      if (!is_empty)
-          read(is, entries.data(), sizeof(uint64_t) * entries.size());
-      if (!is.good())
-          throw std::runtime_error("error reading from std::istream");
-      return compact_theta_sketch_alloc(is_empty, true, seed_hash, theta, std::move(entries));
+      if (preamble_longs > 2) theta = read<uint64_t>(is);
+    }
   }
-  case 2: {
-      checker<true>::check_sketch_type(type, SKETCH_TYPE);
-      read<uint8_t>(is); // unused
-      read<uint16_t>(is); // unused
-      const uint16_t seed_hash = read<uint16_t>(is);
-      checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
-      if (preamble_longs == 1) {
-          if (!is.good())
-              throw std::runtime_error("error reading from std::istream");
-          std::vector<uint64_t, A> entries(0, 0, allocator);
-          return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
-      } else if (preamble_longs == 2) {
-          const uint32_t num_entries = read<uint32_t>(is);
-          read<uint32_t>(is); // unused
-          std::vector<uint64_t, A> entries(num_entries, 0, allocator);
-          if (num_entries == 0) {
-              return compact_theta_sketch_alloc(true, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
-          }
-          read(is, entries.data(), entries.size() * sizeof(uint64_t));
-          if (!is.good())
-              throw std::runtime_error("error reading from std::istream");
-          return compact_theta_sketch_alloc(false, true, seed_hash, theta_constants::MAX_THETA, std::move(entries));
-      } else if (preamble_longs == 3) {
-          const uint32_t num_entries = read<uint32_t>(is);
-          read<uint32_t>(is); // unused
-          const auto theta = read<uint64_t>(is);
-          bool is_empty = (num_entries == 0) && (theta == theta_constants::MAX_THETA);
-          std::vector<uint64_t, A> entries(num_entries, 0, allocator);
-          if (is_empty) {
-              if (!is.good())
-                  throw std::runtime_error("error reading from std::istream");
-              return compact_theta_sketch_alloc(true, true, seed_hash, theta, std::move(entries));
-          } else {
-              read(is, entries.data(), sizeof(uint64_t) * entries.size());
-              if (!is.good())
-                  throw std::runtime_error("error reading from std::istream");
-              return compact_theta_sketch_alloc(false, true, seed_hash, theta, std::move(entries));
-          }
-      } else {
-          throw std::invalid_argument(std::to_string(preamble_longs) + " longs of premable, but expected 1, 2, or 3");
-      }
+  std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+  if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
+  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
+  if (!is.good()) throw std::runtime_error("error reading from std::istream");
+  return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
+}
+template<typename A>
+compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize_v4(
+    uint8_t preamble_longs, std::istream& is, uint64_t seed, const A& allocator)
+{
+  const auto entry_bits = read<uint8_t>(is);
+  const auto num_entries_bytes = read<uint8_t>(is);
+  const auto flags_byte = read<uint8_t>(is);
+  const auto seed_hash = read<uint16_t>(is);
+  const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
+  if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
+  uint64_t theta = theta_constants::MAX_THETA;
+  if (preamble_longs > 1) theta = read<uint64_t>(is);
+  uint32_t num_entries = 0;
+  for (unsigned i = 0; i < num_entries_bytes; ++i) {
+    num_entries |= read<uint8_t>(is) << (i << 3);
+  }
+  vector_bytes buffer(entry_bits, 0, allocator); // block of 8 entries takes entry_bits bytes
+  std::vector<uint64_t, A> entries(num_entries, 0, allocator);
+  // unpack blocks of 8 deltas
+  unsigned i;
+  for (i = 0; i + 7 < num_entries; i += 8) {
+    read(is, buffer.data(), buffer.size());
+    unpack_bits_block8(&entries[i], buffer.data(), entry_bits);
   }
-  default:
-      // this should always fail since the valid cases are handled above
-      checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
-      // this throw is never reached, because check_serial_version will throw an informative exception.
-      // This is only here to avoid a compiler warning about a path without a return value.
-      throw std::invalid_argument("unexpected sketch serialization version");
+  // unpack extra deltas if fewer than 8 of them left
+  if (i < num_entries) read(is, buffer.data(), whole_bytes_to_hold_bits((num_entries - i) * entry_bits));
+  if (!is.good()) throw std::runtime_error("error reading from std::istream");
+  const uint8_t* ptr = buffer.data();
+  uint8_t offset = 0;
+  for (; i < num_entries; ++i) {
+    offset = unpack_bits(entries[i], entry_bits, ptr, offset);
   }
+  // undo deltas
+  uint64_t previous = 0;
+  for (i = 0; i < num_entries; ++i) {
+    entries[i] += previous;
+    previous = entries[i];
+  }
+  const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
+  return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
 }
 template<typename A>
 compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
   auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed, false);
-  return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta, std::vector<uint64_t, A>(data.entries, data.entries + data.num_entries, allocator));
+  if (data.entry_bits == 64) { // versions 1 to 3
+    const uint64_t* entries = reinterpret_cast<const uint64_t*>(data.entries_start_ptr);
+    return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta,
+        std::vector<uint64_t, A>(entries, entries + data.num_entries, allocator));
+  } else { // version 4
+    std::vector<uint64_t, A> entries(data.num_entries, 0, allocator);
+    const uint8_t* ptr = reinterpret_cast<const uint8_t*>(data.entries_start_ptr);
+    // unpack blocks of 8 deltas
+    unsigned i;
+    for (i = 0; i + 7 < data.num_entries; i += 8) {
+      unpack_bits_block8(&entries[i], ptr, data.entry_bits);
+      ptr += data.entry_bits;
+    }
+    // unpack extra deltas if fewer than 8 of them left
+    uint8_t offset = 0;
+    for (; i < data.num_entries; ++i) {
+      offset = unpack_bits(entries[i], data.entry_bits, ptr, offset);
+    }
+    // undo deltas
+    uint64_t previous = 0;
+    for (i = 0; i < data.num_entries; ++i) {
+      entries[i] += previous;
+      previous = entries[i];
+    }
+    return compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.theta, std::move(entries));
+  }
 }
 // wrapped compact sketch
 template<typename A>
-wrapped_compact_theta_sketch_alloc<A>::wrapped_compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint32_t num_entries,
-    uint64_t theta, const uint64_t* entries):
-is_empty_(is_empty),
-is_ordered_(is_ordered),
-seed_hash_(seed_hash),
-num_entries_(num_entries),
-theta_(theta),
-entries_(entries)
+wrapped_compact_theta_sketch_alloc<A>::wrapped_compact_theta_sketch_alloc(const data_type& data):
+data_(data)
 {}
 template<typename A>
 const wrapped_compact_theta_sketch_alloc<A> wrapped_compact_theta_sketch_alloc<A>::wrap(const void* bytes, size_t size, uint64_t seed, bool dump_on_error) {
-  auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed, dump_on_error);
-  return wrapped_compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.num_entries, data.theta, data.entries);
+  return wrapped_compact_theta_sketch_alloc(compact_theta_sketch_parser<true>::parse(bytes, size, seed, dump_on_error));
 }
 template<typename A>
@@ -535,37 +738,37 @@ A wrapped_compact_theta_sketch_alloc<A>::get_allocator() const {
 template<typename A>
 bool wrapped_compact_theta_sketch_alloc<A>::is_empty() const {
-  return is_empty_;
+  return data_.is_empty;
 }
 template<typename A>
 bool wrapped_compact_theta_sketch_alloc<A>::is_ordered() const {
-  return is_ordered_;
+  return data_.is_ordered;
 }
 template<typename A>
 uint64_t wrapped_compact_theta_sketch_alloc<A>::get_theta64() const {
-  return theta_;
+  return data_.theta;
 }
 template<typename A>
 uint32_t wrapped_compact_theta_sketch_alloc<A>::get_num_retained() const {
-  return static_cast<uint32_t>(num_entries_);
+  return data_.num_entries;
 }
 template<typename A>
 uint16_t wrapped_compact_theta_sketch_alloc<A>::get_seed_hash() const {
-  return seed_hash_;
+  return data_.seed_hash;
 }
 template<typename A>
 auto wrapped_compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
-  return entries_;
+  return const_iterator(data_.entries_start_ptr, data_.entry_bits, data_.num_entries, 0);
 }
 template<typename A>
 auto wrapped_compact_theta_sketch_alloc<A>::end() const -> const_iterator {
-  return entries_ + num_entries_;
+  return const_iterator(data_.entries_start_ptr, data_.entry_bits, data_.num_entries, data_.num_entries);
 }
 template<typename A>
@@ -574,12 +777,109 @@ void wrapped_compact_theta_sketch_alloc<A>::print_specifics(std::ostringstream&)
 template<typename A>
 void wrapped_compact_theta_sketch_alloc<A>::print_items(std::ostringstream& os) const {
     os << "### Retained entries" << std::endl;
-    for (const auto& hash: *this) {
+    for (const auto hash: *this) {
       os << hash << std::endl;
     }
     os << "### End retained entries" << std::endl;
 }
+// assumes index == 0 or index == num_entries
+template<typename Allocator>
+wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::const_iterator(
+    const void* ptr, uint8_t entry_bits, uint32_t num_entries, uint32_t index):
+ptr_(ptr),
+entry_bits_(entry_bits),
+num_entries_(num_entries),
+index_(index),
+previous_(0),
+is_block_mode_(num_entries_ >= 8),
+buf_i_(0),
+offset_(0)
+{
+  if (entry_bits == 64) { // no compression
+    ptr_ = reinterpret_cast<const uint64_t*>(ptr) + index;
+  } else if (index < num_entries) {
+    if (is_block_mode_) {
+      unpack_bits_block8(buffer_, reinterpret_cast<const uint8_t*>(ptr_), entry_bits_);
+      ptr_ = reinterpret_cast<const uint8_t*>(ptr_) + entry_bits_;
+      for (int i = 0; i < 8; ++i) {
+        buffer_[i] += previous_;
+        previous_ = buffer_[i];
+      }
+    } else {
+      offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
+      buffer_[0] += previous_;
+      previous_ = buffer_[0];
+    }
+  }
+}
+template<typename Allocator>
+auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++() -> const_iterator& {
+  if (entry_bits_ == 64) { // no compression
+    ptr_ = reinterpret_cast<const uint64_t*>(ptr_) + 1;
+    return *this;
+  }
+  ++index_;
+  if (index_ < num_entries_) {
+    if (is_block_mode_) {
+      ++buf_i_;
+      if (buf_i_ == 8) {
+        buf_i_ = 0;
+        if (index_ + 8 < num_entries_) {
+          unpack_bits_block8(buffer_, reinterpret_cast<const uint8_t*>(ptr_), entry_bits_);
+          ptr_ = reinterpret_cast<const uint8_t*>(ptr_) + entry_bits_;
+          for (int i = 0; i < 8; ++i) {
+            buffer_[i] += previous_;
+            previous_ = buffer_[i];
+          }
+        } else {
+          is_block_mode_ = false;
+          offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
+          buffer_[0] += previous_;
+          previous_ = buffer_[0];
+        }
+      }
+    } else {
+      offset_ = unpack_bits(buffer_[0], entry_bits_, reinterpret_cast<const uint8_t*&>(ptr_), offset_);
+      buffer_[0] += previous_;
+      previous_ = buffer_[0];
+    }
+  }
+  return *this;
+}
+template<typename Allocator>
+auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator++(int) -> const_iterator {
+  const_iterator tmp(*this);
+  operator++();
+  return tmp;
+}
+template<typename Allocator>
+bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator!=(const const_iterator& other) const {
+  if (entry_bits_ == 64) return ptr_ != other.ptr_;
+  return index_ != other.index_;
+}
+template<typename Allocator>
+bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator==(const const_iterator& other) const {
+  if (entry_bits_ == 64) return ptr_ == other.ptr_;
+  return index_ == other.index_;
+}
+template<typename Allocator>
+const uint64_t& wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const {
+  if (entry_bits_ == 64) return *reinterpret_cast<const uint64_t*>(ptr_);
+  return buffer_[buf_i_];
+}
+template<typename Allocator>
+const uint64_t* wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const {
+  if (entry_bits_ == 64) return reinterpret_cast<const uint64_t*>(ptr_);
+  return buffer_ + buf_i_;
+}
 } /* namespace datasketches */
 #endif