RubyGems - datasketches - Versions diffs - 0.2.1 → 0.2.2 - Mend

datasketches 0.2.1 → 0.2.2

Files changed (117) hide show

data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp CHANGED Viewed

@@ -191,8 +191,8 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_accumulator() const {
 template<typename A>
 cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
-  const uint64_t k = 1 << lg_k;
-  const uint64_t num_coupons = count_bits_set_in_matrix(bit_matrix.data(), k);
+  const uint32_t k = 1 << lg_k;
+  const uint32_t num_coupons = count_bits_set_in_matrix(bit_matrix.data(), k);
   const auto flavor = cpc_sketch_alloc<A>::determine_flavor(lg_k, num_coupons);
   if (flavor != cpc_sketch_alloc<A>::flavor::HYBRID && flavor != cpc_sketch_alloc<A>::flavor::PINNED
@@ -215,7 +215,7 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
   // The snowplow effect was caused by processing the rows in order,
   // but we have fixed it by using a sufficiently large hash table.
-  for (unsigned i = 0; i < k; i++) {
+  for (uint32_t i = 0; i < k; i++) {
     uint64_t pattern = bit_matrix[i];
     sliding_window[i] = (pattern >> offset) & 0xff;
     pattern &= mask_for_clearing_window;
@@ -250,17 +250,17 @@ void cpc_union_alloc<A>::switch_to_bit_matrix() {
 template<typename A>
 void cpc_union_alloc<A>::walk_table_updating_sketch(const u32_table<A>& table) {
   const uint32_t* slots = table.get_slots();
-  const size_t num_slots = 1 << table.get_lg_size();
+  const uint32_t num_slots = 1 << table.get_lg_size();
   const uint64_t dst_mask = (((1 << accumulator->get_lg_k()) - 1) << 6) | 63; // downsamples when dst lgK < src LgK
   // Using a golden ratio stride fixes the snowplow effect.
   const double golden = 0.6180339887498949025;
-  size_t stride = static_cast<size_t>(golden * static_cast<double>(num_slots));
+  uint32_t stride = static_cast<uint32_t>(golden * static_cast<double>(num_slots));
   if (stride < 2) throw std::logic_error("stride < 2");
   if (stride == ((stride >> 1) << 1)) stride += 1; // force the stride to be odd
   if (stride < 3 || stride >= num_slots) throw std::out_of_range("stride out of range");
-  for (size_t i = 0, j = 0; i < num_slots; i++, j += stride) {
+  for (uint32_t i = 0, j = 0; i < num_slots; i++, j += stride) {
     j &= num_slots - 1;
     const uint32_t row_col = slots[j];
     if (row_col != UINT32_MAX) {
@@ -272,13 +272,13 @@ void cpc_union_alloc<A>::walk_table_updating_sketch(const u32_table<A>& table) {
 template<typename A>
 void cpc_union_alloc<A>::or_table_into_matrix(const u32_table<A>& table) {
   const uint32_t* slots = table.get_slots();
-  const size_t num_slots = 1 << table.get_lg_size();
+  const uint32_t num_slots = 1 << table.get_lg_size();
   const uint64_t dest_mask = (1 << lg_k) - 1;  // downsamples when dst lgK < src LgK
-  for (size_t i = 0; i < num_slots; i++) {
+  for (uint32_t i = 0; i < num_slots; i++) {
     const uint32_t row_col = slots[i];
     if (row_col != UINT32_MAX) {
       const uint8_t col = row_col & 63;
-      const size_t row = row_col >> 6;
+      const uint32_t row = row_col >> 6;
       bit_matrix[row & dest_mask] |= static_cast<uint64_t>(1) << col; // set the bit
     }
   }
@@ -288,8 +288,8 @@ template<typename A>
 void cpc_union_alloc<A>::or_window_into_matrix(const vector_u8<A>& sliding_window, uint8_t offset, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
-  const size_t src_k = 1 << src_lg_k;
-  for (size_t src_row = 0; src_row < src_k; src_row++) {
+  const uint32_t src_k = 1 << src_lg_k;
+  for (uint32_t src_row = 0; src_row < src_k; src_row++) {
     bit_matrix[src_row & dst_mask] |= static_cast<uint64_t>(sliding_window[src_row]) << offset;
   }
 }
@@ -298,8 +298,8 @@ template<typename A>
 void cpc_union_alloc<A>::or_matrix_into_matrix(const vector_u64<A>& src_matrix, uint8_t src_lg_k) {
   if (lg_k > src_lg_k) throw std::logic_error("dst LgK > src LgK");
   const uint64_t dst_mask = (1 << lg_k) - 1; // downsamples when dst lgK < src LgK
-  const size_t src_k = 1 << src_lg_k;
-  for (size_t src_row = 0; src_row < src_k; src_row++) {
+  const uint32_t src_k = 1 << src_lg_k;
+  for (uint32_t src_row = 0; src_row < src_k; src_row++) {
     bit_matrix[src_row & dst_mask] |= src_matrix[src_row];
   }
 }
@@ -313,7 +313,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
     if (accumulator != nullptr) throw std::logic_error("accumulator is not null");
     vector_u64<A> old_matrix = std::move(bit_matrix);
     const uint8_t old_lg_k = lg_k;
-    const size_t new_k = 1 << new_lg_k;
+    const uint32_t new_k = 1 << new_lg_k;
     bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
     lg_k = new_lg_k;
     or_matrix_into_matrix(old_matrix, old_lg_k);

data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp CHANGED Viewed

@@ -31,9 +31,9 @@ static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
   else return quotient + 1;
 }
-static inline uint64_t long_floor_log2_of_long(uint64_t x) {
-  if (x < 1) throw std::invalid_argument("long_floor_log2_of_long: bad argument");
-  uint64_t p = 0;
+static inline uint8_t floor_log2_of_long(uint64_t x) {
+  if (x < 1) throw std::invalid_argument("floor_log2_of_long: bad argument");
+  uint8_t p = 0;
   uint64_t y = 1;
   while (true) {
     if (y == x) return p;
@@ -69,7 +69,7 @@ static inline uint64_t wegner_count_bits_set_in_matrix(const uint64_t* array, si
 // Note: this is an adaptation of the Java code,
 // which is apparently a variation of Figure 5-2 in "Hacker's Delight"
 // by Henry S. Warren.
-static inline uint64_t warren_bit_count(uint64_t i) {
+static inline uint32_t warren_bit_count(uint64_t i) {
   i = i - ((i >> 1) & 0x5555555555555555ULL);
   i = (i & 0x3333333333333333ULL) + ((i >> 2) & 0x3333333333333333ULL);
   i = (i + (i >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
@@ -79,9 +79,9 @@ static inline uint64_t warren_bit_count(uint64_t i) {
   return i & 0x7f;
 }
-static inline uint64_t warren_count_bits_set_in_matrix(const uint64_t* array, size_t length) {
-  uint64_t count = 0;
-  for (size_t i = 0; i < length; i++) {
+static inline uint32_t warren_count_bits_set_in_matrix(const uint64_t* array, uint32_t length) {
+  uint32_t count = 0;
+  for (uint32_t i = 0; i < length; i++) {
     count += warren_bit_count(array[i]);
   }
   return count;
@@ -91,13 +91,13 @@ static inline uint64_t warren_count_bits_set_in_matrix(const uint64_t* array, si
 #define CSA(h,l,a,b,c) {uint64_t u = a ^ b; uint64_t v = c; h = (a & b) | (u & v); l = u ^ v;}
-static inline uint64_t count_bits_set_in_matrix(const uint64_t* a, size_t length) {
+static inline uint32_t count_bits_set_in_matrix(const uint64_t* a, uint32_t length) {
   if ((length & 0x7) != 0) throw std::invalid_argument("the length of the array must be a multiple of 8");
-  uint64_t total = 0;
+  uint32_t total = 0;
   uint64_t ones, twos, twos_a, twos_b, fours, fours_a, fours_b, eights;
   fours = twos = ones = 0;
-  for (size_t i = 0; i <= length - 8; i = i + 8) {
+  for (uint32_t i = 0; i <= length - 8; i += 8) {
     CSA(twos_a, ones, ones, a[i+0], a[i+1]);
     CSA(twos_b, ones, ones, a[i+2], a[i+3]);
     CSA(fours_a, twos, twos, twos_a, twos_b);

data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp CHANGED Viewed

@@ -245,12 +245,12 @@ static inline double icon_exponential_approximation(double k, double c) {
   return (0.7940236163830469 * k * pow(2.0, c / k));
 }
-static inline double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
+static inline double compute_icon_estimate(uint8_t lg_k, uint32_t c) {
   if (lg_k < ICON_MIN_LOG_K || lg_k > ICON_MAX_LOG_K) throw std::out_of_range("lg_k out of range");
   if (c < 2) return ((c == 0) ? 0.0 : 1.0);
-  const size_t k = 1 << lg_k;
-  const double double_k = k;
-  const double double_c = c;
+  const uint32_t k = 1 << lg_k;
+  const double double_k = static_cast<double>(k);
+  const double double_c = static_cast<double>(c);
   // Differing thresholds ensure that the approximated estimator is monotonically increasing.
   const double threshold_factor = ((lg_k < 14) ? 5.7 : 5.6);
   if (double_c > (threshold_factor * double_k)) return icon_exponential_approximation(double_k, double_c);

data/vendor/datasketches-cpp/cpc/include/u32_table.hpp CHANGED Viewed

@@ -29,11 +29,11 @@
 namespace datasketches {
-static const uint64_t U32_TABLE_UPSIZE_NUMER = 3LL;
-static const uint64_t U32_TABLE_UPSIZE_DENOM = 4LL;
+static const uint32_t U32_TABLE_UPSIZE_NUMER = 3LL;
+static const uint32_t U32_TABLE_UPSIZE_DENOM = 4LL;
-static const uint64_t U32_TABLE_DOWNSIZE_NUMER = 1LL;
-static const uint64_t U32_TABLE_DOWNSIZE_DENOM = 4LL;
+static const uint32_t U32_TABLE_DOWNSIZE_NUMER = 1LL;
+static const uint32_t U32_TABLE_DOWNSIZE_DENOM = 4LL;
 template<typename A>
 class u32_table {
@@ -42,7 +42,7 @@ public:
   u32_table(const A& allocator);
   u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
-  inline size_t get_num_items() const;
+  inline uint32_t get_num_items() const;
   inline const uint32_t* get_slots() const;
   inline uint8_t get_lg_size() const;
   inline void clear();
@@ -52,7 +52,7 @@ public:
   // returns true iff the item was present and was therefore removed from the table
   inline bool maybe_delete(uint32_t item);
-  static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
+  static u32_table make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator);
   vector_u32<A> unwrapping_get_items() const;
@@ -69,10 +69,10 @@ private:
   uint8_t lg_size; // log2 of number of slots
   uint8_t num_valid_bits;
-  size_t num_items;
+  uint32_t num_items;
   vector_u32<A> slots;
-  inline size_t lookup(uint32_t item) const;
+  inline uint32_t lookup(uint32_t item) const;
   inline void must_insert(uint32_t item);
   inline void rebuild(uint8_t new_lg_size);
 };

data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp CHANGED Viewed

@@ -41,14 +41,14 @@ u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& alloca
 lg_size(lg_size),
 num_valid_bits(num_valid_bits),
 num_items(0),
-slots(1 << lg_size, UINT32_MAX, allocator)
+slots(1ULL << lg_size, UINT32_MAX, allocator)
 {
   if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
   if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
 }
 template<typename A>
-size_t u32_table<A>::get_num_items() const {
+uint32_t u32_table<A>::get_num_items() const {
   return num_items;
 }
@@ -70,7 +70,7 @@ void u32_table<A>::clear() {
 template<typename A>
 bool u32_table<A>::maybe_insert(uint32_t item) {
-  const size_t index = lookup(item);
+  const uint32_t index = lookup(item);
   if (slots[index] == item) return false;
   if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
   slots[index] = item;
@@ -83,7 +83,7 @@ bool u32_table<A>::maybe_insert(uint32_t item) {
 template<typename A>
 bool u32_table<A>::maybe_delete(uint32_t item) {
-  const size_t index = lookup(item);
+  const uint32_t index = lookup(item);
   if (slots[index] == UINT32_MAX) return false;
   if (slots[index] != item) throw std::logic_error("item does not exist");
   if (num_items == 0) throw std::logic_error("delete error");
@@ -110,7 +110,7 @@ bool u32_table<A>::maybe_delete(uint32_t item) {
 // this one is specifically tailored to be a part of fm85 decompression scheme
 template<typename A>
-u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
+u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, uint32_t num_pairs, uint8_t lg_k, const A& allocator) {
   uint8_t lg_num_slots = 2;
   while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
   u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
@@ -124,11 +124,11 @@ u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pai
 }
 template<typename A>
-size_t u32_table<A>::lookup(uint32_t item) const {
-  const size_t size = 1 << lg_size;
-  const size_t mask = size - 1;
+uint32_t u32_table<A>::lookup(uint32_t item) const {
+  const uint32_t size = 1 << lg_size;
+  const uint32_t mask = size - 1;
   const uint8_t shift = num_valid_bits - lg_size;
-  size_t probe = item >> shift;
+  uint32_t probe = item >> shift;
   if (probe > mask) throw std::logic_error("probe out of range");
   while (slots[probe] != item && slots[probe] != UINT32_MAX) {
     probe = (probe + 1) & mask;
@@ -139,7 +139,7 @@ size_t u32_table<A>::lookup(uint32_t item) const {
 // counts and resizing must be handled by the caller
 template<typename A>
 void u32_table<A>::must_insert(uint32_t item) {
-  const size_t index = lookup(item);
+  const uint32_t index = lookup(item);
   if (slots[index] == item) throw std::logic_error("item exists");
   if (slots[index] != UINT32_MAX) throw std::logic_error("could not insert");
   slots[index] = item;
@@ -148,13 +148,13 @@ void u32_table<A>::must_insert(uint32_t item) {
 template<typename A>
 void u32_table<A>::rebuild(uint8_t new_lg_size) {
   if (new_lg_size < 2) throw std::logic_error("lg_size must be >= 2");
-  const size_t old_size = 1 << lg_size;
-  const size_t new_size = 1 << new_lg_size;
+  const uint32_t old_size = 1 << lg_size;
+  const uint32_t new_size = 1 << new_lg_size;
   if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
   vector_u32<A> old_slots = std::move(slots);
   slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
   lg_size = new_lg_size;
-  for (size_t i = 0; i < old_size; i++) {
+  for (uint32_t i = 0; i < old_size; i++) {
     if (old_slots[i] != UINT32_MAX) {
       must_insert(old_slots[i]);
     }
@@ -170,7 +170,7 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
 template<typename A>
 vector_u32<A> u32_table<A>::unwrapping_get_items() const {
   if (num_items == 0) return vector_u32<A>(slots.get_allocator());
-  const size_t table_size = 1 << lg_size;
+  const uint32_t table_size = 1 << lg_size;
   vector_u32<A> result(num_items, 0, slots.get_allocator());
   size_t i = 0;
   size_t l = 0;

data/vendor/datasketches-cpp/cpc/test/compression_test.cpp CHANGED Viewed

@@ -27,38 +27,38 @@ namespace datasketches {
 typedef u32_table<std::allocator<void>> table;
 TEST_CASE("cpc sketch: compress and decompress pairs", "[cpc_sketch]") {
-  const int N = 200;
-  const int MAXWORDS = 1000;
+  const size_t N = 200;
+  const size_t MAXWORDS = 1000;
   HashState twoHashes;
   uint32_t pairArray[N];
   uint32_t pairArray2[N];
   uint64_t value = 35538947; // some arbitrary starting value
   const uint64_t golden64 = 0x9e3779b97f4a7c13ULL; // the golden ratio
-  for (int i = 0; i < N; i++) {
+  for (size_t i = 0; i < N; i++) {
     MurmurHash3_x64_128(&value, sizeof(value), 0, twoHashes);
     uint32_t rand = twoHashes.h1 & 0xffff;
     pairArray[i] = rand;
     value += golden64;
   }
   //table::knuth_shell_sort3(pairArray, 0, N - 1); // unsigned numerical sort
-  std::sort(pairArray, &pairArray[N]);
+  std::sort(pairArray, pairArray + N);
   uint32_t prev = UINT32_MAX;
-  int nxt = 0;
-  for (int i = 0; i < N; i++) { // uniquify
+  uint32_t nxt = 0;
+  for (size_t i = 0; i < N; i++) { // uniquify
     if (pairArray[i] != prev) {
       prev = pairArray[i];
       pairArray[nxt++] = pairArray[i];
     }
   }
-  int numPairs = nxt;
+  uint32_t numPairs = nxt;
   uint32_t compressedWords[MAXWORDS];
-  for (size_t numBaseBits = 0; numBaseBits <= 11; numBaseBits++) {
-    size_t numWordsWritten = get_compressor<std::allocator<void>>().low_level_compress_pairs(pairArray, numPairs, numBaseBits, compressedWords);
+  for (uint8_t numBaseBits = 0; numBaseBits <= 11; numBaseBits++) {
+    uint32_t numWordsWritten = get_compressor<std::allocator<void>>().low_level_compress_pairs(pairArray, numPairs, numBaseBits, compressedWords);
     get_compressor<std::allocator<void>>().low_level_uncompress_pairs(pairArray2, numPairs, numBaseBits, compressedWords, numWordsWritten);
-    for (int i = 0; i < numPairs; i++) {
+    for (size_t i = 0; i < numPairs; i++) {
       REQUIRE(pairArray[i] == pairArray2[i]);
     }
   }

data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp CHANGED Viewed

@@ -283,6 +283,26 @@ TEST_CASE("cpc sketch: serialize deserialize sliding, bytes", "[cpc_sketch]") {
   REQUIRE(deserialized.validate());
 }
+TEST_CASE("cpc sketch: serialize deserialize sliding huge", "[cpc_sketch]") {
+  cpc_sketch sketch(26);
+  const int n = 10000000;
+  for (int i = 0; i < n; i++) sketch.update(i);
+  REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.001));
+  auto bytes = sketch.serialize();
+  cpc_sketch deserialized = cpc_sketch::deserialize(bytes.data(), bytes.size());
+  REQUIRE(deserialized.is_empty() == sketch.is_empty());
+  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+  REQUIRE(deserialized.validate());
+  REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), 7), std::out_of_range);
+  REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), 15), std::out_of_range);
+  REQUIRE_THROWS_AS(cpc_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
+  // updating again with the same values should not change the sketch
+  for (int i = 0; i < n; i++) deserialized.update(i);
+  REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
+  REQUIRE(deserialized.validate());
+}
 TEST_CASE("cpc sketch: copy", "[cpc_sketch]") {
   cpc_sketch s1(11);
   s1.update(1);
@@ -378,4 +398,9 @@ TEST_CASE("cpc sketch: update string equivalence", "[cpc_sketch]") {
   REQUIRE(sketch.get_estimate() == Approx(1).margin(RELATIVE_ERROR_FOR_LG_K_11));
 }
+TEST_CASE("cpc sketch: max serialized size", "[cpc_sketch]") {
+  REQUIRE(cpc_sketch::get_max_serialized_size_bytes(4) == 24 + 40);
+  REQUIRE(cpc_sketch::get_max_serialized_size_bytes(26) == static_cast<size_t>((0.6 * (1 << 26)) + 40));
+}
 } /* namespace datasketches */

data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp CHANGED Viewed

@@ -81,7 +81,7 @@ TEST_CASE("cpc union: large", "[cpc_union]") {
   cpc_union u(11);
   for (int i = 0; i < 1000; i++) {
     cpc_sketch tmp(11);
-    for (int i = 0; i < 10000; i++) {
+    for (int j = 0; j < 10000; j++) {
       s.update(key);
       tmp.update(key);
       key++;

data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp CHANGED Viewed

@@ -65,7 +65,7 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
 void frequent_items_sketch<T, W, H, E, S, A>::merge(const frequent_items_sketch& other) {
   if (other.is_empty()) return;
   const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
-  for (auto &it: other.map) {
+  for (auto it: other.map) {
     update(it.first, it.second);
   }
   offset += other.offset;
@@ -76,7 +76,7 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
 void frequent_items_sketch<T, W, H, E, S, A>::merge(frequent_items_sketch&& other) {
   if (other.is_empty()) return;
   const W merged_total_weight = total_weight + other.get_total_weight(); // for correction at the end
-  for (auto &it: other.map) {
+  for (auto it: other.map) {
     update(std::move(it.first), it.second);
   }
   offset += other.offset;
@@ -147,7 +147,7 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
 typename frequent_items_sketch<T, W, H, E, S, A>::vector_row
 frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error_type err_type, W threshold) const {
   vector_row items(map.get_allocator());
-  for (auto &it: map) {
+  for (auto it: map) {
     const W lb = it.second;
     const W ub = it.second + offset;
     if ((err_type == NO_FALSE_NEGATIVES && ub > threshold) || (err_type == NO_FALSE_POSITIVES && lb > threshold)) {
@@ -162,28 +162,28 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error
 template<typename T, typename W, typename H, typename E, typename S, typename A>
 void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const {
   const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
-  os.write((char*)&preamble_longs, sizeof(preamble_longs));
+  write(os, preamble_longs);
   const uint8_t serial_version = SERIAL_VERSION;
-  os.write((char*)&serial_version, sizeof(serial_version));
+  write(os, serial_version);
   const uint8_t family = FAMILY_ID;
-  os.write((char*)&family, sizeof(family));
+  write(os, family);
   const uint8_t lg_max_size = map.get_lg_max_size();
-  os.write((char*)&lg_max_size, sizeof(lg_max_size));
+  write(os, lg_max_size);
   const uint8_t lg_cur_size = map.get_lg_cur_size();
-  os.write((char*)&lg_cur_size, sizeof(lg_cur_size));
+  write(os, lg_cur_size);
   const uint8_t flags_byte(
     (is_empty() ? 1 << flags::IS_EMPTY : 0)
   );
-  os.write((char*)&flags_byte, sizeof(flags_byte));
+  write(os, flags_byte);
   const uint16_t unused16 = 0;
-  os.write((char*)&unused16, sizeof(unused16));
+  write(os, unused16);
   if (!is_empty()) {
     const uint32_t num_items = map.get_num_active();
-    os.write((char*)&num_items, sizeof(num_items));
+    write(os, num_items);
     const uint32_t unused32 = 0;
-    os.write((char*)&unused32, sizeof(unused32));
-    os.write((char*)&total_weight, sizeof(total_weight));
-    os.write((char*)&offset, sizeof(offset));
+    write(os, unused32);
+    write(os, total_weight);
+    write(os, offset);
     // copy active items and their weights to use batch serialization
     using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
@@ -192,14 +192,14 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const
     A alloc(map.get_allocator());
     T* items = alloc.allocate(num_items);
     uint32_t i = 0;
-    for (auto &it: map) {
+    for (auto it: map) {
       new (&items[i]) T(it.first);
       weights[i++] = it.second;
     }
-    os.write((char*)weights, sizeof(W) * num_items);
+    write(os, weights, sizeof(W) * num_items);
     aw.deallocate(weights, num_items);
     S().serialize(os, items, num_items);
-    for (unsigned i = 0; i < num_items; i++) items[i].~T();
+    for (i = 0; i < num_items; i++) items[i].~T();
     alloc.deallocate(items, num_items);
   }
 }
@@ -208,7 +208,7 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
 size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes() const {
   if (is_empty()) return PREAMBLE_LONGS_EMPTY * sizeof(uint64_t);
   size_t size = PREAMBLE_LONGS_NONEMPTY * sizeof(uint64_t) + map.get_num_active() * sizeof(W);
-  for (auto &it: map) size += S().size_of_item(it.first);
+  for (auto it: map) size += S().size_of_item(it.first);
   return size;
 }
@@ -220,28 +220,26 @@ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_byt
   uint8_t* end_ptr = ptr + size;
   const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
-  ptr += copy_to_mem(&preamble_longs, ptr, sizeof(uint8_t));
+  ptr += copy_to_mem(preamble_longs, ptr);
   const uint8_t serial_version = SERIAL_VERSION;
-  ptr += copy_to_mem(&serial_version, ptr, sizeof(uint8_t));
+  ptr += copy_to_mem(serial_version, ptr);
   const uint8_t family = FAMILY_ID;
-  ptr += copy_to_mem(&family, ptr, sizeof(uint8_t));
+  ptr += copy_to_mem(family, ptr);
   const uint8_t lg_max_size = map.get_lg_max_size();
-  ptr += copy_to_mem(&lg_max_size, ptr, sizeof(uint8_t));
+  ptr += copy_to_mem(lg_max_size, ptr);
   const uint8_t lg_cur_size = map.get_lg_cur_size();
-  ptr += copy_to_mem(&lg_cur_size, ptr, sizeof(uint8_t));
+  ptr += copy_to_mem(lg_cur_size, ptr);
   const uint8_t flags_byte(
     (is_empty() ? 1 << flags::IS_EMPTY : 0)
   );
-  ptr += copy_to_mem(&flags_byte, ptr, sizeof(uint8_t));
-  const uint16_t unused16 = 0;
-  ptr += copy_to_mem(&unused16, ptr, sizeof(uint16_t));
+  ptr += copy_to_mem(flags_byte, ptr);
+  ptr += sizeof(uint16_t); // unused
   if (!is_empty()) {
     const uint32_t num_items = map.get_num_active();
-    ptr += copy_to_mem(&num_items, ptr, sizeof(uint32_t));
-    const uint32_t unused32 = 0;
-    ptr += copy_to_mem(&unused32, ptr, sizeof(uint32_t));
-    ptr += copy_to_mem(&total_weight, ptr, sizeof(total_weight));
-    ptr += copy_to_mem(&offset, ptr, sizeof(offset));
+    ptr += copy_to_mem(num_items, ptr);
+    ptr += sizeof(uint32_t); // unused
+    ptr += copy_to_mem(total_weight, ptr);
+    ptr += copy_to_mem(offset, ptr);
     // copy active items and their weights to use batch serialization
     using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
@@ -250,7 +248,7 @@ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_byt
     A alloc(map.get_allocator());
     T* items = alloc.allocate(num_items);
     uint32_t i = 0;
-    for (auto &it: map) {
+    for (auto it: map) {
       new (&items[i]) T(it.first);
       weights[i++] = it.second;
     }
@@ -258,7 +256,7 @@ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_byt
     aw.deallocate(weights, num_items);
     const size_t bytes_remaining = end_ptr - ptr;
     ptr += S().serialize(ptr, bytes_remaining, items, num_items);
-    for (unsigned i = 0; i < num_items; i++) items[i].~T();
+    for (i = 0; i < num_items; i++) items[i].~T();
     alloc.deallocate(items, num_items);
   }
   return bytes;
@@ -268,38 +266,31 @@ template<typename T, typename W, typename H, typename E, typename S, typename A>
 class frequent_items_sketch<T, W, H, E, S, A>::items_deleter {
 public:
   items_deleter(uint32_t num, bool destroy, const A& allocator):
-    allocator(allocator), num(num), destroy(destroy) {}
-  void set_destroy(bool destroy) { this->destroy = destroy; }
+    allocator_(allocator), num_(num), destroy_(destroy) {}
+  void set_destroy(bool destroy) { destroy_ = destroy; }
   void operator() (T* ptr) {
     if (ptr != nullptr) {
-      if (destroy) {
-        for (uint32_t i = 0; i < num; ++i) ptr[i].~T();
+      if (destroy_) {
+        for (uint32_t i = 0; i < num_; ++i) ptr[i].~T();
       }
-      allocator.deallocate(ptr, num);
+      allocator_.deallocate(ptr, num_);
     }
   }
 private:
-  A allocator;
-  uint32_t num;
-  bool destroy;
+  A allocator_;
+  uint32_t num_;
+  bool destroy_;
 };
 template<typename T, typename W, typename H, typename E, typename S, typename A>
 frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
-  uint8_t preamble_longs;
-  is.read((char*)&preamble_longs, sizeof(preamble_longs));
-  uint8_t serial_version;
-  is.read((char*)&serial_version, sizeof(serial_version));
-  uint8_t family_id;
-  is.read((char*)&family_id, sizeof(family_id));
-  uint8_t lg_max_size;
-  is.read((char*)&lg_max_size, sizeof(lg_max_size));
-  uint8_t lg_cur_size;
-  is.read((char*)&lg_cur_size, sizeof(lg_cur_size));
-  uint8_t flags_byte;
-  is.read((char*)&flags_byte, sizeof(flags_byte));
-  uint16_t unused16;
-  is.read((char*)&unused16, sizeof(unused16));
+  const auto preamble_longs = read<uint8_t>(is);
+  const auto serial_version = read<uint8_t>(is);
+  const auto family_id = read<uint8_t>(is);
+  const auto lg_max_size = read<uint8_t>(is);
+  const auto lg_cur_size = read<uint8_t>(is);
+  const auto flags_byte = read<uint8_t>(is);
+  read<uint16_t>(is); // unused
   const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
@@ -310,19 +301,15 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
   frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
   if (!is_empty) {
-    uint32_t num_items;
-    is.read((char*)&num_items, sizeof(num_items));
-    uint32_t unused32;
-    is.read((char*)&unused32, sizeof(unused32));
-    W total_weight;
-    is.read((char*)&total_weight, sizeof(total_weight));
-    W offset;
-    is.read((char*)&offset, sizeof(offset));
+    const auto num_items = read<uint32_t>(is);
+    read<uint32_t>(is); // unused
+    const auto total_weight = read<W>(is);
+    const auto offset = read<W>(is);
     // batch deserialization with intermediate array of items and weights
     using AllocW = typename std::allocator_traits<A>::template rebind_alloc<W>;
     std::vector<W, AllocW> weights(num_items, 0, allocator);
-    is.read((char*)weights.data(), sizeof(W) * num_items);
+    read(is, weights.data(), sizeof(W) * num_items);
     A alloc(allocator);
     std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
     S().deserialize(is, items.get(), num_items);
@@ -344,19 +331,18 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
   const char* ptr = static_cast<const char*>(bytes);
   const char* base = static_cast<const char*>(bytes);
   uint8_t preamble_longs;
-  ptr += copy_from_mem(ptr, &preamble_longs, sizeof(uint8_t));
+  ptr += copy_from_mem(ptr, preamble_longs);
   uint8_t serial_version;
-  ptr += copy_from_mem(ptr, &serial_version, sizeof(uint8_t));
+  ptr += copy_from_mem(ptr, serial_version);
   uint8_t family_id;
-  ptr += copy_from_mem(ptr, &family_id, sizeof(uint8_t));
+  ptr += copy_from_mem(ptr, family_id);
   uint8_t lg_max_size;
-  ptr += copy_from_mem(ptr, &lg_max_size, sizeof(uint8_t));
+  ptr += copy_from_mem(ptr, lg_max_size);
   uint8_t lg_cur_size;
-  ptr += copy_from_mem(ptr, &lg_cur_size, sizeof(uint8_t));
+  ptr += copy_from_mem(ptr, lg_cur_size);
   uint8_t flags_byte;
-  ptr += copy_from_mem(ptr, &flags_byte, sizeof(uint8_t));
-  uint16_t unused16;
-  ptr += copy_from_mem(ptr, &unused16, sizeof(uint16_t));
+  ptr += copy_from_mem(ptr, flags_byte);
+  ptr += sizeof(uint16_t); // unused
   const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
@@ -364,18 +350,17 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
   check_serial_version(serial_version);
   check_family_id(family_id);
   check_size(lg_cur_size, lg_max_size);
-  ensure_minimum_memory(size, 1 << preamble_longs);
+  ensure_minimum_memory(size, 1ULL << preamble_longs);
   frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
   if (!is_empty) {
     uint32_t num_items;
-    ptr += copy_from_mem(ptr, &num_items, sizeof(uint32_t));
-    uint32_t unused32;
-    ptr += copy_from_mem(ptr, &unused32, sizeof(uint32_t));
+    ptr += copy_from_mem(ptr, num_items);
+    ptr += sizeof(uint32_t); // unused
     W total_weight;
-    ptr += copy_from_mem(ptr, &total_weight, sizeof(total_weight));
+    ptr += copy_from_mem(ptr, total_weight);
     W offset;
-    ptr += copy_from_mem(ptr, &offset, sizeof(offset));
+    ptr += copy_from_mem(ptr, offset);
     ensure_minimum_memory(size, ptr - base + (sizeof(W) * num_items));
     // batch deserialization with intermediate array of items and weights
@@ -446,14 +431,14 @@ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) c
   os << "### End sketch summary" << std::endl;
   if (print_items) {
     vector_row items;
-    for (auto &it: map) {
+    for (auto it: map) {
       items.push_back(row(&it.first, it.second, offset));
     }
     // sort by estimate in descending order
     std::sort(items.begin(), items.end(), [](row a, row b){ return a.get_estimate() > b.get_estimate(); });
     os << "### Items in descending order by estimate" << std::endl;
     os << "   item, estimate, lower bound, upper bound" << std::endl;
-    for (auto &it: items) {
+    for (auto it: items) {
       os << "   " << it.get_item() << ", " << it.get_estimate() << ", "
          << it.get_lower_bound() << ", " << it.get_upper_bound() << std::endl;
     }