datasketches 0.2.0 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +7 -7
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  16. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  17. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  18. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  19. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  20. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  21. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  22. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  26. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
  31. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  32. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  33. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  34. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  35. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  36. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  37. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  38. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  39. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  40. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
  41. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  42. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  43. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  44. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  45. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  46. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  47. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  48. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  49. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  50. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  51. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  52. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  53. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  54. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  55. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  56. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  57. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  58. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  59. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  60. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  61. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  62. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
  63. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  64. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  65. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  66. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  67. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  68. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  69. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  70. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  71. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  72. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  73. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  74. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  75. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  76. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  77. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  78. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  79. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  80. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  81. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  82. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  83. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  84. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  85. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
  86. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
  87. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  88. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  89. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  90. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
  91. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  92. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  93. data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
  94. data/vendor/datasketches-cpp/python/README.md +50 -50
  95. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  96. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  97. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  98. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
  99. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
  100. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  101. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  102. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  103. data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
  104. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  105. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  106. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  107. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  108. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  109. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  110. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  111. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  112. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
  113. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  114. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  115. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
  116. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
  117. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
  118. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  119. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  120. data/vendor/datasketches-cpp/setup.py +10 -7
  121. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  122. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  124. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
  125. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  126. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  127. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  128. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  129. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  130. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  131. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  132. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
  133. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
  134. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
  135. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  136. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
  137. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  138. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
  139. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
  140. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  141. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  142. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  144. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  145. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  146. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  147. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  148. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  149. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  150. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
  151. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
  152. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  153. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  154. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
  155. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  156. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  157. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  158. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
  159. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  160. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  161. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
  162. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  163. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  164. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  165. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  166. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
  167. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
  168. metadata +18 -7
  169. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  170. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -110,14 +110,14 @@ public:
110
110
  * @return the lower bound of the approximate Clopper-Pearson confidence interval for the
111
111
  * unknown success probability.
112
112
  */
113
- static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
113
+ static inline double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
114
114
  check_inputs(n, k);
115
115
  if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
116
116
  else if (k == 0) { return 0.0; }
117
117
  else if (k == 1) { return (exact_lower_bound_on_p_k_eq_1(n, delta_of_num_stdevs(num_std_devs))); }
118
118
  else if (k == n) { return (exact_lower_bound_on_p_k_eq_n(n, delta_of_num_stdevs(num_std_devs))); }
119
119
  else {
120
- double x = abramowitz_stegun_formula_26p5p22((n - k) + 1, k, (-1.0 * num_std_devs));
120
+ double x = abramowitz_stegun_formula_26p5p22((n - k) + 1.0, static_cast<double>(k), (-1.0 * num_std_devs));
121
121
  return (1.0 - x); // which is p
122
122
  }
123
123
  }
@@ -145,18 +145,18 @@ public:
145
145
  * @return the upper bound of the approximate Clopper-Pearson confidence interval for the
146
146
  * unknown success probability.
147
147
  */
148
- static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
148
+ static inline double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
149
149
  check_inputs(n, k);
150
150
  if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
151
151
  else if (k == n) { return 1.0; }
152
152
  else if (k == (n - 1)) {
153
- return (exactU_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
153
+ return (exact_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
154
154
  }
155
155
  else if (k == 0) {
156
156
  return (exact_upper_bound_on_p_k_eq_zero(n, delta_of_num_stdevs(num_std_devs)));
157
157
  }
158
158
  else {
159
- double x = abramowitz_stegun_formula_26p5p22(n - k, k + 1, num_std_devs);
159
+ double x = abramowitz_stegun_formula_26p5p22(static_cast<double>(n - k), k + 1.0, num_std_devs);
160
160
  return (1.0 - x); // which is p
161
161
  }
162
162
  }
@@ -167,7 +167,7 @@ public:
167
167
  * @param k is the number of successes. Must be non-negative, and cannot exceed n.
168
168
  * @return the estimate of the unknown binomial proportion.
169
169
  */
170
- static inline double estimate_unknown_p(long n, long k) {
170
+ static inline double estimate_unknown_p(uint64_t n, uint64_t k) {
171
171
  check_inputs(n, k);
172
172
  if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
173
173
  else { return ((double) k / (double) n); }
@@ -193,9 +193,7 @@ public:
193
193
  }
194
194
 
195
195
  private:
196
- static inline void check_inputs(long n, long k) {
197
- if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
198
- if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
196
+ static inline void check_inputs(uint64_t n, uint64_t k) {
199
197
  if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
200
198
  }
201
199
 
@@ -251,8 +249,7 @@ private:
251
249
  // and it is worth keeping it that way so that it will always be easy to verify
252
250
  // that the formula was typed in correctly.
253
251
 
254
- static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
255
- double yp) {
252
+ static inline double abramowitz_stegun_formula_26p5p22(double a, double b, double yp) {
256
253
  const double b2m1 = (2.0 * b) - 1.0;
257
254
  const double a2m1 = (2.0 * a) - 1.0;
258
255
  const double lambda = ((yp * yp) - 3.0) / 6.0;
@@ -268,19 +265,19 @@ private:
268
265
 
269
266
  // Formulas for some special cases.
270
267
 
271
- static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
268
+ static inline double exact_upper_bound_on_p_k_eq_zero(uint64_t n, double delta) {
272
269
  return (1.0 - pow(delta, (1.0 / n)));
273
270
  }
274
271
 
275
- static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
272
+ static inline double exact_lower_bound_on_p_k_eq_n(uint64_t n, double delta) {
276
273
  return (pow(delta, (1.0 / n)));
277
274
  }
278
275
 
279
- static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
276
+ static inline double exact_lower_bound_on_p_k_eq_1(uint64_t n, double delta) {
280
277
  return (1.0 - pow((1.0 - delta), (1.0 / n)));
281
278
  }
282
279
 
283
- static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
280
+ static inline double exact_upper_bound_on_p_k_eq_minusone(uint64_t n, double delta) {
284
281
  return (pow((1.0 - delta), (1.0 / n)));
285
282
  }
286
283
 
@@ -23,11 +23,14 @@
23
23
  #include <cstdint>
24
24
  #include <string>
25
25
  #include <memory>
26
+ #include <iostream>
26
27
 
27
28
  namespace datasketches {
28
29
 
29
30
  static const uint64_t DEFAULT_SEED = 9001;
30
31
 
32
+ enum resize_factor { X1 = 0, X2, X4, X8 };
33
+
31
34
  template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
32
35
  template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
33
36
 
@@ -46,6 +49,29 @@ constexpr uint8_t lg_size_from_count(uint32_t n, double load_factor) {
46
49
  return log2(n) + ((n > static_cast<uint32_t>((1 << (log2(n) + 1)) * load_factor)) ? 2 : 1);
47
50
  }
48
51
 
52
+ // stream helpers to hide casts
53
+ template<typename T>
54
+ static inline T read(std::istream& is) {
55
+ T value;
56
+ is.read(reinterpret_cast<char*>(&value), sizeof(T));
57
+ return value;
58
+ }
59
+
60
+ template<typename T>
61
+ static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
62
+ is.read(reinterpret_cast<char*>(ptr), size_bytes);
63
+ }
64
+
65
+ template<typename T>
66
+ static inline void write(std::ostream& os, T& value) {
67
+ os.write(reinterpret_cast<const char*>(&value), sizeof(T));
68
+ }
69
+
70
+ template<typename T>
71
+ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
72
+ os.write(reinterpret_cast<const char*>(ptr), size_bytes);
73
+ }
74
+
49
75
  } // namespace
50
76
 
51
77
  #endif // _COMMON_DEFS_HPP_
@@ -38,29 +38,41 @@ fwd_type<T1, T2> conditional_forward(T2&& value) {
38
38
  // Forward container as iterators
39
39
 
40
40
  template<typename Container>
41
- auto forward_begin(Container&& c) ->
42
- typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.begin())>::type
41
+ auto forward_begin(Container&& c) -> typename std::enable_if<
42
+ std::is_lvalue_reference<Container>::value ||
43
+ std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
44
+ decltype(c.begin())
45
+ >::type
43
46
  {
44
47
  return c.begin();
45
48
  }
46
49
 
47
50
  template<typename Container>
48
- auto forward_begin(Container&& c) ->
49
- typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.begin()))>::type
51
+ auto forward_begin(Container&& c) -> typename std::enable_if<
52
+ !std::is_lvalue_reference<Container>::value &&
53
+ !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
54
+ decltype(std::make_move_iterator(c.begin()))
55
+ >::type
50
56
  {
51
57
  return std::make_move_iterator(c.begin());
52
58
  }
53
59
 
54
60
  template<typename Container>
55
- auto forward_end(Container&& c) ->
56
- typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.end())>::type
61
+ auto forward_end(Container&& c) -> typename std::enable_if<
62
+ std::is_lvalue_reference<Container>::value ||
63
+ std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
64
+ decltype(c.end())
65
+ >::type
57
66
  {
58
67
  return c.end();
59
68
  }
60
69
 
61
70
  template<typename Container>
62
- auto forward_end(Container&& c) ->
63
- typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.end()))>::type
71
+ auto forward_end(Container&& c) -> typename std::enable_if<
72
+ !std::is_lvalue_reference<Container>::value &&
73
+ !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
74
+ decltype(std::make_move_iterator(c.end()))
75
+ >::type
64
76
  {
65
77
  return std::make_move_iterator(c.end());
66
78
  }
@@ -94,7 +94,7 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
94
94
  static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
95
95
  for (int i = 0; i < 4; i++) {
96
96
  const int byte = input & 0xff;
97
- if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
97
+ if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
98
98
  input >>= 8;
99
99
  }
100
100
  return 32;
@@ -103,7 +103,7 @@ static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
103
103
  static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) {
104
104
  for (int i = 0; i < 8; i++) {
105
105
  const int byte = input & 0xff;
106
- if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
106
+ if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
107
107
  input >>= 8;
108
108
  }
109
109
  return 64;
@@ -51,7 +51,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
51
51
  bool failure = false;
52
52
  try {
53
53
  os.write(reinterpret_cast<const char*>(items), sizeof(T) * num);
54
- } catch (std::ostream::failure& e) {
54
+ } catch (std::ostream::failure&) {
55
55
  failure = true;
56
56
  }
57
57
  if (failure || !os.good()) {
@@ -62,7 +62,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
62
62
  bool failure = false;
63
63
  try {
64
64
  is.read((char*)items, sizeof(T) * num);
65
- } catch (std::istream::failure& e) {
65
+ } catch (std::istream::failure&) {
66
66
  failure = true;
67
67
  }
68
68
  if (failure || !is.good()) {
@@ -99,11 +99,11 @@ struct serde<std::string> {
99
99
  bool failure = false;
100
100
  try {
101
101
  for (; i < num && os.good(); i++) {
102
- uint32_t length = items[i].size();
102
+ uint32_t length = static_cast<uint32_t>(items[i].size());
103
103
  os.write((char*)&length, sizeof(length));
104
104
  os.write(items[i].c_str(), length);
105
105
  }
106
- } catch (std::ostream::failure& e) {
106
+ } catch (std::ostream::failure&) {
107
107
  failure = true;
108
108
  }
109
109
  if (failure || !os.good()) {
@@ -121,12 +121,12 @@ struct serde<std::string> {
121
121
  std::string str;
122
122
  str.reserve(length);
123
123
  for (uint32_t j = 0; j < length; j++) {
124
- str.push_back(is.get());
124
+ str.push_back(static_cast<char>(is.get()));
125
125
  }
126
126
  if (!is.good()) { break; }
127
127
  new (&items[i]) std::string(std::move(str));
128
128
  }
129
- } catch (std::istream::failure& e) {
129
+ } catch (std::istream::failure&) {
130
130
  failure = true;
131
131
  }
132
132
  if (failure || !is.good()) {
@@ -143,7 +143,7 @@ struct serde<std::string> {
143
143
  size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) const {
144
144
  size_t bytes_written = 0;
145
145
  for (unsigned i = 0; i < num; ++i) {
146
- const uint32_t length = items[i].size();
146
+ const uint32_t length = static_cast<uint32_t>(items[i].size());
147
147
  const size_t new_bytes = length + sizeof(length);
148
148
  check_memory_size(bytes_written + new_bytes, capacity);
149
149
  memcpy(ptr, &length, sizeof(length));
@@ -32,43 +32,23 @@ target_include_directories(cpc
32
32
  target_link_libraries(cpc INTERFACE common)
33
33
  target_compile_features(cpc INTERFACE cxx_std_11)
34
34
 
35
- set(cpc_HEADERS "")
36
- list(APPEND cpc_HEADERS "include/compression_data.hpp")
37
- list(APPEND cpc_HEADERS "include/cpc_common.hpp")
38
- list(APPEND cpc_HEADERS "include/cpc_compressor.hpp")
39
- list(APPEND cpc_HEADERS "include/cpc_compressor_impl.hpp")
40
- list(APPEND cpc_HEADERS "include/cpc_confidence.hpp")
41
- list(APPEND cpc_HEADERS "include/cpc_sketch.hpp")
42
- list(APPEND cpc_HEADERS "include/cpc_sketch_impl.hpp")
43
- list(APPEND cpc_HEADERS "include/cpc_union.hpp")
44
- list(APPEND cpc_HEADERS "include/cpc_union_impl.hpp")
45
- list(APPEND cpc_HEADERS "include/cpc_util.hpp")
46
- list(APPEND cpc_HEADERS "include/icon_estimator.hpp")
47
- list(APPEND cpc_HEADERS "include/kxp_byte_lookup.hpp")
48
- list(APPEND cpc_HEADERS "include/u32_table.hpp")
49
- list(APPEND cpc_HEADERS "include/u32_table_impl.hpp")
50
-
51
35
  install(TARGETS cpc
52
36
  EXPORT ${PROJECT_NAME}
53
37
  )
54
38
 
55
- install(FILES ${cpc_HEADERS}
39
+ install(FILES
40
+ include/compression_data.hpp
41
+ include/cpc_common.hpp
42
+ include/cpc_compressor.hpp
43
+ include/cpc_compressor_impl.hpp
44
+ include/cpc_confidence.hpp
45
+ include/cpc_sketch.hpp
46
+ include/cpc_sketch_impl.hpp
47
+ include/cpc_union.hpp
48
+ include/cpc_union_impl.hpp
49
+ include/cpc_util.hpp
50
+ include/icon_estimator.hpp
51
+ include/kxp_byte_lookup.hpp
52
+ include/u32_table.hpp
53
+ include/u32_table_impl.hpp
56
54
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
57
-
58
- target_sources(cpc
59
- INTERFACE
60
- ${CMAKE_CURRENT_SOURCE_DIR}/include/compression_data.hpp
61
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_common.hpp
62
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor.hpp
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor_impl.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_confidence.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch_impl.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union_impl.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_util.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/icon_estimator.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kxp_byte_lookup.hpp
72
- ${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table.hpp
73
- ${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table_impl.hpp
74
- )
@@ -26,9 +26,16 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- static const uint8_t CPC_MIN_LG_K = 4;
30
- static const uint8_t CPC_MAX_LG_K = 26;
31
- static const uint8_t CPC_DEFAULT_LG_K = 11;
29
+ namespace cpc_constants {
30
+ const uint8_t MIN_LG_K = 4;
31
+ const uint8_t MAX_LG_K = 26;
32
+ const uint8_t DEFAULT_LG_K = 11;
33
+ }
34
+
35
+ // TODO: Redundant and deprecated. Will be removed in next major version release.
36
+ static const uint8_t CPC_MIN_LG_K = cpc_constants::MIN_LG_K;
37
+ static const uint8_t CPC_MAX_LG_K = cpc_constants::MAX_LG_K;
38
+ static const uint8_t CPC_DEFAULT_LG_K = cpc_constants::DEFAULT_LG_K;
32
39
 
33
40
  template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
34
41
  template<typename A> using AllocU16 = typename std::allocator_traits<A>::template rebind_alloc<uint16_t>;
@@ -48,44 +48,44 @@ template<typename A>
48
48
  class cpc_compressor {
49
49
  public:
50
50
  void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
51
- void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const;
51
+ void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
52
52
 
53
53
  // methods below are public for testing
54
54
 
55
55
  // This returns the number of compressed words that were actually used. It is the caller's
56
56
  // responsibility to ensure that the compressed_words array is long enough to prevent over-run.
57
- size_t low_level_compress_bytes(
57
+ uint32_t low_level_compress_bytes(
58
58
  const uint8_t* byte_array, // input
59
- size_t num_bytes_to_encode,
59
+ uint32_t num_bytes_to_encode,
60
60
  const uint16_t* encoding_table,
61
61
  uint32_t* compressed_words // output
62
62
  ) const;
63
63
 
64
64
  void low_level_uncompress_bytes(
65
65
  uint8_t* byte_array, // output
66
- size_t num_bytes_to_decode,
66
+ uint32_t num_bytes_to_decode,
67
67
  const uint16_t* decoding_table,
68
68
  const uint32_t* compressed_words,
69
- size_t num_compressed_words // input
69
+ uint32_t num_compressed_words // input
70
70
  ) const;
71
71
 
72
72
  // Here "pairs" refers to row-column pairs that specify
73
73
  // the positions of surprising values in the bit matrix.
74
74
 
75
75
  // returns the number of compressedWords actually used
76
- size_t low_level_compress_pairs(
76
+ uint32_t low_level_compress_pairs(
77
77
  const uint32_t* pair_array, // input
78
- size_t num_pairs_to_encode,
79
- size_t num_base_bits,
78
+ uint32_t num_pairs_to_encode,
79
+ uint8_t num_base_bits,
80
80
  uint32_t* compressed_words // output
81
81
  ) const;
82
82
 
83
83
  void low_level_uncompress_pairs(
84
84
  uint32_t* pair_array, // output
85
- size_t num_pairs_to_decode,
86
- size_t num_base_bits,
85
+ uint32_t num_pairs_to_decode,
86
+ uint8_t num_base_bits,
87
87
  const uint32_t* compressed_words, // input
88
- size_t num_compressed_words // input
88
+ uint32_t num_compressed_words // input
89
89
  ) const;
90
90
 
91
91
  private:
@@ -122,22 +122,22 @@ private:
122
122
  void uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
123
123
  void uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
124
124
 
125
- uint8_t* make_inverse_permutation(const uint8_t* permu, int length);
126
- uint16_t* make_decoding_table(const uint16_t* encoding_table, int num_byte_values);
125
+ uint8_t* make_inverse_permutation(const uint8_t* permu, unsigned length);
126
+ uint16_t* make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values);
127
127
  void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const;
128
128
 
129
129
  void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
130
130
  void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
131
131
 
132
- vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k, const A& allocator) const;
133
- void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
132
+ vector_u32<A> uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
133
+ void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
134
134
 
135
- static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
136
- static size_t safe_length_for_compressed_window_buf(uint64_t k);
137
- static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c);
135
+ static size_t safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits);
136
+ static size_t safe_length_for_compressed_window_buf(uint32_t k);
137
+ static uint8_t determine_pseudo_phase(uint8_t lg_k, uint32_t c);
138
138
 
139
139
  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
140
- static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count);
140
+ static inline uint8_t golomb_choose_number_of_base_bits(uint32_t k, uint64_t count);
141
141
  };
142
142
 
143
143
  } /* namespace datasketches */