datasketches 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/datasketches/version.rb +1 -1
  4. data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
  5. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  6. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  7. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  8. data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
  9. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  10. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  11. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  12. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  13. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  14. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
  16. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
  17. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  18. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  19. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  20. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  21. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  22. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  23. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
  25. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  26. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  28. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  29. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  30. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  31. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  32. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  33. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  34. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  35. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  36. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  37. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  38. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  39. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  40. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  41. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  42. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  43. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  44. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  45. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  47. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  48. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  49. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  50. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  51. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  52. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  53. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  54. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  55. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  56. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  57. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  58. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  59. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  60. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  61. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  62. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  63. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  64. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  65. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  66. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  67. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
  69. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  70. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  71. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
  73. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  74. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  75. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
  76. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  77. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  78. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  79. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  80. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
  84. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  85. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
  86. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
  87. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  88. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  89. data/vendor/datasketches-cpp/setup.py +1 -1
  90. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  91. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  92. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  93. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  94. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  95. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  96. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
  97. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
  98. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
  99. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  100. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
  101. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
  103. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  104. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  105. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  106. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
  107. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  108. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  109. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
  110. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  111. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  112. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  113. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  114. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  115. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  116. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
  117. metadata +8 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e5d60e72541d2228f3e12328984de77635bfd0ce5244748c217cf9327bfe263
4
- data.tar.gz: e66d291ba480089371a59a770035cd4dab5b6ff21567f964e4f26f31f814e3e9
3
+ metadata.gz: 5c578044053c564421893cc4433f7fe557f23ba9d8a1995fc2a2c5f07742721a
4
+ data.tar.gz: f4122bd75e19fede015b01a5e5ad8e6130f75babe9c9160cc56f378480a16cee
5
5
  SHA512:
6
- metadata.gz: 12d752c80224802170762a0c25d354f09b83907ead47c95d5c47f072fb46520938b6bd5c64ba9779651af21cf903c4cd8de6d54d77527765f37ebea37920e611
7
- data.tar.gz: 30c684dc3ba0a11e973612457ae12d813369e35de6255fefd8c89fbb5ea2bc77e5bf5cc127d318e055289d8ded88d16f2ec308a64de535aa152a3cf3cad6daf5
6
+ metadata.gz: 2d7c4d7306f28356557a816a78033b909561ccd8f843281a2b756e88cbdcb9936da7995ff80871a19e229675ead812aca00d6c639d63a6532998c3c1b35aa953
7
+ data.tar.gz: fdf0fe1d14e04bfddef9df1ae7958f6571a7f689865aa02e81713d4b250afeeeb8c90a168ce855728d9c831aff6d3ea71df91c71b7269a760c19488c42c92658
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.2 (2021-07-17)
2
+
3
+ - Updated DataSketches to 3.1.0
4
+
1
5
  ## 0.2.1 (2021-05-23)
2
6
 
3
7
  - Improved performance
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  end
@@ -70,6 +70,13 @@ if(COVERAGE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
70
70
  add_link_options(--coverage)
71
71
  endif()
72
72
 
73
+ option(SANITIZE "Run sanitization checks (g++/clang only)" OFF)
74
+ if(SANITIZE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
75
+ add_compile_options(-fsanitize=${SANITIZE})
76
+ add_link_options(-fsanitize=${SANITIZE})
77
+ endif()
78
+
79
+
73
80
  # set default build type to Release
74
81
  # Derived from: https://blog.kitware.com/cmake-and-the-default-build-type/
75
82
  set(default_build_type "Release")
@@ -16,6 +16,8 @@
16
16
  #ifndef _MURMURHASH3_H_
17
17
  #define _MURMURHASH3_H_
18
18
 
19
+ #include <cstring>
20
+
19
21
  //-----------------------------------------------------------------------------
20
22
  // Platform-specific functions and macros
21
23
 
@@ -76,9 +78,11 @@ typedef struct {
76
78
  // Block read - if your platform needs to do endian-swapping or can only
77
79
  // handle aligned reads, do the conversion here
78
80
 
79
- FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
81
+ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
80
82
  {
81
- return p[i];
83
+ uint64_t res;
84
+ memcpy(&res, p + i, sizeof(res));
85
+ return res;
82
86
  }
83
87
 
84
88
  //-----------------------------------------------------------------------------
@@ -95,7 +99,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
95
99
  return k;
96
100
  }
97
101
 
98
- FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
102
+ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
99
103
  static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
100
104
  static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
101
105
 
@@ -106,13 +110,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
106
110
 
107
111
  // Number of full 128-bit blocks of 16 bytes.
108
112
  // Possible exclusion of a remainder of up to 15 bytes.
109
- const int nblocks = lenBytes >> 4; // bytes / 16
113
+ const size_t nblocks = lenBytes >> 4; // bytes / 16
110
114
 
111
115
  // Process the 128-bit blocks (the body) into the hash
112
116
  const uint64_t* blocks = (const uint64_t*)(data);
113
- for (int i = 0; i < nblocks; ++i) { // 16 bytes per block
114
- uint64_t k1 = getblock64(blocks,i*2+0);
115
- uint64_t k2 = getblock64(blocks,i*2+1);
117
+ for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
118
+ uint64_t k1 = getblock64(blocks, i * 2 + 0);
119
+ uint64_t k2 = getblock64(blocks, i * 2 + 1);
116
120
 
117
121
  k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
118
122
  out.h1 = ROTL64(out.h1,27);
@@ -381,7 +381,7 @@ private:
381
381
  // The following computes an approximation to the lower bound of a Frequentist
382
382
  // confidence interval based on the tails of the Binomial distribution.
383
383
  static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
384
- if (theta == 1) return num_samples;
384
+ if (theta == 1) return static_cast<double>(num_samples);
385
385
  if (num_samples == 0) return 0;
386
386
  if (num_samples == 1) {
387
387
  const double delta = delta_of_num_std_devs[num_std_devs];
@@ -395,24 +395,24 @@ private:
395
395
  }
396
396
  // at this point we know 2 <= num_samples <= 120
397
397
  if (theta > (1 - 1e-5)) { // empirically-determined threshold
398
- return num_samples;
398
+ return static_cast<double>(num_samples);
399
399
  }
400
400
  if (theta < (num_samples / 360.0)) { // empirically-determined threshold
401
401
  // here we use the Gaussian approximation, but with a modified num_std_devs
402
- const unsigned index = 3 * num_samples + (num_std_devs - 1);
402
+ const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
403
403
  const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
404
404
  return raw_lb - 0.5; // fake round down
405
405
  }
406
406
  // This is the most difficult range to approximate; we will compute an "exact" LB.
407
407
  // We know that est <= 360, so specialNStar() shouldn't be ridiculously slow.
408
408
  const double delta = delta_of_num_std_devs[num_std_devs];
409
- return special_n_star(num_samples, theta, delta); // no need to round
409
+ return static_cast<double>(special_n_star(num_samples, theta, delta)); // no need to round
410
410
  }
411
411
 
412
412
  // The following computes an approximation to the upper bound of a Frequentist
413
413
  // confidence interval based on the tails of the Binomial distribution.
414
414
  static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
415
- if (theta == 1) return num_samples;
415
+ if (theta == 1) return static_cast<double>(num_samples);
416
416
  if (num_samples == 0) {
417
417
  const double delta = delta_of_num_std_devs[num_std_devs];
418
418
  const double raw_ub = std::log(delta) / std::log(1 - theta);
@@ -425,18 +425,18 @@ private:
425
425
  }
426
426
  // at this point we know 2 <= num_samples <= 120
427
427
  if (theta > (1 - 1e-5)) { // empirically-determined threshold
428
- return num_samples + 1;
428
+ return static_cast<double>(num_samples + 1);
429
429
  }
430
430
  if (theta < (num_samples / 360.0)) { // empirically-determined threshold
431
431
  // here we use the Gaussian approximation, but with a modified num_std_devs
432
- const unsigned index = 3 * num_samples + (num_std_devs - 1);
432
+ const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
433
433
  const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
434
434
  return raw_ub + 0.5; // fake round up
435
435
  }
436
436
  // This is the most difficult range to approximate; we will compute an "exact" UB.
437
437
  // We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow.
438
438
  const double delta = delta_of_num_std_devs[num_std_devs];
439
- return special_n_prime_f(num_samples, theta, delta); // no need to round
439
+ return static_cast<double>(special_n_prime_f(num_samples, theta, delta)); // no need to round
440
440
  }
441
441
 
442
442
  static void check_theta(double theta) {
@@ -110,14 +110,14 @@ public:
110
110
  * @return the lower bound of the approximate Clopper-Pearson confidence interval for the
111
111
  * unknown success probability.
112
112
  */
113
- static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
113
+ static inline double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
114
114
  check_inputs(n, k);
115
115
  if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
116
116
  else if (k == 0) { return 0.0; }
117
117
  else if (k == 1) { return (exact_lower_bound_on_p_k_eq_1(n, delta_of_num_stdevs(num_std_devs))); }
118
118
  else if (k == n) { return (exact_lower_bound_on_p_k_eq_n(n, delta_of_num_stdevs(num_std_devs))); }
119
119
  else {
120
- double x = abramowitz_stegun_formula_26p5p22((n - k) + 1, k, (-1.0 * num_std_devs));
120
+ double x = abramowitz_stegun_formula_26p5p22((n - k) + 1.0, static_cast<double>(k), (-1.0 * num_std_devs));
121
121
  return (1.0 - x); // which is p
122
122
  }
123
123
  }
@@ -145,18 +145,18 @@ public:
145
145
  * @return the upper bound of the approximate Clopper-Pearson confidence interval for the
146
146
  * unknown success probability.
147
147
  */
148
- static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
148
+ static inline double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
149
149
  check_inputs(n, k);
150
150
  if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
151
151
  else if (k == n) { return 1.0; }
152
152
  else if (k == (n - 1)) {
153
- return (exactU_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
153
+ return (exact_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
154
154
  }
155
155
  else if (k == 0) {
156
156
  return (exact_upper_bound_on_p_k_eq_zero(n, delta_of_num_stdevs(num_std_devs)));
157
157
  }
158
158
  else {
159
- double x = abramowitz_stegun_formula_26p5p22(n - k, k + 1, num_std_devs);
159
+ double x = abramowitz_stegun_formula_26p5p22(static_cast<double>(n - k), k + 1.0, num_std_devs);
160
160
  return (1.0 - x); // which is p
161
161
  }
162
162
  }
@@ -167,7 +167,7 @@ public:
167
167
  * @param k is the number of successes. Must be non-negative, and cannot exceed n.
168
168
  * @return the estimate of the unknown binomial proportion.
169
169
  */
170
- static inline double estimate_unknown_p(long n, long k) {
170
+ static inline double estimate_unknown_p(uint64_t n, uint64_t k) {
171
171
  check_inputs(n, k);
172
172
  if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
173
173
  else { return ((double) k / (double) n); }
@@ -193,9 +193,7 @@ public:
193
193
  }
194
194
 
195
195
  private:
196
- static inline void check_inputs(long n, long k) {
197
- if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
198
- if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
196
+ static inline void check_inputs(uint64_t n, uint64_t k) {
199
197
  if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
200
198
  }
201
199
 
@@ -251,8 +249,7 @@ private:
251
249
  // and it is worth keeping it that way so that it will always be easy to verify
252
250
  // that the formula was typed in correctly.
253
251
 
254
- static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
255
- double yp) {
252
+ static inline double abramowitz_stegun_formula_26p5p22(double a, double b, double yp) {
256
253
  const double b2m1 = (2.0 * b) - 1.0;
257
254
  const double a2m1 = (2.0 * a) - 1.0;
258
255
  const double lambda = ((yp * yp) - 3.0) / 6.0;
@@ -268,19 +265,19 @@ private:
268
265
 
269
266
  // Formulas for some special cases.
270
267
 
271
- static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
268
+ static inline double exact_upper_bound_on_p_k_eq_zero(uint64_t n, double delta) {
272
269
  return (1.0 - pow(delta, (1.0 / n)));
273
270
  }
274
271
 
275
- static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
272
+ static inline double exact_lower_bound_on_p_k_eq_n(uint64_t n, double delta) {
276
273
  return (pow(delta, (1.0 / n)));
277
274
  }
278
275
 
279
- static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
276
+ static inline double exact_lower_bound_on_p_k_eq_1(uint64_t n, double delta) {
280
277
  return (1.0 - pow((1.0 - delta), (1.0 / n)));
281
278
  }
282
279
 
283
- static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
280
+ static inline double exact_upper_bound_on_p_k_eq_minusone(uint64_t n, double delta) {
284
281
  return (pow((1.0 - delta), (1.0 / n)));
285
282
  }
286
283
 
@@ -23,6 +23,7 @@
23
23
  #include <cstdint>
24
24
  #include <string>
25
25
  #include <memory>
26
+ #include <iostream>
26
27
 
27
28
  namespace datasketches {
28
29
 
@@ -46,6 +47,29 @@ constexpr uint8_t lg_size_from_count(uint32_t n, double load_factor) {
46
47
  return log2(n) + ((n > static_cast<uint32_t>((1 << (log2(n) + 1)) * load_factor)) ? 2 : 1);
47
48
  }
48
49
 
50
+ // stream helpers to hide casts
51
+ template<typename T>
52
+ static inline T read(std::istream& is) {
53
+ T value;
54
+ is.read(reinterpret_cast<char*>(&value), sizeof(T));
55
+ return value;
56
+ }
57
+
58
+ template<typename T>
59
+ static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
60
+ is.read(reinterpret_cast<char*>(ptr), size_bytes);
61
+ }
62
+
63
+ template<typename T>
64
+ static inline void write(std::ostream& os, T& value) {
65
+ os.write(reinterpret_cast<const char*>(&value), sizeof(T));
66
+ }
67
+
68
+ template<typename T>
69
+ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
70
+ os.write(reinterpret_cast<const char*>(ptr), size_bytes);
71
+ }
72
+
49
73
  } // namespace
50
74
 
51
75
  #endif // _COMMON_DEFS_HPP_
@@ -38,29 +38,41 @@ fwd_type<T1, T2> conditional_forward(T2&& value) {
38
38
  // Forward container as iterators
39
39
 
40
40
  template<typename Container>
41
- auto forward_begin(Container&& c) ->
42
- typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.begin())>::type
41
+ auto forward_begin(Container&& c) -> typename std::enable_if<
42
+ std::is_lvalue_reference<Container>::value ||
43
+ std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
44
+ decltype(c.begin())
45
+ >::type
43
46
  {
44
47
  return c.begin();
45
48
  }
46
49
 
47
50
  template<typename Container>
48
- auto forward_begin(Container&& c) ->
49
- typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.begin()))>::type
51
+ auto forward_begin(Container&& c) -> typename std::enable_if<
52
+ !std::is_lvalue_reference<Container>::value &&
53
+ !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
54
+ decltype(std::make_move_iterator(c.begin()))
55
+ >::type
50
56
  {
51
57
  return std::make_move_iterator(c.begin());
52
58
  }
53
59
 
54
60
  template<typename Container>
55
- auto forward_end(Container&& c) ->
56
- typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.end())>::type
61
+ auto forward_end(Container&& c) -> typename std::enable_if<
62
+ std::is_lvalue_reference<Container>::value ||
63
+ std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
64
+ decltype(c.end())
65
+ >::type
57
66
  {
58
67
  return c.end();
59
68
  }
60
69
 
61
70
  template<typename Container>
62
- auto forward_end(Container&& c) ->
63
- typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.end()))>::type
71
+ auto forward_end(Container&& c) -> typename std::enable_if<
72
+ !std::is_lvalue_reference<Container>::value &&
73
+ !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
74
+ decltype(std::make_move_iterator(c.end()))
75
+ >::type
64
76
  {
65
77
  return std::make_move_iterator(c.end());
66
78
  }
@@ -94,7 +94,7 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
94
94
  static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
95
95
  for (int i = 0; i < 4; i++) {
96
96
  const int byte = input & 0xff;
97
- if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
97
+ if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
98
98
  input >>= 8;
99
99
  }
100
100
  return 32;
@@ -103,7 +103,7 @@ static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
103
103
  static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) {
104
104
  for (int i = 0; i < 8; i++) {
105
105
  const int byte = input & 0xff;
106
- if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
106
+ if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
107
107
  input >>= 8;
108
108
  }
109
109
  return 64;
@@ -51,7 +51,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
51
51
  bool failure = false;
52
52
  try {
53
53
  os.write(reinterpret_cast<const char*>(items), sizeof(T) * num);
54
- } catch (std::ostream::failure& e) {
54
+ } catch (std::ostream::failure&) {
55
55
  failure = true;
56
56
  }
57
57
  if (failure || !os.good()) {
@@ -62,7 +62,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
62
62
  bool failure = false;
63
63
  try {
64
64
  is.read((char*)items, sizeof(T) * num);
65
- } catch (std::istream::failure& e) {
65
+ } catch (std::istream::failure&) {
66
66
  failure = true;
67
67
  }
68
68
  if (failure || !is.good()) {
@@ -99,11 +99,11 @@ struct serde<std::string> {
99
99
  bool failure = false;
100
100
  try {
101
101
  for (; i < num && os.good(); i++) {
102
- uint32_t length = items[i].size();
102
+ uint32_t length = static_cast<uint32_t>(items[i].size());
103
103
  os.write((char*)&length, sizeof(length));
104
104
  os.write(items[i].c_str(), length);
105
105
  }
106
- } catch (std::ostream::failure& e) {
106
+ } catch (std::ostream::failure&) {
107
107
  failure = true;
108
108
  }
109
109
  if (failure || !os.good()) {
@@ -121,12 +121,12 @@ struct serde<std::string> {
121
121
  std::string str;
122
122
  str.reserve(length);
123
123
  for (uint32_t j = 0; j < length; j++) {
124
- str.push_back(is.get());
124
+ str.push_back(static_cast<char>(is.get()));
125
125
  }
126
126
  if (!is.good()) { break; }
127
127
  new (&items[i]) std::string(std::move(str));
128
128
  }
129
- } catch (std::istream::failure& e) {
129
+ } catch (std::istream::failure&) {
130
130
  failure = true;
131
131
  }
132
132
  if (failure || !is.good()) {
@@ -143,7 +143,7 @@ struct serde<std::string> {
143
143
  size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) const {
144
144
  size_t bytes_written = 0;
145
145
  for (unsigned i = 0; i < num; ++i) {
146
- const uint32_t length = items[i].size();
146
+ const uint32_t length = static_cast<uint32_t>(items[i].size());
147
147
  const size_t new_bytes = length + sizeof(length);
148
148
  check_memory_size(bytes_written + new_bytes, capacity);
149
149
  memcpy(ptr, &length, sizeof(length));
@@ -48,44 +48,44 @@ template<typename A>
48
48
  class cpc_compressor {
49
49
  public:
50
50
  void compress(const cpc_sketch_alloc<A>& source, compressed_state<A>& target) const;
51
- void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint64_t num_coupons) const;
51
+ void uncompress(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
52
52
 
53
53
  // methods below are public for testing
54
54
 
55
55
  // This returns the number of compressed words that were actually used. It is the caller's
56
56
  // responsibility to ensure that the compressed_words array is long enough to prevent over-run.
57
- size_t low_level_compress_bytes(
57
+ uint32_t low_level_compress_bytes(
58
58
  const uint8_t* byte_array, // input
59
- size_t num_bytes_to_encode,
59
+ uint32_t num_bytes_to_encode,
60
60
  const uint16_t* encoding_table,
61
61
  uint32_t* compressed_words // output
62
62
  ) const;
63
63
 
64
64
  void low_level_uncompress_bytes(
65
65
  uint8_t* byte_array, // output
66
- size_t num_bytes_to_decode,
66
+ uint32_t num_bytes_to_decode,
67
67
  const uint16_t* decoding_table,
68
68
  const uint32_t* compressed_words,
69
- size_t num_compressed_words // input
69
+ uint32_t num_compressed_words // input
70
70
  ) const;
71
71
 
72
72
  // Here "pairs" refers to row-column pairs that specify
73
73
  // the positions of surprising values in the bit matrix.
74
74
 
75
75
  // returns the number of compressedWords actually used
76
- size_t low_level_compress_pairs(
76
+ uint32_t low_level_compress_pairs(
77
77
  const uint32_t* pair_array, // input
78
- size_t num_pairs_to_encode,
79
- size_t num_base_bits,
78
+ uint32_t num_pairs_to_encode,
79
+ uint8_t num_base_bits,
80
80
  uint32_t* compressed_words // output
81
81
  ) const;
82
82
 
83
83
  void low_level_uncompress_pairs(
84
84
  uint32_t* pair_array, // output
85
- size_t num_pairs_to_decode,
86
- size_t num_base_bits,
85
+ uint32_t num_pairs_to_decode,
86
+ uint8_t num_base_bits,
87
87
  const uint32_t* compressed_words, // input
88
- size_t num_compressed_words // input
88
+ uint32_t num_compressed_words // input
89
89
  ) const;
90
90
 
91
91
  private:
@@ -122,22 +122,22 @@ private:
122
122
  void uncompress_pinned_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
123
123
  void uncompress_sliding_flavor(const compressed_state<A>& source, uncompressed_state<A>& target, uint8_t lg_k, uint32_t num_coupons) const;
124
124
 
125
- uint8_t* make_inverse_permutation(const uint8_t* permu, int length);
126
- uint16_t* make_decoding_table(const uint16_t* encoding_table, int num_byte_values);
125
+ uint8_t* make_inverse_permutation(const uint8_t* permu, unsigned length);
126
+ uint16_t* make_decoding_table(const uint16_t* encoding_table, unsigned num_byte_values);
127
127
  void validate_decoding_table(const uint16_t* decoding_table, const uint16_t* encoding_table) const;
128
128
 
129
129
  void compress_surprising_values(const vector_u32<A>& pairs, uint8_t lg_k, compressed_state<A>& result) const;
130
130
  void compress_sliding_window(const uint8_t* window, uint8_t lg_k, uint32_t num_coupons, compressed_state<A>& target) const;
131
131
 
132
- vector_u32<A> uncompress_surprising_values(const uint32_t* data, size_t data_words, size_t num_pairs, uint8_t lg_k, const A& allocator) const;
133
- void uncompress_sliding_window(const uint32_t* data, size_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
132
+ vector_u32<A> uncompress_surprising_values(const uint32_t* data, uint32_t data_words, uint32_t num_pairs, uint8_t lg_k, const A& allocator) const;
133
+ void uncompress_sliding_window(const uint32_t* data, uint32_t data_words, vector_u8<A>& window, uint8_t lg_k, uint32_t num_coupons) const;
134
134
 
135
- static size_t safe_length_for_compressed_pair_buf(uint64_t k, size_t num_pairs, size_t num_base_bits);
136
- static size_t safe_length_for_compressed_window_buf(uint64_t k);
137
- static uint8_t determine_pseudo_phase(uint8_t lg_k, uint64_t c);
135
+ static size_t safe_length_for_compressed_pair_buf(uint32_t k, uint32_t num_pairs, uint8_t num_base_bits);
136
+ static size_t safe_length_for_compressed_window_buf(uint32_t k);
137
+ static uint8_t determine_pseudo_phase(uint8_t lg_k, uint32_t c);
138
138
 
139
139
  static inline vector_u32<A> tricky_get_pairs_from_window(const uint8_t* window, uint32_t k, uint32_t num_pairs_to_get, uint32_t empty_space, const A& allocator);
140
- static inline uint64_t golomb_choose_number_of_base_bits(uint64_t k, uint64_t count);
140
+ static inline uint8_t golomb_choose_number_of_base_bits(uint32_t k, uint64_t count);
141
141
  };
142
142
 
143
143
  } /* namespace datasketches */