datasketches 0.2.2 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -51,18 +51,27 @@ struct subset_summary {
51
51
  double total_sketch_weight;
52
52
  };
53
53
 
54
- enum resize_factor { X1 = 0, X2, X4, X8 };
55
-
56
54
  template <typename T, typename S, typename A> class var_opt_union; // forward declaration
57
55
 
58
- template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
56
+ namespace var_opt_constants {
57
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
58
+ const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
59
+ }
60
+
61
+ template<
62
+ typename T,
63
+ typename S = serde<T>, // deprecated, to be removed in the next major version
64
+ typename A = std::allocator<T>
65
+ >
59
66
  class var_opt_sketch {
60
67
 
61
68
  public:
62
- static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
63
- static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
69
+ static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
70
+ static const uint32_t MAX_K = var_opt_constants::MAX_K;
64
71
 
65
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
72
+ explicit var_opt_sketch(uint32_t k,
73
+ resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
74
+ const A& allocator = A());
66
75
  var_opt_sketch(const var_opt_sketch& other);
67
76
  var_opt_sketch(var_opt_sketch&& other) noexcept;
68
77
 
@@ -130,18 +139,20 @@ class var_opt_sketch {
130
139
  /**
131
140
  * Computes size needed to serialize the current state of the sketch.
132
141
  * This version is for fixed-size arithmetic types (integral and floating point).
142
+ * @param sd instance of a SerDe
133
143
  * @return size in bytes needed to serialize this sketch
134
144
  */
135
- template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
136
- inline size_t get_serialized_size_bytes() const;
145
+ template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
146
+ inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
137
147
 
138
148
  /**
139
149
  * Computes size needed to serialize the current state of the sketch.
140
150
  * This version is for all other types and can be expensive since every item needs to be looked at.
151
+ * @param sd instance of a SerDe
141
152
  * @return size in bytes needed to serialize this sketch
142
153
  */
143
- template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
144
- inline size_t get_serialized_size_bytes() const;
154
+ template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
155
+ inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
145
156
 
146
157
  // This is a convenience alias for users
147
158
  // The type returned by the following serialize method
@@ -153,30 +164,61 @@ class var_opt_sketch {
153
164
  * It is a blank space of a given size.
154
165
  * This header is used in Datasketches PostgreSQL extension.
155
166
  * @param header_size_bytes space to reserve in front of the sketch
167
+ * @param sd instance of a SerDe
156
168
  */
157
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
169
+ template<typename SerDe = S>
170
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
158
171
 
159
172
  /**
160
173
  * This method serializes the sketch into a given stream in a binary form
161
174
  * @param os output stream
175
+ * @param sd instance of a SerDe
162
176
  */
163
- void serialize(std::ostream& os) const;
177
+ template<typename SerDe = S>
178
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
164
179
 
165
180
  /**
166
181
  * This method deserializes a sketch from a given stream.
167
182
  * @param is input stream
183
+ * @param allocator instance of an Allocator
168
184
  * @return an instance of a sketch
185
+ *
186
+ * Deprecated, to be removed in the next major version
169
187
  */
170
188
  static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
171
189
 
190
+ /**
191
+ * This method deserializes a sketch from a given stream.
192
+ * @param is input stream
193
+ * @param sd instance of a SerDe
194
+ * @param allocator instance of an Allocator
195
+ * @return an instance of a sketch
196
+ */
197
+ template<typename SerDe = S>
198
+ static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
199
+
172
200
  /**
173
201
  * This method deserializes a sketch from a given array of bytes.
174
202
  * @param bytes pointer to the array of bytes
175
203
  * @param size the size of the array
204
+ * @param allocator instance of an Allocator
176
205
  * @return an instance of a sketch
206
+ *
207
+ * Deprecated, to be removed in the next major version
177
208
  */
178
209
  static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
179
210
 
211
+ /**
212
+ * This method deserializes a sketch from a given array of bytes.
213
+ * @param bytes pointer to the array of bytes
214
+ * @param size the size of the array
215
+ * @param sd instance of a SerDe
216
+ * @param allocator instance of an Allocator
217
+ * @return an instance of a sketch
218
+ */
219
+ template<typename SerDe = S>
220
+ static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
221
+
180
222
  /**
181
223
  * Prints a summary of the sketch.
182
224
  * @return the summary as a string
@@ -25,6 +25,7 @@
25
25
  #include <cmath>
26
26
  #include <random>
27
27
  #include <algorithm>
28
+ #include <stdexcept>
28
29
 
29
30
  #include "var_opt_sketch.hpp"
30
31
  #include "serde.hpp"
@@ -128,7 +129,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
128
129
  r_(r_count),
129
130
  n_(n),
130
131
  total_wt_r_(total_wt_r),
131
- rf_(DEFAULT_RESIZE_FACTOR),
132
+ rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
132
133
  curr_items_alloc_(len),
133
134
  filled_data_(n > k),
134
135
  allocator_(allocator),
@@ -311,8 +312,8 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
311
312
 
312
313
  // implementation for fixed-size arithmetic types (integral and floating point)
313
314
  template<typename T, typename S, typename A>
314
- template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
315
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
315
+ template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
316
+ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
316
317
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
317
318
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
318
319
  num_bytes += h_ * sizeof(double); // weights
@@ -325,8 +326,8 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
325
326
 
326
327
  // implementation for all other types
327
328
  template<typename T, typename S, typename A>
328
- template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
329
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
329
+ template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
330
+ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
330
331
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
331
332
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
332
333
  num_bytes += h_ * sizeof(double); // weights
@@ -335,13 +336,14 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
335
336
  }
336
337
  // must iterate over the items
337
338
  for (auto it: *this)
338
- num_bytes += S().size_of_item(it.first);
339
+ num_bytes += sd.size_of_item(it.first);
339
340
  return num_bytes;
340
341
  }
341
342
 
342
343
  template<typename T, typename S, typename A>
343
- std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes) const {
344
- const size_t size = header_size_bytes + get_serialized_size_bytes();
344
+ template<typename SerDe>
345
+ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
346
+ const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
345
347
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
346
348
  uint8_t* ptr = bytes.data() + header_size_bytes;
347
349
  uint8_t* end_ptr = ptr + size;
@@ -400,8 +402,8 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
400
402
  }
401
403
 
402
404
  // write the sample items, skipping the gap. Either h_ or r_ may be 0
403
- ptr += S().serialize(ptr, end_ptr - ptr, data_, h_);
404
- ptr += S().serialize(ptr, end_ptr - ptr, &data_[h_ + 1], r_);
405
+ ptr += sd.serialize(ptr, end_ptr - ptr, data_, h_);
406
+ ptr += sd.serialize(ptr, end_ptr - ptr, &data_[h_ + 1], r_);
405
407
  }
406
408
 
407
409
  size_t bytes_written = ptr - bytes.data();
@@ -413,7 +415,8 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
413
415
  }
414
416
 
415
417
  template<typename T, typename S, typename A>
416
- void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
418
+ template<typename SerDe>
419
+ void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
417
420
  const bool empty = (h_ == 0) && (r_ == 0);
418
421
 
419
422
  const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
@@ -469,13 +472,19 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
469
472
  }
470
473
 
471
474
  // write the sample items, skipping the gap. Either h_ or r_ may be 0
472
- S().serialize(os, data_, h_);
473
- S().serialize(os, &data_[h_ + 1], r_);
475
+ sd.serialize(os, data_, h_);
476
+ sd.serialize(os, &data_[h_ + 1], r_);
474
477
  }
475
478
  }
476
479
 
477
480
  template<typename T, typename S, typename A>
478
481
  var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
482
+ return deserialize(bytes, size, S(), allocator);
483
+ }
484
+
485
+ template<typename T, typename S, typename A>
486
+ template<typename SerDe>
487
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
479
488
  ensure_minimum_memory(size, 8);
480
489
  const char* ptr = static_cast<const char*>(bytes);
481
490
  const char* base = ptr;
@@ -559,10 +568,10 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
559
568
  items_deleter deleter(array_size, allocator);
560
569
  std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
561
570
 
562
- ptr += S().deserialize(ptr, end_ptr - ptr, items.get(), h);
571
+ ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), h);
563
572
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
564
573
 
565
- ptr += S().deserialize(ptr, end_ptr - ptr, &(items.get()[h + 1]), r);
574
+ ptr += sd.deserialize(ptr, end_ptr - ptr, &(items.get()[h + 1]), r);
566
575
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
567
576
 
568
577
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
@@ -571,6 +580,12 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
571
580
 
572
581
  template<typename T, typename S, typename A>
573
582
  var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
583
+ return deserialize(is, S(), allocator);
584
+ }
585
+
586
+ template<typename T, typename S, typename A>
587
+ template<typename SerDe>
588
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
574
589
  const auto first_byte = read<uint8_t>(is);
575
590
  uint8_t preamble_longs = first_byte & 0x3f;
576
591
  const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
@@ -640,10 +655,10 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
640
655
  items_deleter deleter(array_size, allocator);
641
656
  std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
642
657
 
643
- S().deserialize(is, items.get(), h); // aka &data_[0]
658
+ sd.deserialize(is, items.get(), h); // aka &data_[0]
644
659
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
645
660
 
646
- S().deserialize(is, &(items.get()[h + 1]), r);
661
+ sd.deserialize(is, &(items.get()[h + 1]), r);
647
662
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
648
663
 
649
664
  if (!is.good())
@@ -731,8 +746,10 @@ void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
731
746
 
732
747
  template<typename T, typename S, typename A>
733
748
  string<A> var_opt_sketch<T,S,A>::to_string() const {
734
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
735
- os << "### VarOpt SUMMARY: " << std::endl;
749
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
750
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
751
+ std::ostringstream os;
752
+ os << "### VarOpt SUMMARY:" << std::endl;
736
753
  os << " k : " << k_ << std::endl;
737
754
  os << " h : " << h_ << std::endl;
738
755
  os << " r : " << r_ << std::endl;
@@ -740,24 +757,28 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
740
757
  os << " Current size : " << curr_items_alloc_ << std::endl;
741
758
  os << " Resize factor: " << (1 << rf_) << std::endl;
742
759
  os << "### END SKETCH SUMMARY" << std::endl;
743
- return os.str();
760
+ return string<A>(os.str().c_str(), allocator_);
744
761
  }
745
762
 
746
763
  template<typename T, typename S, typename A>
747
764
  string<A> var_opt_sketch<T,S,A>::items_to_string() const {
748
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
765
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
766
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
767
+ std::ostringstream os;
749
768
  os << "### Sketch Items" << std::endl;
750
769
  int idx = 0;
751
770
  for (auto record : *this) {
752
771
  os << idx << ": " << record.first << "\twt = " << record.second << std::endl;
753
772
  ++idx;
754
773
  }
755
- return os.str();
774
+ return string<A>(os.str().c_str(), allocator_);
756
775
  }
757
776
 
758
777
  template<typename T, typename S, typename A>
759
778
  string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
760
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
779
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
780
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
781
+ std::ostringstream os;
761
782
  os << "### Sketch Items" << std::endl;
762
783
  const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
763
784
  for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
@@ -774,7 +795,7 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
774
795
  ++display_idx;
775
796
  }
776
797
  }
777
- return os.str();
798
+ return string<A>(os.str().c_str(), allocator_);
778
799
  }
779
800
 
780
801
  template<typename T, typename S, typename A>
@@ -1677,16 +1698,6 @@ bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
1677
1698
  return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
1678
1699
  }
1679
1700
 
1680
-
1681
-
1682
- // ******************** MOVE TO COMMON UTILS AREA EVENTUALLY *********************
1683
-
1684
- namespace random_utils {
1685
- static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
1686
- static std::mt19937_64 rand(rd());
1687
- static std::uniform_real_distribution<> next_double(0.0, 1.0);
1688
- }
1689
-
1690
1701
  /**
1691
1702
  * Checks if target sampling allocation is more than 50% of max sampling size.
1692
1703
  * If so, returns max sampling size, otherwise passes through target size.
@@ -45,7 +45,11 @@ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template
45
45
  * author Kevin Lang
46
46
  * author Jon Malkin
47
47
  */
48
- template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
48
+ template<
49
+ typename T,
50
+ typename S = serde<T>, // deprecated, to be removed in the next major version
51
+ typename A = std::allocator<T>
52
+ >
49
53
  class var_opt_union {
50
54
 
51
55
  public:
@@ -88,14 +92,16 @@ public:
88
92
  /**
89
93
  * Computes size needed to serialize the current state of the union.
90
94
  * This version is for all other types and can be expensive since every item needs to be looked at.
95
+ * @param sd instance of a SerDe
91
96
  * @return size in bytes needed to serialize this sketch
92
97
  */
93
- size_t get_serialized_size_bytes() const;
94
-
98
+ template<typename SerDe = S>
99
+ size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
100
+
95
101
  // This is a convenience alias for users
96
102
  // The type returned by the following serialize method
97
103
  typedef vector_u8<A> vector_bytes;
98
-
104
+
99
105
  /**
100
106
  * NOTE: This method may be deprecated in a future version.
101
107
  * This method serializes the sketch as a vector of bytes.
@@ -103,33 +109,62 @@ public:
103
109
  * It is a blank space of a given size.
104
110
  * This header is used in Datasketches PostgreSQL extension.
105
111
  * @param header_size_bytes space to reserve in front of the sketch
112
+ * @param sd instance of a SerDe
106
113
  */
107
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
114
+ template<typename SerDe = S>
115
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
108
116
 
109
117
  /**
110
118
  * NOTE: This method may be deprecated in a future version.
111
119
  * This method serializes the sketch into a given stream in a binary form
112
120
  * @param os output stream
121
+ * @param sd instance of a SerDe
113
122
  */
114
- void serialize(std::ostream& os) const;
123
+ template<typename SerDe = S>
124
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
115
125
 
116
126
  /**
117
127
  * NOTE: This method may be deprecated in a future version.
118
128
  * This method deserializes a union from a given stream.
119
129
  * @param is input stream
130
+ * @param allocator instance of an Allocator
120
131
  * @return an instance of a union
121
132
  */
122
133
  static var_opt_union deserialize(std::istream& is, const A& allocator = A());
123
134
 
135
+ /**
136
+ * NOTE: This method may be deprecated in a future version.
137
+ * This method deserializes a union from a given stream.
138
+ * @param is input stream
139
+ * @param sd instance of a SerDe
140
+ * @param allocator instance of an Allocator
141
+ * @return an instance of a union
142
+ */
143
+ template<typename SerDe = S>
144
+ static var_opt_union deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
145
+
124
146
  /**
125
147
  * NOTE: This method may be deprecated in a future version.
126
148
  * This method deserializes a union from a given array of bytes.
127
149
  * @param bytes pointer to the array of bytes
128
150
  * @param size the size of the array
151
+ * @param allocator instance of an Allocator
129
152
  * @return an instance of a union
130
153
  */
131
154
  static var_opt_union deserialize(const void* bytes, size_t size, const A& allocator = A());
132
155
 
156
+ /**
157
+ * NOTE: This method may be deprecated in a future version.
158
+ * This method deserializes a union from a given array of bytes.
159
+ * @param bytes pointer to the array of bytes
160
+ * @param size the size of the array
161
+ * @param sd instance of a SerDe
162
+ * @param allocator instance of an Allocator
163
+ * @return an instance of a union
164
+ */
165
+ template<typename SerDe = S>
166
+ static var_opt_union deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
167
+
133
168
  /**
134
169
  * Prints a summary of the union as a string.
135
170
  * @return the summary as a string
@@ -24,6 +24,7 @@
24
24
 
25
25
  #include <cmath>
26
26
  #include <sstream>
27
+ #include <stdexcept>
27
28
 
28
29
  namespace datasketches {
29
30
 
@@ -129,6 +130,12 @@ var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(var_opt_union&& other) {
129
130
 
130
131
  template<typename T, typename S, typename A>
131
132
  var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A& allocator) {
133
+ return deserialize(is, S(), allocator);
134
+ }
135
+
136
+ template<typename T, typename S, typename A>
137
+ template<typename SerDe>
138
+ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
132
139
  const auto preamble_longs = read<uint8_t>(is);
133
140
  const auto serial_version = read<uint8_t>(is);
134
141
  const auto family_id = read<uint8_t>(is);
@@ -155,7 +162,7 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A
155
162
  const auto outer_tau_numer = read<double>(is);
156
163
  const auto outer_tau_denom = read<uint64_t>(is);
157
164
 
158
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, allocator);
165
+ var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, sd, allocator);
159
166
 
160
167
  if (!is.good())
161
168
  throw std::runtime_error("error reading from std::istream");
@@ -165,6 +172,12 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A
165
172
 
166
173
  template<typename T, typename S, typename A>
167
174
  var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
175
+ return deserialize(bytes, size, S(), allocator);
176
+ }
177
+
178
+ template<typename T, typename S, typename A>
179
+ template<typename SerDe>
180
+ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
168
181
  ensure_minimum_memory(size, 8);
169
182
  const char* ptr = static_cast<const char*>(bytes);
170
183
  uint8_t preamble_longs;
@@ -199,22 +212,24 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
199
212
  ptr += copy_from_mem(ptr, outer_tau_denom);
200
213
 
201
214
  const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
202
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, allocator);
215
+ var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, sd, allocator);
203
216
 
204
217
  return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
205
218
  }
206
219
 
207
220
  template<typename T, typename S, typename A>
208
- size_t var_opt_union<T,S,A>::get_serialized_size_bytes() const {
221
+ template<typename SerDe>
222
+ size_t var_opt_union<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
209
223
  if (n_ == 0) {
210
224
  return PREAMBLE_LONGS_EMPTY << 3;
211
225
  } else {
212
- return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes();
226
+ return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes(sd);
213
227
  }
214
228
  }
215
229
 
216
230
  template<typename T, typename S, typename A>
217
- void var_opt_union<T,S,A>::serialize(std::ostream& os) const {
231
+ template<typename SerDe>
232
+ void var_opt_union<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
218
233
  bool empty = (n_ == 0);
219
234
 
220
235
  const uint8_t serialization_version(SER_VER);
@@ -240,13 +255,14 @@ void var_opt_union<T,S,A>::serialize(std::ostream& os) const {
240
255
  write(os, n_);
241
256
  write(os, outer_tau_numer_);
242
257
  write(os, outer_tau_denom_);
243
- gadget_.serialize(os);
258
+ gadget_.serialize(os, sd);
244
259
  }
245
260
  }
246
261
 
247
262
  template<typename T, typename S, typename A>
248
- std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header_size_bytes) const {
249
- const size_t size = header_size_bytes + get_serialized_size_bytes();
263
+ template<typename SerDe>
264
+ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
265
+ const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
250
266
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, gadget_.allocator_);
251
267
  uint8_t* ptr = bytes.data() + header_size_bytes;
252
268
 
@@ -278,7 +294,7 @@ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header
278
294
  ptr += copy_to_mem(outer_tau_numer_, ptr);
279
295
  ptr += copy_to_mem(outer_tau_denom_, ptr);
280
296
 
281
- auto gadget_bytes = gadget_.serialize();
297
+ auto gadget_bytes = gadget_.serialize(0, sd);
282
298
  ptr += copy_to_mem(gadget_bytes.data(), ptr, gadget_bytes.size() * sizeof(uint8_t));
283
299
  }
284
300
 
@@ -295,14 +311,16 @@ void var_opt_union<T,S,A>::reset() {
295
311
 
296
312
  template<typename T, typename S, typename A>
297
313
  string<A> var_opt_union<T,S,A>::to_string() const {
298
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
299
- os << "### VarOpt Union SUMMARY: " << std::endl;
300
- os << " . n : " << n_ << std::endl;
314
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
315
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
316
+ std::ostringstream os;
317
+ os << "### VarOpt Union SUMMARY:" << std::endl;
318
+ os << " n : " << n_ << std::endl;
301
319
  os << " Max k : " << max_k_ << std::endl;
302
- os << " Gadget Summary: " << std::endl;
320
+ os << " Gadget Summary:" << std::endl;
303
321
  os << gadget_.to_string();
304
- os << "### END VarOpt Union SUMMARY: " << std::endl;
305
- return os.str();
322
+ os << "### END VarOpt Union SUMMARY" << std::endl;
323
+ return string<A>(os.str().c_str(), gadget_.allocator_);
306
324
  }
307
325
 
308
326
  template<typename T, typename S, typename A>
@@ -39,7 +39,7 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
39
39
  var_opt_test_sketch sk1(10, var_opt_test_sketch::DEFAULT_RESIZE_FACTOR, 0);
40
40
  for (int i = 0; i < 100; ++i) sk1.update(i);
41
41
  auto bytes1 = sk1.serialize();
42
- auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), 0);
42
+ auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), test_type_serde(), 0);
43
43
 
44
44
  std::stringstream ss;
45
45
  sk1.serialize(ss);
@@ -51,7 +51,7 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
51
51
  u1.update(sk3);
52
52
 
53
53
  auto bytes2 = u1.serialize();
54
- auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), 0);
54
+ auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), test_type_serde(), 0);
55
55
  }
56
56
  REQUIRE(test_allocator_total_bytes == 0);
57
57
  REQUIRE(test_allocator_net_allocations == 0);
@@ -27,6 +27,7 @@
27
27
  #include <fstream>
28
28
  #include <cmath>
29
29
  #include <random>
30
+ #include <stdexcept>
30
31
 
31
32
  #ifdef TEST_BINARY_INPUT_PATH
32
33
  static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
@@ -27,6 +27,7 @@
27
27
  #include <fstream>
28
28
  #include <cmath>
29
29
  #include <random>
30
+ #include <stdexcept>
30
31
 
31
32
  #ifdef TEST_BINARY_INPUT_PATH
32
33
  static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
@@ -49,8 +49,9 @@ class CMakeBuild(build_ext):
49
49
  os.path.dirname(self.get_ext_fullpath(ext.name)))
50
50
  cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
51
51
  cmake_args += ['-DWITH_PYTHON=True']
52
+ cmake_args += ['-DCMAKE_CXX_STANDARD=11']
52
53
  # ensure we use a consistent python version
53
- cmake_args += ['-DPYTHON_EXECUTABLE=' + sys.executable]
54
+ cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
54
55
  cfg = 'Debug' if self.debug else 'Release'
55
56
  build_args = ['--config', cfg]
56
57
 
@@ -59,7 +60,8 @@ class CMakeBuild(build_ext):
59
60
  cfg.upper(),
60
61
  extdir)]
61
62
  if sys.maxsize > 2**32:
62
- cmake_args += ['-A', 'x64']
63
+ cmake_args += ['-T', 'host=x64']
64
+ cmake_args += ['-DCMAKE_GENERATOR_PLATFORM=x64']
63
65
  build_args += ['--', '/m']
64
66
  else:
65
67
  cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
@@ -74,23 +76,24 @@ class CMakeBuild(build_ext):
74
76
  subprocess.check_call(['cmake', ext.sourcedir] + cmake_args,
75
77
  cwd=self.build_temp, env=env)
76
78
  subprocess.check_call(['cmake', '--build', '.', '--target', 'python'] + build_args,
77
- cwd=self.build_temp)
79
+ cwd=self.build_temp, env=env)
78
80
  print() # add an empty line to pretty print
79
81
 
80
82
  setup(
81
83
  name='datasketches',
82
- version='3.1.0',
83
- author='Apache DataSketches Developers',
84
+ version='3.4.0',
85
+ author='Apache Software Foundation',
84
86
  author_email='dev@datasketches.apache.org',
85
- description='A wrapper for the C++ Apache DataSketches library',
87
+ description='The Apache DataSketches Library for Python',
86
88
  license='Apache License 2.0',
87
89
  url='http://datasketches.apache.org',
88
90
  long_description=open('python/README.md').read(),
91
+ long_description_content_type='text/markdown',
89
92
  packages=find_packages('python'), # python pacakges only in this dir
90
93
  package_dir={'':'python'},
91
94
  # may need to add all source paths for sdist packages w/o MANIFEST.in
92
95
  ext_modules=[CMakeExtension('datasketches')],
93
96
  cmdclass={'build_ext': CMakeBuild},
94
- setup_requires=['setuptools_scm','tox-setuptools'],
97
+ install_requires=['numpy'],
95
98
  zip_safe=False
96
99
  )