datasketches 0.2.2 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -51,18 +51,27 @@ struct subset_summary {
51
51
  double total_sketch_weight;
52
52
  };
53
53
 
54
- enum resize_factor { X1 = 0, X2, X4, X8 };
55
-
56
54
  template <typename T, typename S, typename A> class var_opt_union; // forward declaration
57
55
 
58
- template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
56
+ namespace var_opt_constants {
57
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
58
+ const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
59
+ }
60
+
61
+ template<
62
+ typename T,
63
+ typename S = serde<T>, // deprecated, to be removed in the next major version
64
+ typename A = std::allocator<T>
65
+ >
59
66
  class var_opt_sketch {
60
67
 
61
68
  public:
62
- static const resize_factor DEFAULT_RESIZE_FACTOR = X8;
63
- static const uint32_t MAX_K = ((uint32_t) 1 << 31) - 2;
69
+ static const resize_factor DEFAULT_RESIZE_FACTOR = var_opt_constants::DEFAULT_RESIZE_FACTOR;
70
+ static const uint32_t MAX_K = var_opt_constants::MAX_K;
64
71
 
65
- explicit var_opt_sketch(uint32_t k, resize_factor rf = DEFAULT_RESIZE_FACTOR, const A& allocator = A());
72
+ explicit var_opt_sketch(uint32_t k,
73
+ resize_factor rf = var_opt_constants::DEFAULT_RESIZE_FACTOR,
74
+ const A& allocator = A());
66
75
  var_opt_sketch(const var_opt_sketch& other);
67
76
  var_opt_sketch(var_opt_sketch&& other) noexcept;
68
77
 
@@ -130,18 +139,20 @@ class var_opt_sketch {
130
139
  /**
131
140
  * Computes size needed to serialize the current state of the sketch.
132
141
  * This version is for fixed-size arithmetic types (integral and floating point).
142
+ * @param instance of a SerDe
133
143
  * @return size in bytes needed to serialize this sketch
134
144
  */
135
- template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
136
- inline size_t get_serialized_size_bytes() const;
145
+ template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
146
+ inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
137
147
 
138
148
  /**
139
149
  * Computes size needed to serialize the current state of the sketch.
140
150
  * This version is for all other types and can be expensive since every item needs to be looked at.
151
+ * @param instance of a SerDe
141
152
  * @return size in bytes needed to serialize this sketch
142
153
  */
143
- template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
144
- inline size_t get_serialized_size_bytes() const;
154
+ template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
155
+ inline size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
145
156
 
146
157
  // This is a convenience alias for users
147
158
  // The type returned by the following serialize method
@@ -153,30 +164,61 @@ class var_opt_sketch {
153
164
  * It is a blank space of a given size.
154
165
  * This header is used in Datasketches PostgreSQL extension.
155
166
  * @param header_size_bytes space to reserve in front of the sketch
167
+ * @param instance of a SerDe
156
168
  */
157
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
169
+ template<typename SerDe = S>
170
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
158
171
 
159
172
  /**
160
173
  * This method serializes the sketch into a given stream in a binary form
161
174
  * @param os output stream
175
+ * @param instance of a SerDe
162
176
  */
163
- void serialize(std::ostream& os) const;
177
+ template<typename SerDe = S>
178
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
164
179
 
165
180
  /**
166
181
  * This method deserializes a sketch from a given stream.
167
182
  * @param is input stream
183
+ * @param instance of an Allocator
168
184
  * @return an instance of a sketch
185
+ *
186
+ * Deprecated, to be removed in the next major version
169
187
  */
170
188
  static var_opt_sketch deserialize(std::istream& is, const A& allocator = A());
171
189
 
190
+ /**
191
+ * This method deserializes a sketch from a given stream.
192
+ * @param is input stream
193
+ * @param instance of a SerDe
194
+ * @param instance of an Allocator
195
+ * @return an instance of a sketch
196
+ */
197
+ template<typename SerDe = S>
198
+ static var_opt_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
199
+
172
200
  /**
173
201
  * This method deserializes a sketch from a given array of bytes.
174
202
  * @param bytes pointer to the array of bytes
175
203
  * @param size the size of the array
204
+ * @param instance of an Allocator
176
205
  * @return an instance of a sketch
206
+ *
207
+ * Deprecated, to be removed in the next major version
177
208
  */
178
209
  static var_opt_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
179
210
 
211
+ /**
212
+ * This method deserializes a sketch from a given array of bytes.
213
+ * @param bytes pointer to the array of bytes
214
+ * @param size the size of the array
215
+ * @param instance of a SerDe
216
+ * @param instance of an Allocator
217
+ * @return an instance of a sketch
218
+ */
219
+ template<typename SerDe = S>
220
+ static var_opt_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
221
+
180
222
  /**
181
223
  * Prints a summary of the sketch.
182
224
  * @return the summary as a string
@@ -25,6 +25,7 @@
25
25
  #include <cmath>
26
26
  #include <random>
27
27
  #include <algorithm>
28
+ #include <stdexcept>
28
29
 
29
30
  #include "var_opt_sketch.hpp"
30
31
  #include "serde.hpp"
@@ -128,7 +129,7 @@ var_opt_sketch<T,S,A>::var_opt_sketch(T* data, double* weights, size_t len,
128
129
  r_(r_count),
129
130
  n_(n),
130
131
  total_wt_r_(total_wt_r),
131
- rf_(DEFAULT_RESIZE_FACTOR),
132
+ rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
132
133
  curr_items_alloc_(len),
133
134
  filled_data_(n > k),
134
135
  allocator_(allocator),
@@ -311,8 +312,8 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
311
312
 
312
313
  // implementation for fixed-size arithmetic types (integral and floating point)
313
314
  template<typename T, typename S, typename A>
314
- template<typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
315
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
315
+ template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
316
+ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
316
317
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
317
318
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
318
319
  num_bytes += h_ * sizeof(double); // weights
@@ -325,8 +326,8 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
325
326
 
326
327
  // implementation for all other types
327
328
  template<typename T, typename S, typename A>
328
- template<typename TT, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
329
- size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
329
+ template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
330
+ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
330
331
  if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
331
332
  size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
332
333
  num_bytes += h_ * sizeof(double); // weights
@@ -335,13 +336,14 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes() const {
335
336
  }
336
337
  // must iterate over the items
337
338
  for (auto it: *this)
338
- num_bytes += S().size_of_item(it.first);
339
+ num_bytes += sd.size_of_item(it.first);
339
340
  return num_bytes;
340
341
  }
341
342
 
342
343
  template<typename T, typename S, typename A>
343
- std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes) const {
344
- const size_t size = header_size_bytes + get_serialized_size_bytes();
344
+ template<typename SerDe>
345
+ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
346
+ const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
345
347
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
346
348
  uint8_t* ptr = bytes.data() + header_size_bytes;
347
349
  uint8_t* end_ptr = ptr + size;
@@ -400,8 +402,8 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
400
402
  }
401
403
 
402
404
  // write the sample items, skipping the gap. Either h_ or r_ may be 0
403
- ptr += S().serialize(ptr, end_ptr - ptr, data_, h_);
404
- ptr += S().serialize(ptr, end_ptr - ptr, &data_[h_ + 1], r_);
405
+ ptr += sd.serialize(ptr, end_ptr - ptr, data_, h_);
406
+ ptr += sd.serialize(ptr, end_ptr - ptr, &data_[h_ + 1], r_);
405
407
  }
406
408
 
407
409
  size_t bytes_written = ptr - bytes.data();
@@ -413,7 +415,8 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
413
415
  }
414
416
 
415
417
  template<typename T, typename S, typename A>
416
- void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
418
+ template<typename SerDe>
419
+ void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
417
420
  const bool empty = (h_ == 0) && (r_ == 0);
418
421
 
419
422
  const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
@@ -469,13 +472,19 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os) const {
469
472
  }
470
473
 
471
474
  // write the sample items, skipping the gap. Either h_ or r_ may be 0
472
- S().serialize(os, data_, h_);
473
- S().serialize(os, &data_[h_ + 1], r_);
475
+ sd.serialize(os, data_, h_);
476
+ sd.serialize(os, &data_[h_ + 1], r_);
474
477
  }
475
478
  }
476
479
 
477
480
  template<typename T, typename S, typename A>
478
481
  var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
482
+ return deserialize(bytes, size, S(), allocator);
483
+ }
484
+
485
+ template<typename T, typename S, typename A>
486
+ template<typename SerDe>
487
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
479
488
  ensure_minimum_memory(size, 8);
480
489
  const char* ptr = static_cast<const char*>(bytes);
481
490
  const char* base = ptr;
@@ -559,10 +568,10 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
559
568
  items_deleter deleter(array_size, allocator);
560
569
  std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
561
570
 
562
- ptr += S().deserialize(ptr, end_ptr - ptr, items.get(), h);
571
+ ptr += sd.deserialize(ptr, end_ptr - ptr, items.get(), h);
563
572
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
564
573
 
565
- ptr += S().deserialize(ptr, end_ptr - ptr, &(items.get()[h + 1]), r);
574
+ ptr += sd.deserialize(ptr, end_ptr - ptr, &(items.get()[h + 1]), r);
566
575
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
567
576
 
568
577
  return var_opt_sketch(k, h, (r > 0 ? 1 : 0), r, n, total_wt_r, rf, array_size, false,
@@ -571,6 +580,12 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
571
580
 
572
581
  template<typename T, typename S, typename A>
573
582
  var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
583
+ return deserialize(is, S(), allocator);
584
+ }
585
+
586
+ template<typename T, typename S, typename A>
587
+ template<typename SerDe>
588
+ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
574
589
  const auto first_byte = read<uint8_t>(is);
575
590
  uint8_t preamble_longs = first_byte & 0x3f;
576
591
  const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
@@ -640,10 +655,10 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
640
655
  items_deleter deleter(array_size, allocator);
641
656
  std::unique_ptr<T, items_deleter> items(A(allocator).allocate(array_size), deleter);
642
657
 
643
- S().deserialize(is, items.get(), h); // aka &data_[0]
658
+ sd.deserialize(is, items.get(), h); // aka &data_[0]
644
659
  items.get_deleter().set_h(h); // serde didn't throw, so the items are now valid
645
660
 
646
- S().deserialize(is, &(items.get()[h + 1]), r);
661
+ sd.deserialize(is, &(items.get()[h + 1]), r);
647
662
  items.get_deleter().set_r(r); // serde didn't throw, so the items are now valid
648
663
 
649
664
  if (!is.good())
@@ -731,8 +746,10 @@ void var_opt_sketch<T,S,A>::update(T&& item, double weight) {
731
746
 
732
747
  template<typename T, typename S, typename A>
733
748
  string<A> var_opt_sketch<T,S,A>::to_string() const {
734
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
735
- os << "### VarOpt SUMMARY: " << std::endl;
749
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
750
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
751
+ std::ostringstream os;
752
+ os << "### VarOpt SUMMARY:" << std::endl;
736
753
  os << " k : " << k_ << std::endl;
737
754
  os << " h : " << h_ << std::endl;
738
755
  os << " r : " << r_ << std::endl;
@@ -740,24 +757,28 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
740
757
  os << " Current size : " << curr_items_alloc_ << std::endl;
741
758
  os << " Resize factor: " << (1 << rf_) << std::endl;
742
759
  os << "### END SKETCH SUMMARY" << std::endl;
743
- return os.str();
760
+ return string<A>(os.str().c_str(), allocator_);
744
761
  }
745
762
 
746
763
  template<typename T, typename S, typename A>
747
764
  string<A> var_opt_sketch<T,S,A>::items_to_string() const {
748
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
765
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
766
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
767
+ std::ostringstream os;
749
768
  os << "### Sketch Items" << std::endl;
750
769
  int idx = 0;
751
770
  for (auto record : *this) {
752
771
  os << idx << ": " << record.first << "\twt = " << record.second << std::endl;
753
772
  ++idx;
754
773
  }
755
- return os.str();
774
+ return string<A>(os.str().c_str(), allocator_);
756
775
  }
757
776
 
758
777
  template<typename T, typename S, typename A>
759
778
  string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
760
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
779
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
780
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
781
+ std::ostringstream os;
761
782
  os << "### Sketch Items" << std::endl;
762
783
  const uint32_t array_length = (n_ < k_ ? n_ : k_ + 1);
763
784
  for (uint32_t i = 0, display_idx = 0; i < array_length; ++i) {
@@ -774,7 +795,7 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
774
795
  ++display_idx;
775
796
  }
776
797
  }
777
- return os.str();
798
+ return string<A>(os.str().c_str(), allocator_);
778
799
  }
779
800
 
780
801
  template<typename T, typename S, typename A>
@@ -1677,16 +1698,6 @@ bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
1677
1698
  return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
1678
1699
  }
1679
1700
 
1680
-
1681
-
1682
- // ******************** MOVE TO COMMON UTILS AREA EVENTUALLY *********************
1683
-
1684
- namespace random_utils {
1685
- static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
1686
- static std::mt19937_64 rand(rd());
1687
- static std::uniform_real_distribution<> next_double(0.0, 1.0);
1688
- }
1689
-
1690
1701
  /**
1691
1702
  * Checks if target sampling allocation is more than 50% of max sampling size.
1692
1703
  * If so, returns max sampling size, otherwise passes through target size.
@@ -45,7 +45,11 @@ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template
45
45
  * author Kevin Lang
46
46
  * author Jon Malkin
47
47
  */
48
- template <typename T, typename S = serde<T>, typename A = std::allocator<T>>
48
+ template<
49
+ typename T,
50
+ typename S = serde<T>, // deprecated, to be removed in the next major version
51
+ typename A = std::allocator<T>
52
+ >
49
53
  class var_opt_union {
50
54
 
51
55
  public:
@@ -88,14 +92,16 @@ public:
88
92
  /**
89
93
  * Computes size needed to serialize the current state of the union.
90
94
  * This version is for all other types and can be expensive since every item needs to be looked at.
95
+ * @param instance of a SerDe
91
96
  * @return size in bytes needed to serialize this sketch
92
97
  */
93
- size_t get_serialized_size_bytes() const;
94
-
98
+ template<typename SerDe = S>
99
+ size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
100
+
95
101
  // This is a convenience alias for users
96
102
  // The type returned by the following serialize method
97
103
  typedef vector_u8<A> vector_bytes;
98
-
104
+
99
105
  /**
100
106
  * NOTE: This method may be deprecated in a future version.
101
107
  * This method serializes the sketch as a vector of bytes.
@@ -103,33 +109,62 @@ public:
103
109
  * It is a blank space of a given size.
104
110
  * This header is used in Datasketches PostgreSQL extension.
105
111
  * @param header_size_bytes space to reserve in front of the sketch
112
+ * @param instance of a SerDe
106
113
  */
107
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
114
+ template<typename SerDe = S>
115
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
108
116
 
109
117
  /**
110
118
  * NOTE: This method may be deprecated in a future version.
111
119
  * This method serializes the sketch into a given stream in a binary form
112
120
  * @param os output stream
121
+ * @param instance of a SerDe
113
122
  */
114
- void serialize(std::ostream& os) const;
123
+ template<typename SerDe = S>
124
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
115
125
 
116
126
  /**
117
127
  * NOTE: This method may be deprecated in a future version.
118
128
  * This method deserializes a union from a given stream.
119
129
  * @param is input stream
130
+ * @param instance of an Allocator
120
131
  * @return an instance of a union
121
132
  */
122
133
  static var_opt_union deserialize(std::istream& is, const A& allocator = A());
123
134
 
135
+ /**
136
+ * NOTE: This method may be deprecated in a future version.
137
+ * This method deserializes a union from a given stream.
138
+ * @param is input stream
139
+ * @param instance of a SerDe
140
+ * @param instance of an Allocator
141
+ * @return an instance of a union
142
+ */
143
+ template<typename SerDe = S>
144
+ static var_opt_union deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
145
+
124
146
  /**
125
147
  * NOTE: This method may be deprecated in a future version.
126
148
  * This method deserializes a union from a given array of bytes.
127
149
  * @param bytes pointer to the array of bytes
128
150
  * @param size the size of the array
151
+ * @param instance of an Allocator
129
152
  * @return an instance of a union
130
153
  */
131
154
  static var_opt_union deserialize(const void* bytes, size_t size, const A& allocator = A());
132
155
 
156
+ /**
157
+ * NOTE: This method may be deprecated in a future version.
158
+ * This method deserializes a union from a given array of bytes.
159
+ * @param bytes pointer to the array of bytes
160
+ * @param size the size of the array
161
+ * @param instance of a SerDe
162
+ * @param instance of an Allocator
163
+ * @return an instance of a union
164
+ */
165
+ template<typename SerDe = S>
166
+ static var_opt_union deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
167
+
133
168
  /**
134
169
  * Prints a summary of the union as a string.
135
170
  * @return the summary as a string
@@ -24,6 +24,7 @@
24
24
 
25
25
  #include <cmath>
26
26
  #include <sstream>
27
+ #include <stdexcept>
27
28
 
28
29
  namespace datasketches {
29
30
 
@@ -129,6 +130,12 @@ var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(var_opt_union&& other) {
129
130
 
130
131
  template<typename T, typename S, typename A>
131
132
  var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A& allocator) {
133
+ return deserialize(is, S(), allocator);
134
+ }
135
+
136
+ template<typename T, typename S, typename A>
137
+ template<typename SerDe>
138
+ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
132
139
  const auto preamble_longs = read<uint8_t>(is);
133
140
  const auto serial_version = read<uint8_t>(is);
134
141
  const auto family_id = read<uint8_t>(is);
@@ -155,7 +162,7 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A
155
162
  const auto outer_tau_numer = read<double>(is);
156
163
  const auto outer_tau_denom = read<uint64_t>(is);
157
164
 
158
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, allocator);
165
+ var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, sd, allocator);
159
166
 
160
167
  if (!is.good())
161
168
  throw std::runtime_error("error reading from std::istream");
@@ -165,6 +172,12 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A
165
172
 
166
173
  template<typename T, typename S, typename A>
167
174
  var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
175
+ return deserialize(bytes, size, S(), allocator);
176
+ }
177
+
178
+ template<typename T, typename S, typename A>
179
+ template<typename SerDe>
180
+ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
168
181
  ensure_minimum_memory(size, 8);
169
182
  const char* ptr = static_cast<const char*>(bytes);
170
183
  uint8_t preamble_longs;
@@ -199,22 +212,24 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
199
212
  ptr += copy_from_mem(ptr, outer_tau_denom);
200
213
 
201
214
  const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
202
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, allocator);
215
+ var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, sd, allocator);
203
216
 
204
217
  return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
205
218
  }
206
219
 
207
220
  template<typename T, typename S, typename A>
208
- size_t var_opt_union<T,S,A>::get_serialized_size_bytes() const {
221
+ template<typename SerDe>
222
+ size_t var_opt_union<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
209
223
  if (n_ == 0) {
210
224
  return PREAMBLE_LONGS_EMPTY << 3;
211
225
  } else {
212
- return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes();
226
+ return (PREAMBLE_LONGS_NON_EMPTY << 3) + gadget_.get_serialized_size_bytes(sd);
213
227
  }
214
228
  }
215
229
 
216
230
  template<typename T, typename S, typename A>
217
- void var_opt_union<T,S,A>::serialize(std::ostream& os) const {
231
+ template<typename SerDe>
232
+ void var_opt_union<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
218
233
  bool empty = (n_ == 0);
219
234
 
220
235
  const uint8_t serialization_version(SER_VER);
@@ -240,13 +255,14 @@ void var_opt_union<T,S,A>::serialize(std::ostream& os) const {
240
255
  write(os, n_);
241
256
  write(os, outer_tau_numer_);
242
257
  write(os, outer_tau_denom_);
243
- gadget_.serialize(os);
258
+ gadget_.serialize(os, sd);
244
259
  }
245
260
  }
246
261
 
247
262
  template<typename T, typename S, typename A>
248
- std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header_size_bytes) const {
249
- const size_t size = header_size_bytes + get_serialized_size_bytes();
263
+ template<typename SerDe>
264
+ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
265
+ const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
250
266
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, gadget_.allocator_);
251
267
  uint8_t* ptr = bytes.data() + header_size_bytes;
252
268
 
@@ -278,7 +294,7 @@ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header
278
294
  ptr += copy_to_mem(outer_tau_numer_, ptr);
279
295
  ptr += copy_to_mem(outer_tau_denom_, ptr);
280
296
 
281
- auto gadget_bytes = gadget_.serialize();
297
+ auto gadget_bytes = gadget_.serialize(0, sd);
282
298
  ptr += copy_to_mem(gadget_bytes.data(), ptr, gadget_bytes.size() * sizeof(uint8_t));
283
299
  }
284
300
 
@@ -295,14 +311,16 @@ void var_opt_union<T,S,A>::reset() {
295
311
 
296
312
  template<typename T, typename S, typename A>
297
313
  string<A> var_opt_union<T,S,A>::to_string() const {
298
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
299
- os << "### VarOpt Union SUMMARY: " << std::endl;
300
- os << " . n : " << n_ << std::endl;
314
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
315
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
316
+ std::ostringstream os;
317
+ os << "### VarOpt Union SUMMARY:" << std::endl;
318
+ os << " n : " << n_ << std::endl;
301
319
  os << " Max k : " << max_k_ << std::endl;
302
- os << " Gadget Summary: " << std::endl;
320
+ os << " Gadget Summary:" << std::endl;
303
321
  os << gadget_.to_string();
304
- os << "### END VarOpt Union SUMMARY: " << std::endl;
305
- return os.str();
322
+ os << "### END VarOpt Union SUMMARY" << std::endl;
323
+ return string<A>(os.str().c_str(), gadget_.allocator_);
306
324
  }
307
325
 
308
326
  template<typename T, typename S, typename A>
@@ -39,7 +39,7 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
39
39
  var_opt_test_sketch sk1(10, var_opt_test_sketch::DEFAULT_RESIZE_FACTOR, 0);
40
40
  for (int i = 0; i < 100; ++i) sk1.update(i);
41
41
  auto bytes1 = sk1.serialize();
42
- auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), 0);
42
+ auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), test_type_serde(), 0);
43
43
 
44
44
  std::stringstream ss;
45
45
  sk1.serialize(ss);
@@ -51,7 +51,7 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
51
51
  u1.update(sk3);
52
52
 
53
53
  auto bytes2 = u1.serialize();
54
- auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), 0);
54
+ auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), test_type_serde(), 0);
55
55
  }
56
56
  REQUIRE(test_allocator_total_bytes == 0);
57
57
  REQUIRE(test_allocator_net_allocations == 0);
@@ -27,6 +27,7 @@
27
27
  #include <fstream>
28
28
  #include <cmath>
29
29
  #include <random>
30
+ #include <stdexcept>
30
31
 
31
32
  #ifdef TEST_BINARY_INPUT_PATH
32
33
  static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
@@ -27,6 +27,7 @@
27
27
  #include <fstream>
28
28
  #include <cmath>
29
29
  #include <random>
30
+ #include <stdexcept>
30
31
 
31
32
  #ifdef TEST_BINARY_INPUT_PATH
32
33
  static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
@@ -49,8 +49,9 @@ class CMakeBuild(build_ext):
49
49
  os.path.dirname(self.get_ext_fullpath(ext.name)))
50
50
  cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir]
51
51
  cmake_args += ['-DWITH_PYTHON=True']
52
+ cmake_args += ['-DCMAKE_CXX_STANDARD=11']
52
53
  # ensure we use a consistent python version
53
- cmake_args += ['-DPYTHON_EXECUTABLE=' + sys.executable]
54
+ cmake_args += ['-DPython3_EXECUTABLE=' + sys.executable]
54
55
  cfg = 'Debug' if self.debug else 'Release'
55
56
  build_args = ['--config', cfg]
56
57
 
@@ -59,7 +60,8 @@ class CMakeBuild(build_ext):
59
60
  cfg.upper(),
60
61
  extdir)]
61
62
  if sys.maxsize > 2**32:
62
- cmake_args += ['-A', 'x64']
63
+ cmake_args += ['-T', 'host=x64']
64
+ cmake_args += ['-DCMAKE_GENERATOR_PLATFORM=x64']
63
65
  build_args += ['--', '/m']
64
66
  else:
65
67
  cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg]
@@ -74,23 +76,24 @@ class CMakeBuild(build_ext):
74
76
  subprocess.check_call(['cmake', ext.sourcedir] + cmake_args,
75
77
  cwd=self.build_temp, env=env)
76
78
  subprocess.check_call(['cmake', '--build', '.', '--target', 'python'] + build_args,
77
- cwd=self.build_temp)
79
+ cwd=self.build_temp, env=env)
78
80
  print() # add an empty line to pretty print
79
81
 
80
82
  setup(
81
83
  name='datasketches',
82
- version='3.1.0',
83
- author='Apache DataSketches Developers',
84
+ version='3.4.0',
85
+ author='Apache Software Foundation',
84
86
  author_email='dev@datasketches.apache.org',
85
- description='A wrapper for the C++ Apache DataSketches library',
87
+ description='The Apache DataSketches Library for Python',
86
88
  license='Apache License 2.0',
87
89
  url='http://datasketches.apache.org',
88
90
  long_description=open('python/README.md').read(),
91
+ long_description_content_type='text/markdown',
89
92
  packages=find_packages('python'), # python packages only in this dir
90
93
  package_dir={'':'python'},
91
94
  # may need to add all source paths for sdist packages w/o MANIFEST.in
92
95
  ext_modules=[CMakeExtension('datasketches')],
93
96
  cmdclass={'build_ext': CMakeBuild},
94
- setup_requires=['setuptools_scm','tox-setuptools'],
97
+ install_requires=['numpy'],
95
98
  zip_safe=False
96
99
  )