datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -47,7 +47,6 @@ template<typename A> using AllocU8 = typename std::allocator_traits<A>::template
47
47
  */
48
48
  template<
49
49
  typename T,
50
- typename S = serde<T>, // deprecated, to be removed in the next major version
51
50
  typename A = std::allocator<T>
52
51
  >
53
52
  class var_opt_union {
@@ -69,20 +68,20 @@ public:
69
68
  * This method takes an lvalue.
70
69
  * @param sk a sketch to add to the union
71
70
  */
72
- void update(const var_opt_sketch<T,S,A>& sk);
71
+ void update(const var_opt_sketch<T, A>& sk);
73
72
 
74
73
  /**
75
74
  * Updates this union with the given sketch
76
75
  * This method takes an rvalue.
77
76
  * @param sk a sketch to add to the union
78
77
  */
79
- void update(var_opt_sketch<T,S,A>&& sk);
78
+ void update(var_opt_sketch<T, A>&& sk);
80
79
 
81
80
  /**
82
81
  * Gets the varopt sketch resulting from the union of any input sketches.
83
82
  * @return a varopt sketch
84
83
  */
85
- var_opt_sketch<T,S,A> get_result() const;
84
+ var_opt_sketch<T, A> get_result() const;
86
85
 
87
86
  /**
88
87
  * Resets the union to its default, empty state.
@@ -95,7 +94,7 @@ public:
95
94
  * @param instance of a SerDe
96
95
  * @return size in bytes needed to serialize this sketch
97
96
  */
98
- template<typename SerDe = S>
97
+ template<typename SerDe = serde<T>>
99
98
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
100
99
 
101
100
  // This is a convenience alias for users
@@ -111,7 +110,7 @@ public:
111
110
  * @param header_size_bytes space to reserve in front of the sketch
112
111
  * @param instance of a SerDe
113
112
  */
114
- template<typename SerDe = S>
113
+ template<typename SerDe = serde<T>>
115
114
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
116
115
 
117
116
  /**
@@ -120,18 +119,9 @@ public:
120
119
  * @param os output stream
121
120
  * @param instance of a SerDe
122
121
  */
123
- template<typename SerDe = S>
122
+ template<typename SerDe = serde<T>>
124
123
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
125
124
 
126
- /**
127
- * NOTE: This method may be deprecated in a future version.
128
- * This method deserializes a union from a given stream.
129
- * @param is input stream
130
- * @param instance of an Allocator
131
- * @return an instance of a union
132
- */
133
- static var_opt_union deserialize(std::istream& is, const A& allocator = A());
134
-
135
125
  /**
136
126
  * NOTE: This method may be deprecated in a future version.
137
127
  * This method deserializes a union from a given stream.
@@ -140,19 +130,9 @@ public:
140
130
  * @param instance of an Allocator
141
131
  * @return an instance of a union
142
132
  */
143
- template<typename SerDe = S>
133
+ template<typename SerDe = serde<T>>
144
134
  static var_opt_union deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
145
135
 
146
- /**
147
- * NOTE: This method may be deprecated in a future version.
148
- * This method deserializes a union from a given array of bytes.
149
- * @param bytes pointer to the array of bytes
150
- * @param size the size of the array
151
- * @param instance of an Allocator
152
- * @return an instance of a union
153
- */
154
- static var_opt_union deserialize(const void* bytes, size_t size, const A& allocator = A());
155
-
156
136
  /**
157
137
  * NOTE: This method may be deprecated in a future version.
158
138
  * This method deserializes a union from a given array of bytes.
@@ -162,7 +142,7 @@ public:
162
142
  * @param instance of an Allocator
163
143
  * @return an instance of a union
164
144
  */
165
- template<typename SerDe = S>
145
+ template<typename SerDe = serde<T>>
166
146
  static var_opt_union deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
167
147
 
168
148
  /**
@@ -171,9 +151,8 @@ public:
171
151
  */
172
152
  string<A> to_string() const;
173
153
 
174
-
175
154
  private:
176
- typedef typename std::allocator_traits<A>::template rebind_alloc<var_opt_sketch<T,S,A>> AllocSketch;
155
+ typedef typename std::allocator_traits<A>::template rebind_alloc<var_opt_sketch<T, A>> AllocSketch;
177
156
 
178
157
  static const uint8_t PREAMBLE_LONGS_EMPTY = 1;
179
158
  static const uint8_t PREAMBLE_LONGS_NON_EMPTY = 4;
@@ -191,10 +170,10 @@ private:
191
170
 
192
171
  uint32_t max_k_;
193
172
 
194
- var_opt_sketch<T,S,A> gadget_;
173
+ var_opt_sketch<T, A> gadget_;
195
174
 
196
175
  var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
197
- uint32_t max_k, var_opt_sketch<T,S,A>&& gadget);
176
+ uint32_t max_k, var_opt_sketch<T, A>&& gadget);
198
177
 
199
178
  /*
200
179
  IMPORTANT NOTE: the "gadget" in the union object appears to be a varopt sketch,
@@ -250,18 +229,18 @@ private:
250
229
  more importantly, this design choice allows us to exactly re-construct the input sketch
251
230
  when there is only one of them.
252
231
  */
253
- inline void merge_items(const var_opt_sketch<T,S,A>& sk);
254
- inline void merge_items(var_opt_sketch<T,S,A>&& sk);
255
- inline void resolve_tau(const var_opt_sketch<T,S,A>& sketch);
232
+ inline void merge_items(const var_opt_sketch<T, A>& sk);
233
+ inline void merge_items(var_opt_sketch<T, A>&& sk);
234
+ inline void resolve_tau(const var_opt_sketch<T, A>& sketch);
256
235
 
257
236
  double get_outer_tau() const;
258
237
 
259
- var_opt_sketch<T,S,A> simple_gadget_coercer() const;
238
+ var_opt_sketch<T, A> simple_gadget_coercer() const;
260
239
 
261
240
  bool there_exist_unmarked_h_items_lighter_than_target(double threshold) const;
262
- bool detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T,S,A>& sk) const;
263
- void mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk) const;
264
- void migrate_marked_items_by_decreasing_k(var_opt_sketch<T,S,A>& sk) const;
241
+ bool detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T, A>& sk) const;
242
+ void mark_moving_gadget_coercer(var_opt_sketch<T, A>& sk) const;
243
+ void migrate_marked_items_by_decreasing_k(var_opt_sketch<T, A>& sk) const;
265
244
 
266
245
  static void check_preamble_longs(uint8_t preamble_longs, uint8_t flags);
267
246
  static void check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver);
@@ -28,17 +28,17 @@
28
28
 
29
29
  namespace datasketches {
30
30
 
31
- template<typename T, typename S, typename A>
32
- var_opt_union<T,S,A>::var_opt_union(uint32_t max_k, const A& allocator) :
31
+ template<typename T, typename A>
32
+ var_opt_union<T, A>::var_opt_union(uint32_t max_k, const A& allocator) :
33
33
  n_(0),
34
34
  outer_tau_numer_(0.0),
35
35
  outer_tau_denom_(0),
36
36
  max_k_(max_k),
37
- gadget_(max_k, var_opt_sketch<T,S,A>::DEFAULT_RESIZE_FACTOR, true, allocator)
37
+ gadget_(max_k, var_opt_sketch<T, A>::DEFAULT_RESIZE_FACTOR, true, allocator)
38
38
  {}
39
39
 
40
- template<typename T, typename S, typename A>
41
- var_opt_union<T,S,A>::var_opt_union(const var_opt_union& other) :
40
+ template<typename T, typename A>
41
+ var_opt_union<T, A>::var_opt_union(const var_opt_union& other) :
42
42
  n_(other.n_),
43
43
  outer_tau_numer_(other.outer_tau_numer_),
44
44
  outer_tau_denom_(other.outer_tau_denom_),
@@ -46,8 +46,8 @@ var_opt_union<T,S,A>::var_opt_union(const var_opt_union& other) :
46
46
  gadget_(other.gadget_)
47
47
  {}
48
48
 
49
- template<typename T, typename S, typename A>
50
- var_opt_union<T,S,A>::var_opt_union(var_opt_union&& other) noexcept :
49
+ template<typename T, typename A>
50
+ var_opt_union<T, A>::var_opt_union(var_opt_union&& other) noexcept :
51
51
  n_(other.n_),
52
52
  outer_tau_numer_(other.outer_tau_numer_),
53
53
  outer_tau_denom_(other.outer_tau_denom_),
@@ -55,9 +55,9 @@ var_opt_union<T,S,A>::var_opt_union(var_opt_union&& other) noexcept :
55
55
  gadget_(std::move(other.gadget_))
56
56
  {}
57
57
 
58
- template<typename T, typename S, typename A>
59
- var_opt_union<T,S,A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
60
- uint32_t max_k, var_opt_sketch<T,S,A>&& gadget) :
58
+ template<typename T, typename A>
59
+ var_opt_union<T, A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t outer_tau_denom,
60
+ uint32_t max_k, var_opt_sketch<T, A>&& gadget) :
61
61
  n_(n),
62
62
  outer_tau_numer_(outer_tau_numer),
63
63
  outer_tau_denom_(outer_tau_denom),
@@ -65,12 +65,12 @@ var_opt_union<T,S,A>::var_opt_union(uint64_t n, double outer_tau_numer, uint64_t
65
65
  gadget_(gadget)
66
66
  {}
67
67
 
68
- template<typename T, typename S, typename A>
69
- var_opt_union<T,S,A>::~var_opt_union() {}
68
+ template<typename T, typename A>
69
+ var_opt_union<T, A>::~var_opt_union() {}
70
70
 
71
- template<typename T, typename S, typename A>
72
- var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(const var_opt_union& other) {
73
- var_opt_union<T,S,A> union_copy(other);
71
+ template<typename T, typename A>
72
+ var_opt_union<T, A>& var_opt_union<T, A>::operator=(const var_opt_union& other) {
73
+ var_opt_union union_copy(other);
74
74
  std::swap(n_, union_copy.n_);
75
75
  std::swap(outer_tau_numer_, union_copy.outer_tau_numer_);
76
76
  std::swap(outer_tau_denom_, union_copy.outer_tau_denom_);
@@ -79,8 +79,8 @@ var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(const var_opt_union& other
79
79
  return *this;
80
80
  }
81
81
 
82
- template<typename T, typename S, typename A>
83
- var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(var_opt_union&& other) {
82
+ template<typename T, typename A>
83
+ var_opt_union<T, A>& var_opt_union<T, A>::operator=(var_opt_union&& other) {
84
84
  std::swap(n_, other.n_);
85
85
  std::swap(outer_tau_numer_, other.outer_tau_numer_);
86
86
  std::swap(outer_tau_denom_, other.outer_tau_denom_);
@@ -128,14 +128,9 @@ var_opt_union<T,S,A>& var_opt_union<T,S,A>::operator=(var_opt_union&& other) {
128
128
  * </pre>
129
129
  */
130
130
 
131
- template<typename T, typename S, typename A>
132
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const A& allocator) {
133
- return deserialize(is, S(), allocator);
134
- }
135
-
136
- template<typename T, typename S, typename A>
131
+ template<typename T, typename A>
137
132
  template<typename SerDe>
138
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
133
+ var_opt_union<T, A> var_opt_union<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
139
134
  const auto preamble_longs = read<uint8_t>(is);
140
135
  const auto serial_version = read<uint8_t>(is);
141
136
  const auto family_id = read<uint8_t>(is);
@@ -155,29 +150,24 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(std::istream& is, const S
155
150
  if (!is.good())
156
151
  throw std::runtime_error("error reading from std::istream");
157
152
  else
158
- return var_opt_union<T,S,A>(max_k);
153
+ return var_opt_union(max_k);
159
154
  }
160
155
 
161
156
  const auto items_seen = read<uint64_t>(is);
162
157
  const auto outer_tau_numer = read<double>(is);
163
158
  const auto outer_tau_denom = read<uint64_t>(is);
164
159
 
165
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(is, sd, allocator);
160
+ var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(is, sd, allocator);
166
161
 
167
162
  if (!is.good())
168
163
  throw std::runtime_error("error reading from std::istream");
169
164
 
170
- return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
171
- }
172
-
173
- template<typename T, typename S, typename A>
174
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
175
- return deserialize(bytes, size, S(), allocator);
165
+ return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
176
166
  }
177
167
 
178
- template<typename T, typename S, typename A>
168
+ template<typename T, typename A>
179
169
  template<typename SerDe>
180
- var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
170
+ var_opt_union<T, A> var_opt_union<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
181
171
  ensure_minimum_memory(size, 8);
182
172
  const char* ptr = static_cast<const char*>(bytes);
183
173
  uint8_t preamble_longs;
@@ -201,7 +191,7 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
201
191
  bool is_empty = flags & EMPTY_FLAG_MASK;
202
192
 
203
193
  if (is_empty) {
204
- return var_opt_union<T,S,A>(max_k);
194
+ return var_opt_union(max_k);
205
195
  }
206
196
 
207
197
  uint64_t items_seen;
@@ -212,14 +202,14 @@ var_opt_union<T,S,A> var_opt_union<T,S,A>::deserialize(const void* bytes, size_t
212
202
  ptr += copy_from_mem(ptr, outer_tau_denom);
213
203
 
214
204
  const size_t gadget_size = size - (PREAMBLE_LONGS_NON_EMPTY << 3);
215
- var_opt_sketch<T,S,A> gadget = var_opt_sketch<T,S,A>::deserialize(ptr, gadget_size, sd, allocator);
205
+ var_opt_sketch<T, A> gadget = var_opt_sketch<T, A>::deserialize(ptr, gadget_size, sd, allocator);
216
206
 
217
- return var_opt_union<T,S,A>(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
207
+ return var_opt_union(items_seen, outer_tau_numer, outer_tau_denom, max_k, std::move(gadget));
218
208
  }
219
209
 
220
- template<typename T, typename S, typename A>
210
+ template<typename T, typename A>
221
211
  template<typename SerDe>
222
- size_t var_opt_union<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
212
+ size_t var_opt_union<T, A>::get_serialized_size_bytes(const SerDe& sd) const {
223
213
  if (n_ == 0) {
224
214
  return PREAMBLE_LONGS_EMPTY << 3;
225
215
  } else {
@@ -227,9 +217,9 @@ size_t var_opt_union<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
227
217
  }
228
218
  }
229
219
 
230
- template<typename T, typename S, typename A>
220
+ template<typename T, typename A>
231
221
  template<typename SerDe>
232
- void var_opt_union<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
222
+ void var_opt_union<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
233
223
  bool empty = (n_ == 0);
234
224
 
235
225
  const uint8_t serialization_version(SER_VER);
@@ -259,9 +249,9 @@ void var_opt_union<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
259
249
  }
260
250
  }
261
251
 
262
- template<typename T, typename S, typename A>
252
+ template<typename T, typename A>
263
253
  template<typename SerDe>
264
- std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
254
+ std::vector<uint8_t, AllocU8<A>> var_opt_union<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
265
255
  const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
266
256
  std::vector<uint8_t, AllocU8<A>> bytes(size, 0, gadget_.allocator_);
267
257
  uint8_t* ptr = bytes.data() + header_size_bytes;
@@ -301,16 +291,16 @@ std::vector<uint8_t, AllocU8<A>> var_opt_union<T,S,A>::serialize(unsigned header
301
291
  return bytes;
302
292
  }
303
293
 
304
- template<typename T, typename S, typename A>
305
- void var_opt_union<T,S,A>::reset() {
294
+ template<typename T, typename A>
295
+ void var_opt_union<T, A>::reset() {
306
296
  n_ = 0;
307
297
  outer_tau_numer_ = 0.0;
308
298
  outer_tau_denom_ = 0;
309
299
  gadget_.reset();
310
300
  }
311
301
 
312
- template<typename T, typename S, typename A>
313
- string<A> var_opt_union<T,S,A>::to_string() const {
302
+ template<typename T, typename A>
303
+ string<A> var_opt_union<T, A>::to_string() const {
314
304
  // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
315
305
  // The stream does not support passing an allocator instance, and alternatives are complicated.
316
306
  std::ostringstream os;
@@ -323,20 +313,20 @@ string<A> var_opt_union<T,S,A>::to_string() const {
323
313
  return string<A>(os.str().c_str(), gadget_.allocator_);
324
314
  }
325
315
 
326
- template<typename T, typename S, typename A>
327
- void var_opt_union<T,S,A>::update(const var_opt_sketch<T,S,A>& sk) {
316
+ template<typename T, typename A>
317
+ void var_opt_union<T, A>::update(const var_opt_sketch<T, A>& sk) {
328
318
  merge_items(sk);
329
319
  resolve_tau(sk);
330
320
  }
331
321
 
332
- template<typename T, typename S, typename A>
333
- void var_opt_union<T,S,A>::update(var_opt_sketch<T,S,A>&& sk) {
322
+ template<typename T, typename A>
323
+ void var_opt_union<T, A>::update(var_opt_sketch<T, A>&& sk) {
334
324
  merge_items(std::move(sk));
335
325
  resolve_tau(sk); // don't need items, so ok even if they've been moved out
336
326
  }
337
327
 
338
- template<typename T, typename S, typename A>
339
- double var_opt_union<T,S,A>::get_outer_tau() const {
328
+ template<typename T, typename A>
329
+ double var_opt_union<T, A>::get_outer_tau() const {
340
330
  if (outer_tau_denom_ == 0) {
341
331
  return 0.0;
342
332
  } else {
@@ -344,8 +334,8 @@ double var_opt_union<T,S,A>::get_outer_tau() const {
344
334
  }
345
335
  }
346
336
 
347
- template<typename T, typename S, typename A>
348
- void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
337
+ template<typename T, typename A>
338
+ void var_opt_union<T, A>::merge_items(const var_opt_sketch<T, A>& sketch) {
349
339
  if (sketch.n_ == 0) {
350
340
  return;
351
341
  }
@@ -353,8 +343,8 @@ void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
353
343
  n_ += sketch.n_;
354
344
 
355
345
  // H region const_iterator
356
- typename var_opt_sketch<T,S,A>::const_iterator h_itr(sketch, false, false);
357
- typename var_opt_sketch<T,S,A>::const_iterator h_end(sketch, true, false);
346
+ typename var_opt_sketch<T, A>::const_iterator h_itr(sketch, false, false);
347
+ typename var_opt_sketch<T, A>::const_iterator h_end(sketch, true, false);
358
348
  while (h_itr != h_end) {
359
349
  std::pair<const T&, const double> sample = *h_itr;
360
350
  gadget_.update(sample.first, sample.second, false);
@@ -362,8 +352,8 @@ void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
362
352
  }
363
353
 
364
354
  // Weight-correcting R region iterator (const_iterator doesn't do the correction)
365
- typename var_opt_sketch<T,S,A>::iterator r_itr(sketch, false, true);
366
- typename var_opt_sketch<T,S,A>::iterator r_end(sketch, true, true);
355
+ typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
356
+ typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
367
357
  while (r_itr != r_end) {
368
358
  std::pair<const T&, const double> sample = *r_itr;
369
359
  gadget_.update(sample.first, sample.second, true);
@@ -371,8 +361,8 @@ void var_opt_union<T,S,A>::merge_items(const var_opt_sketch<T,S,A>& sketch) {
371
361
  }
372
362
  }
373
363
 
374
- template<typename T, typename S, typename A>
375
- void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
364
+ template<typename T, typename A>
365
+ void var_opt_union<T, A>::merge_items(var_opt_sketch<T, A>&& sketch) {
376
366
  if (sketch.n_ == 0) {
377
367
  return;
378
368
  }
@@ -380,8 +370,8 @@ void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
380
370
  n_ += sketch.n_;
381
371
 
382
372
  // H region iterator
383
- typename var_opt_sketch<T,S,A>::iterator h_itr(sketch, false, false);
384
- typename var_opt_sketch<T,S,A>::iterator h_end(sketch, true, false);
373
+ typename var_opt_sketch<T, A>::iterator h_itr(sketch, false, false);
374
+ typename var_opt_sketch<T, A>::iterator h_end(sketch, true, false);
385
375
  while (h_itr != h_end) {
386
376
  std::pair<T&, double> sample = *h_itr;
387
377
  gadget_.update(std::move(sample.first), sample.second, false);
@@ -389,8 +379,8 @@ void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
389
379
  }
390
380
 
391
381
  // Weight-correcting R region iterator
392
- typename var_opt_sketch<T,S,A>::iterator r_itr(sketch, false, true);
393
- typename var_opt_sketch<T,S,A>::iterator r_end(sketch, true, true);
382
+ typename var_opt_sketch<T, A>::iterator r_itr(sketch, false, true);
383
+ typename var_opt_sketch<T, A>::iterator r_end(sketch, true, true);
394
384
  while (r_itr != r_end) {
395
385
  std::pair<T&, double> sample = *r_itr;
396
386
  gadget_.update(std::move(sample.first), sample.second, true);
@@ -398,8 +388,8 @@ void var_opt_union<T,S,A>::merge_items(var_opt_sketch<T,S,A>&& sketch) {
398
388
  }
399
389
  }
400
390
 
401
- template<typename T, typename S, typename A>
402
- void var_opt_union<T,S,A>::resolve_tau(const var_opt_sketch<T,S,A>& sketch) {
391
+ template<typename T, typename A>
392
+ void var_opt_union<T, A>::resolve_tau(const var_opt_sketch<T, A>& sketch) {
403
393
  if (sketch.r_ > 0) {
404
394
  const double sketch_tau = sketch.get_tau();
405
395
  const double outer_tau = get_outer_tau();
@@ -425,8 +415,8 @@ void var_opt_union<T,S,A>::resolve_tau(const var_opt_sketch<T,S,A>& sketch) {
425
415
  }
426
416
  }
427
417
 
428
- template<typename T, typename S, typename A>
429
- var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
418
+ template<typename T, typename A>
419
+ var_opt_sketch<T, A> var_opt_union<T, A>::get_result() const {
430
420
  // If no marked items in H, gadget is already valid mathematically. We can return what is
431
421
  // basically just a copy of the gadget.
432
422
  if (gadget_.num_marks_in_h_ == 0) {
@@ -435,7 +425,7 @@ var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
435
425
  // Copy of gadget. This may produce needless copying in the
436
426
  // pseudo-exact case below, but should simplify the code without
437
427
  // needing to make the gadget a pointer
438
- var_opt_sketch<T,S,A> gcopy(gadget_, false, n_);
428
+ var_opt_sketch<T, A> gcopy(gadget_, false, n_);
439
429
 
440
430
  // At this point, we know that marked items are present in H. So:
441
431
  // 1. Result will necessarily be in estimation mode
@@ -456,15 +446,15 @@ var_opt_sketch<T,S,A> var_opt_union<T,S,A>::get_result() const {
456
446
  *
457
447
  * @return A shallow copy of the gadget as valid varopt sketch
458
448
  */
459
- template<typename T, typename S, typename A>
460
- var_opt_sketch<T,S,A> var_opt_union<T,S,A>::simple_gadget_coercer() const {
449
+ template<typename T, typename A>
450
+ var_opt_sketch<T, A> var_opt_union<T, A>::simple_gadget_coercer() const {
461
451
  if (gadget_.num_marks_in_h_ != 0) throw std::logic_error("simple gadget coercer only applies if no marks");
462
- return var_opt_sketch<T,S,A>(gadget_, true, n_);
452
+ return var_opt_sketch<T, A>(gadget_, true, n_);
463
453
  }
464
454
 
465
455
  // this is a condition checked in detect_and_handle_subcase_of_pseudo_exact()
466
- template<typename T, typename S, typename A>
467
- bool var_opt_union<T,S,A>::there_exist_unmarked_h_items_lighter_than_target(double threshold) const {
456
+ template<typename T, typename A>
457
+ bool var_opt_union<T, A>::there_exist_unmarked_h_items_lighter_than_target(double threshold) const {
468
458
  for (uint32_t i = 0; i < gadget_.h_; ++i) {
469
459
  if ((gadget_.weights_[i] < threshold) && !gadget_.marks_[i]) {
470
460
  return true;
@@ -473,8 +463,8 @@ bool var_opt_union<T,S,A>::there_exist_unmarked_h_items_lighter_than_target(doub
473
463
  return false;
474
464
  }
475
465
 
476
- template<typename T, typename S, typename A>
477
- bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T,S,A>& sk) const {
466
+ template<typename T, typename A>
467
+ bool var_opt_union<T, A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_sketch<T, A>& sk) const {
478
468
  // gadget is seemingly exact
479
469
  const bool condition1 = gadget_.r_ == 0;
480
470
 
@@ -510,8 +500,8 @@ bool var_opt_union<T,S,A>::detect_and_handle_subcase_of_pseudo_exact(var_opt_ske
510
500
  *
511
501
  * @param sk Copy of the gadget, modified with marked items moved to the reservoir
512
502
  */
513
- template<typename T, typename S, typename A>
514
- void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk) const {
503
+ template<typename T, typename A>
504
+ void var_opt_union<T, A>::mark_moving_gadget_coercer(var_opt_sketch<T, A>& sk) const {
515
505
  const uint32_t result_k = gadget_.h_ + gadget_.r_;
516
506
 
517
507
  uint32_t result_h = 0;
@@ -583,8 +573,8 @@ void var_opt_union<T,S,A>::mark_moving_gadget_coercer(var_opt_sketch<T,S,A>& sk)
583
573
  }
584
574
 
585
575
  // this is basically a continuation of get_result(), but modifying the input gadget copy
586
- template<typename T, typename S, typename A>
587
- void var_opt_union<T,S,A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T,S,A>& gcopy) const {
576
+ template<typename T, typename A>
577
+ void var_opt_union<T, A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T, A>& gcopy) const {
588
578
  const uint32_t r_count = gcopy.r_;
589
579
  const uint32_t h_count = gcopy.h_;
590
580
  const uint32_t k = gcopy.k_;
@@ -616,8 +606,8 @@ void var_opt_union<T,S,A>::migrate_marked_items_by_decreasing_k(var_opt_sketch<T
616
606
  gcopy.strip_marks();
617
607
  }
618
608
 
619
- template<typename T, typename S, typename A>
620
- void var_opt_union<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
609
+ template<typename T, typename A>
610
+ void var_opt_union<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
621
611
  bool is_empty(flags & EMPTY_FLAG_MASK);
622
612
 
623
613
  if (is_empty) {
@@ -635,8 +625,8 @@ void var_opt_union<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t
635
625
  }
636
626
  }
637
627
 
638
- template<typename T, typename S, typename A>
639
- void var_opt_union<T,S,A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
628
+ template<typename T, typename A>
629
+ void var_opt_union<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
640
630
  if (family_id == FAMILY_ID) {
641
631
  if (ser_ver != SER_VER) {
642
632
  throw std::invalid_argument("Possible corruption: VarOpt Union serialization version must be "
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(sampling_test)
19
19
 
20
- target_link_libraries(sampling_test sampling common_test)
20
+ target_link_libraries(sampling_test sampling common_test_lib)
21
21
 
22
22
  set_target_properties(sampling_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -28,8 +28,8 @@
28
28
 
29
29
  namespace datasketches {
30
30
 
31
- using var_opt_test_sketch = var_opt_sketch<test_type, test_type_serde, test_allocator<test_type>>;
32
- using var_opt_test_union = var_opt_union<test_type, test_type_serde, test_allocator<test_type>>;
31
+ using var_opt_test_sketch = var_opt_sketch<test_type, test_allocator<test_type>>;
32
+ using var_opt_test_union = var_opt_union<test_type, test_allocator<test_type>>;
33
33
  using alloc = test_allocator<test_type>;
34
34
 
35
35
  TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
@@ -38,19 +38,19 @@ TEST_CASE("varopt allocation test", "[var_opt_sketch]") {
38
38
  {
39
39
  var_opt_test_sketch sk1(10, var_opt_test_sketch::DEFAULT_RESIZE_FACTOR, 0);
40
40
  for (int i = 0; i < 100; ++i) sk1.update(i);
41
- auto bytes1 = sk1.serialize();
41
+ auto bytes1 = sk1.serialize(0, test_type_serde());
42
42
  auto sk2 = var_opt_test_sketch::deserialize(bytes1.data(), bytes1.size(), test_type_serde(), 0);
43
43
 
44
44
  std::stringstream ss;
45
- sk1.serialize(ss);
46
- auto sk3 = var_opt_test_sketch::deserialize(ss, alloc(0));
45
+ sk1.serialize(ss, test_type_serde());
46
+ auto sk3 = var_opt_test_sketch::deserialize(ss, test_type_serde(), alloc(0));
47
47
 
48
48
  var_opt_test_union u1(10, 0);
49
49
  u1.update(sk1);
50
50
  u1.update(sk2);
51
51
  u1.update(sk3);
52
52
 
53
- auto bytes2 = u1.serialize();
53
+ auto bytes2 = u1.serialize(0, test_type_serde());
54
54
  auto u2 = var_opt_test_union::deserialize(bytes2.data(), bytes2.size(), test_type_serde(), 0);
55
55
  }
56
56
  REQUIRE(test_allocator_total_bytes == 0);
@@ -47,8 +47,8 @@ static var_opt_sketch<int> create_unweighted_sketch(uint32_t k, uint64_t n) {
47
47
  return sk;
48
48
  }
49
49
 
50
- template<typename T, typename S, typename A>
51
- static void check_if_equal(var_opt_sketch<T,S,A>& sk1, var_opt_sketch<T,S,A>& sk2) {
50
+ template<typename T, typename A>
51
+ static void check_if_equal(var_opt_sketch<T, A>& sk1, var_opt_sketch<T, A>& sk2) {
52
52
  REQUIRE(sk1.get_k() == sk2.get_k());
53
53
  REQUIRE(sk1.get_n() == sk2.get_n());
54
54
  REQUIRE(sk1.get_num_samples() == sk2.get_num_samples());
@@ -49,8 +49,8 @@ static var_opt_sketch<int> create_unweighted_sketch(uint32_t k, uint64_t n) {
49
49
 
50
50
  // if exact_compare = false, checks for equivalence -- specific R region values may differ but
51
51
  // R region weights must match
52
- template<typename T, typename S, typename A>
53
- static void check_if_equal(var_opt_sketch<T,S,A>& sk1, var_opt_sketch<T,S,A>& sk2, bool exact_compare = true) {
52
+ template<typename T, typename A>
53
+ static void check_if_equal(var_opt_sketch<T, A>& sk1, var_opt_sketch<T, A>& sk2, bool exact_compare = true) {
54
54
  REQUIRE(sk1.get_k() == sk2.get_k());
55
55
  REQUIRE(sk1.get_n() == sk2.get_n());
56
56
  REQUIRE(sk1.get_num_samples() == sk2.get_num_samples());
@@ -78,8 +78,8 @@ static void check_if_equal(var_opt_sketch<T,S,A>& sk1, var_opt_sketch<T,S,A>& sk
78
78
  // ensure that the resulting binary images are compatible.
79
79
  // if exact_compare = false, checks for equivalence -- specific R region values may differ but
80
80
  // R region weights must match
81
- template<typename T, typename S, typename A>
82
- static void compare_serialization_deserialization(var_opt_union<T,S,A>& vo_union, bool exact_compare = true) {
81
+ template<typename T, typename A>
82
+ static void compare_serialization_deserialization(var_opt_union<T,A>& vo_union, bool exact_compare = true) {
83
83
  std::vector<uint8_t> bytes = vo_union.serialize();
84
84
 
85
85
  var_opt_union<T> u_from_bytes = var_opt_union<T>::deserialize(bytes.data(), bytes.size());
@@ -22,6 +22,8 @@ import os
22
22
  import sys
23
23
  import platform
24
24
  import subprocess
25
+ import re
26
+ from datetime import datetime, timezone
25
27
 
26
28
  from setuptools import setup, find_packages, Extension
27
29
  from setuptools.command.build_ext import build_ext
@@ -78,9 +80,19 @@ class CMakeBuild(build_ext):
78
80
  cwd=self.build_temp, env=env)
79
81
  print() # add an empty line to pretty print
80
82
 
83
+ # Read and parse the version format
84
+ # @DT@ -> datestamp
85
+ # @HHMM@ -> .devHHMM to indicate development version
86
+ # Releases should have a fixed version with no @ variables
87
+ with open('version.cfg.in', 'r') as file:
88
+ ds_version = file.read().rstrip()
89
+ dt = datetime.now(timezone.utc)
90
+ ds_version = re.sub('@DT@', dt.strftime('%Y%m%d'), ds_version)
91
+ ds_version = re.sub('@HHMM@', 'dev' + dt.strftime('%H%M'), ds_version)
92
+
81
93
  setup(
82
94
  name='datasketches',
83
- version='3.5.1',
95
+ version=ds_version,
84
96
  author='Apache Software Foundation',
85
97
  author_email='dev@datasketches.apache.org',
86
98
  description='The Apache DataSketches Library for Python',
@@ -88,7 +100,7 @@ setup(
88
100
  url='http://datasketches.apache.org',
89
101
  long_description=open('python/README.md').read(),
90
102
  long_description_content_type='text/markdown',
91
- packages=find_packages(where='python',exclude=['src','*tests*']), # src not needed if only the.so
103
+ packages=find_packages(where='python',exclude=['src','*tests*']), # src not needed if only the .so
92
104
  package_dir={'':'python'},
93
105
  # may need to add all source paths for sdist packages w/o MANIFEST.in
94
106
  ext_modules=[CMakeExtension('datasketches')],